diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7abe3f310b..f803071d7e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@
 # GraphBLAS/CMakeLists.txt:  cmake script for GraphBLAS
 #-------------------------------------------------------------------------------
 
-# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 # http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 # CMakeLists.txt: instructions for cmake to build GraphBLAS.
@@ -46,19 +46,21 @@
 # get the version
 #-------------------------------------------------------------------------------
 
-# cmake 3.0 is preferred.
+# cmake 3.13 is preferred.
 cmake_minimum_required ( VERSION 2.8.12 )
 
+message ( STATUS "CMake version: " ${CMAKE_VERSION} )
+
 if ( CMAKE_VERSION VERSION_GREATER "3.0" )
     cmake_policy ( SET CMP0042 NEW )
     cmake_policy ( SET CMP0048 NEW )
 endif ( )
 
 # version of SuiteSparse:GraphBLAS
-set ( GraphBLAS_DATE "Dec 16, 2019" )
+set ( GraphBLAS_DATE "Feb 20, 2020" )
 set ( GraphBLAS_VERSION_MAJOR 3 )
-set ( GraphBLAS_VERSION_MINOR 1 )
-set ( GraphBLAS_VERSION_SUB   2 )
+set ( GraphBLAS_VERSION_MINOR 2 )
+set ( GraphBLAS_VERSION_SUB   0 )
 
 # GraphBLAS C API Specification version, at graphblas.org
 set ( GraphBLAS_API_DATE "May 18, 2018" )
@@ -68,9 +70,9 @@ set ( GraphBLAS_API_VERSION_SUB   0 )
 
 if ( CMAKE_MAJOR_VERSION GREATER 2 )
     project ( graphblas
-        VERSION "${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB}" )
+        VERSION "${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB}" LANGUAGES C )
 else ( )
-    project ( graphblas )
+    project ( graphblas C )
 endif ( )
 
 #-------------------------------------------------------------------------------
@@ -87,8 +89,8 @@ if ( NOT CMAKE_BUILD_TYPE )
 endif ( )
 
 # select "true" to build both dynamic and static libraries:
-#  set ( BUILD_GRB_STATIC_LIBRARY true )
-#  set ( BUILD_GRB_STATIC_LIBRARY false )
+# set ( BUILD_GRB_STATIC_LIBRARY true )
+# set ( BUILD_GRB_STATIC_LIBRARY false )
 # or use cmake with -DBUILD_GRB_STATIC_LIBRARY=1
 
 if ( BUILD_GRB_STATIC_LIBRARY )
@@ -97,8 +99,16 @@ else ( )
     message ( STATUS "Building dynamic GraphBLAS library only" )
 endif ( )
 
+# select "true" to enable burble, for GraphBLAS development only
+# set ( GB_BURBLE true )
+  set ( GB_BURBLE false )
+
+if ( GB_BURBLE )
+    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -DGB_BURBLE=1 " )
+endif ( )
+
 #-------------------------------------------------------------------------------
-# find m4 and configure GraphBLAS with user-defined objects (if any)
+# configure GraphBLAS
 #-------------------------------------------------------------------------------
 
 find_program ( M4EXISTS m4 )
@@ -106,22 +116,10 @@ if ( NOT M4EXISTS )
     message ( FATAL_ERROR "m4 not found, but is required.  Install via your system package manager, or download at http://www.gnu.org/software/m4/m4.html or http://gnuwin32.sourceforge.net for Windows" )
 endif ( )
 
-# configure user-defined objects defined in User/*.m4
-file ( GLOB USER_M4 "User/*.m4" )
-set ( DEF0 "Config/user_def0.m4" )
-set ( DEF1 "Config/user_def1.m4" )
-set ( DEF2 "Config/user_def2.m4" )
-execute_process (
-    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-    COMMAND m4 -P ${DEF0} ${DEF1} ${USER_M4} ${DEF2}
-    OUTPUT_FILE "Source/all_user_objects.c"
-)
 set ( DEC0 "Config/GraphBLAS.h.in" )
-set ( DEC1 "Config/user_dec1.m4" )
-set ( DEC2 "Config/user_dec2.m4" )
 execute_process (
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-    COMMAND m4 -P ${DEC0} ${DEC1} ${USER_M4} ${DEC2}
+    COMMAND m4 -P ${DEC0}
     OUTPUT_FILE "Config/GraphBLAS.h.tmp"
 )
 
@@ -152,8 +150,9 @@ configure_file (
 
 include ( FindOpenMP  )
 include ( FindThreads )
-# enable_language ( CUDA )
 
+# FUTURE: rely on CUDA
+# enable_language ( CUDA )
 # for nvcc, add -DGBCUDA
 
 #-------------------------------------------------------------------------------
@@ -174,6 +173,42 @@ message ( STATUS "CMAKE have pthreads:       " ${CMAKE_USE_PTHREADS_INIT}  )
 message ( STATUS "CMAKE have Win32 pthreads: " ${CMAKE_USE_WIN32_THREADS_INIT} )
 message ( STATUS "CMAKE have OpenMP:         " ${OPENMP_FOUND} )
 
+#-------------------------------------------------------------------------------
+# find the BLAS
+#-------------------------------------------------------------------------------
+
+# FUTURE: CBLAS disabled for now because of a linking issue.  The problem is
+# that GraphBLAS gets linked with both libgomp and libiomp, when the MKL BLAS
+# is used.  The FindOpenMP includes libgomp and FindBLAS with BLA_VENDOR set to
+# Intel10_64ilp includes libiomp.  When this happens, incorrect results are
+# obtained from cblas_saxpy and cblas_daxpy.  The Intel MKL can work with
+# libgomp, but not when libiomp is also included.
+
+#   if ( CMAKE_VERSION VERSION_GREATER "3.13" )
+#       # Look for the parallel 64-bit MKL BLAS by default
+#       set ( BLA_VENDOR Intel10_64ilp )
+#       include ( FindBLAS )
+#       if ( ${BLAS_FOUND} )
+#           set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -DMKL_ILP64 " )
+#       else ( )
+#           message ( STATUS "CBLAS : Intel MKL not found" )
+#           # FUTURE: enable other BLAS flavors here
+#           # set ( BLA_VENDOR ... )
+#           # include ( FindBLAS )
+#       endif ( )
+#   endif ( )
+
+#   if ( ${BLAS_FOUND} )
+#       # BLAS_LINKER_FLAGS: uncached list of required linker flags
+#       # (excluding -l and -L).
+#       message ( STATUS "CBLAS:                     found" )
+#       message ( STATUS "CBLAS linker flags:        " ${BLAS_LINKER_FLAGS} )
+#       # BLAS_LIBRARIES: list of libraries to link against (may be empty)
+#       message ( STATUS "CBLAS libraries:    " ${BLAS_LIBRARIES} )
+#   else ( )
+#       message ( STATUS "CBLAS:                     not found" )
+#   endif ( )
+
 #-------------------------------------------------------------------------------
 # include directories for both graphblas and graphblasdemo libraries
 #-------------------------------------------------------------------------------
@@ -191,22 +226,25 @@ include_directories ( Source/Template Source Source/Generated Source/Generator I
 if ( "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
     # cmake 2.8 workaround: gcc needs to be told to do ANSI C11.
     # cmake 3.0 doesn't have this problem.
-    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -std=c11 -lm -Wno-pragmas -fPIC " )
+    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -std=c11 -lm -Wno-pragmas " )
     # check all warnings (uncomment for development only)
 #   set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -Wall -Wextra -Wpedantic -Werror " )
+    # set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -g" )
     if ( CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9 )
         message ( FATAL_ERROR "gcc version must be at least 4.9" )
     endif ( )
 elseif ( "${CMAKE_C_COMPILER_ID}" STREQUAL "Intel" )
     # options for icc: also needs -std=c11
-#   set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -qopt-report -qopt-report-phase=vec" )
+    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -diag-disable 10397,15552 " )
+    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -qopt-report=5 -qopt-report-phase=vec" )
     # the -mp1 option is important for predictable floating-point results with
     # the icc compiler.  Without, ((float) 1.)/((float) 0.) produces NaN,
     # instead of the correct result, Inf.
-    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -std=c11 -mp1 -fPIC" )
+    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -std=c11 -mp1" )
     # The -g option is useful for the Intel VTune tool, but it should be
     # removed in production.  Comment this line out if not in use:
     # set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -g" )
+    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -qopt-malloc-options=3" )
     # check all warnings and remarks (uncomment for development only):
 #   set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -w3 -Wremarks -Werror " )
     if ( CMAKE_C_COMPILER_VERSION VERSION_LESS 19.0 )
@@ -223,8 +261,8 @@ elseif ( "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC" )
 elseif ( "${CMAKE_C_COMPILER_ID}" STREQUAL "PGI" )
     # options for PGI pgcc compiler.  The compiler has a bug, and the
     # -DPGI_COMPILER_BUG causes GraphBLAS to use a workaround.
-    set ( CMAKE_C_FLAGS    "${CMAKE_C_FLAGS} -Mnoopenmp -noswitcherror -c11 -lm -fPIC -DPGI_COMPILER_BUG" )
-    set ( CMAKE_CXX_FLAGS  "${CMAKE_C_FLAGS} -Mnoopenmp -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 -noswitcherror --c++11 -lm -fPIC -DPGI_COMPILER_BUG" )
+    set ( CMAKE_C_FLAGS    "${CMAKE_C_FLAGS} -Mnoopenmp -noswitcherror -c11 -lm -DPGI_COMPILER_BUG" )
+    set ( CMAKE_CXX_FLAGS  "${CMAKE_C_FLAGS} -Mnoopenmp -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 -noswitcherror --c++11 -lm -DPGI_COMPILER_BUG" )
 endif ( )
 
 if ( ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
@@ -325,7 +363,7 @@ endif ( )
 if ( USE_OPENMP )
     # use OpenMP for user thread synchronization
     message ( STATUS "Using OpenMP to synchronize user threads" )
-    target_link_libraries ( graphblas        ${M_LIB} ${OpenMP_C_LIBRARIES} )
+    target_link_libraries ( graphblas ${M_LIB} ${OpenMP_C_LIBRARIES} )
     if ( BUILD_GRB_STATIC_LIBRARY )
         target_link_libraries ( graphblas_static ${M_LIB} ${OpenMP_C_LIBRARIES} )
     endif ( )
@@ -333,7 +371,7 @@ if ( USE_OPENMP )
 elseif ( USE_POSIX )
     # use POSIX for user thread synchronization
     message ( STATUS "Using POSIX pthreads to synchronize user threads" )
-    target_link_libraries ( graphblas        ${M_LIB} )
+    target_link_libraries ( graphblas ${M_LIB} )
     if ( BUILD_GRB_STATIC_LIBRARY )
         target_link_libraries ( graphblas_static ${M_LIB} )
     endif ( )
@@ -341,7 +379,7 @@ elseif ( USE_POSIX )
 else ( )
     # use no threading at all
     message ( WARNING "No support for user threads; GraphBLAS will not be thread-safe" )
-    target_link_libraries ( graphblas        ${M_LIB} )
+    target_link_libraries ( graphblas ${M_LIB} )
     if ( BUILD_GRB_STATIC_LIBRARY )
         target_link_libraries ( graphblas_static ${M_LIB} )
     endif ( )
@@ -349,7 +387,7 @@ else ( )
 endif ( )
 
 if ( CMAKE_THREAD_LIBS_INIT )
-    target_link_libraries ( graphblas        ${CMAKE_THREAD_LIBS_INIT} )
+    target_link_libraries ( graphblas ${CMAKE_THREAD_LIBS_INIT} )
     if ( BUILD_GRB_STATIC_LIBRARY )
         target_link_libraries ( graphblas_static ${CMAKE_THREAD_LIBS_INIT} )
     endif ( )
@@ -359,7 +397,7 @@ if ( OPENMP_FOUND )
     # use OpenMP for internal parallelism
     message ( STATUS "Using OpenMP for internal parallelism" )
     set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
-    target_link_libraries ( graphblas        ${M_LIB} ${OpenMP_C_LIBRARIES} )
+    target_link_libraries ( graphblas ${M_LIB} ${OpenMP_C_LIBRARIES} )
     if ( BUILD_GRB_STATIC_LIBRARY )
         target_link_libraries ( graphblas_static ${M_LIB} ${OpenMP_C_LIBRARIES} )
     endif ( )
@@ -373,6 +411,13 @@ if ( CMAKE_USE_WIN32_THREADS_INIT )
     set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -DHAVE_WINDOWS_THREADS " )
 endif ( )
 
+if ( BLAS_FOUND )
+    # use the dense CBLAS
+    message ( STATUS "Using dense CBLAS for faster dense matrix/vector operations" )
+    set ( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -DGB_HAS_CBLAS " )
+    target_link_libraries ( graphblas ${BLAS_LIBRARIES} )
+endif ( )
+
 #-------------------------------------------------------------------------------
 # determine the default matrix format
 #-------------------------------------------------------------------------------
@@ -397,7 +442,7 @@ add_library ( graphblasdemo SHARED ${DEMO_SOURCES} )
 SET_TARGET_PROPERTIES ( graphblasdemo PROPERTIES
     C_STANDARD_REQUIRED 11 )
 set_property ( TARGET graphblasdemo PROPERTY C_STANDARD 11 )
-target_link_libraries ( graphblasdemo         ${M_LIB} graphblas )
+target_link_libraries ( graphblasdemo ${M_LIB} graphblas )
 
 if ( BUILD_GRB_STATIC_LIBRARY )
     add_library ( graphblasdemo_static STATIC ${DEMO_SOURCES} )
diff --git a/Config/GraphBLAS.h.in b/Config/GraphBLAS.h.in
index c8552c4b77..a2464e0f13 100644
--- a/Config/GraphBLAS.h.in
+++ b/Config/GraphBLAS.h.in
@@ -2,7 +2,7 @@
 // GraphBLAS.h: definitions for the GraphBLAS package
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -123,13 +123,13 @@
 
 // The 'about' string the describes this particular implementation of GraphBLAS:
 #define GxB_IMPLEMENTATION_ABOUT \
-"SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, "                   \
+"SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, "                   \
 "All Rights Reserved.\n"                                                     \
 "http://suitesparse.com  Dept of Computer Sci. & Eng, Texas A&M University\n"
 
 // The GraphBLAS license for this particular implementation of GraphBLAS:
 #define GxB_IMPLEMENTATION_LICENSE \
-"SuiteSparse:GraphBLAS, Copyright 2017-2019, Timothy A. Davis\n"             \
+"SuiteSparse:GraphBLAS, Copyright 2017-2020, Timothy A. Davis\n"             \
 "\n"                                                                         \
 "Licensed under the Apache License, Version 2.0 (the \"License\");\n"        \
 "you may not use SuiteSparse:GraphBLAS except in compliance with the\n"      \
@@ -663,12 +663,12 @@ typedef struct GB_BinaryOp_opaque *GrB_BinaryOp ;
 //------------------------------------------------------------------------------
 
 // There are three sets of built-in binary operators.  For the first set of
-// 19 kinds of operators, x,y,z all have the same type, and they are available
-// for all 11 types, for a total of 19*11 = 209 operators.  All of them have
+// 21 kinds of operators, x,y,z all have the same type, and they are available
+// for all 11 types, for a total of 21*11 = 231 operators.  All of them have
 // a "_TYPE" suffix that denotes the type of x,y,z:
 
-//      10 general: FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES,
-//              DIV, RDIV
+//      12 general: FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES,
+//              DIV, RDIV, PAIR, ANY
 //      6 comparison: ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE
 //      3 logical: LOR, LAND, LXOR
 
@@ -683,12 +683,12 @@ typedef struct GB_BinaryOp_opaque *GrB_BinaryOp ;
 
 //      3 logical: LOR, LAND, LXOR
 
-// Thus there are 209+66+3 = 278 built-in binary operators.  Some are redundant
+// Thus there are 231+66+3 = 300 built-in binary operators.  Some are redundant
 // but are included to keep the name space of operators uniform.
 
-// For 10 binary operators z=f(x,y), x, y, and z are all the same type:
-// FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV, for all 11
-// types.
+// For 12 binary operators z=f(x,y), x, y, and z are all the same type:
+// FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV, PAIR, ANY,
+// for all 11 types.
 
 GB_PUBLIC GrB_BinaryOp
     // z = x            z = y               z = min(x,y)        z = max (x,y)
@@ -717,18 +717,18 @@ GB_PUBLIC GrB_BinaryOp
     GrB_PLUS_FP32,      GrB_MINUS_FP32,     GrB_TIMES_FP32,     GrB_DIV_FP32,
     GrB_PLUS_FP64,      GrB_MINUS_FP64,     GrB_TIMES_FP64,     GrB_DIV_FP64,
 
-    // z = y-x          z = y/x
-    GxB_RMINUS_BOOL,    GxB_RDIV_BOOL,      // ADDED in V3.0: RMINUS, RDIV
-    GxB_RMINUS_INT8,    GxB_RDIV_INT8,
-    GxB_RMINUS_UINT8,   GxB_RDIV_UINT8,
-    GxB_RMINUS_INT16,   GxB_RDIV_INT16,
-    GxB_RMINUS_UINT16,  GxB_RDIV_UINT16,
-    GxB_RMINUS_INT32,   GxB_RDIV_INT32,
-    GxB_RMINUS_UINT32,  GxB_RDIV_UINT32,
-    GxB_RMINUS_INT64,   GxB_RDIV_INT64,
-    GxB_RMINUS_UINT64,  GxB_RDIV_UINT64,
-    GxB_RMINUS_FP32,    GxB_RDIV_FP32,
-    GxB_RMINUS_FP64,    GxB_RDIV_FP64,
+    // z = y-x          z = y/x             z = 1               z = pick(x,y)
+    GxB_RMINUS_BOOL,    GxB_RDIV_BOOL,      GxB_PAIR_BOOL,      GxB_ANY_BOOL,
+    GxB_RMINUS_INT8,    GxB_RDIV_INT8,      GxB_PAIR_INT8,      GxB_ANY_INT8,
+    GxB_RMINUS_UINT8,   GxB_RDIV_UINT8,     GxB_PAIR_UINT8,     GxB_ANY_UINT8,
+    GxB_RMINUS_INT16,   GxB_RDIV_INT16,     GxB_PAIR_INT16,     GxB_ANY_INT16,
+    GxB_RMINUS_UINT16,  GxB_RDIV_UINT16,    GxB_PAIR_UINT16,    GxB_ANY_UINT16,
+    GxB_RMINUS_INT32,   GxB_RDIV_INT32,     GxB_PAIR_INT32,     GxB_ANY_INT32,
+    GxB_RMINUS_UINT32,  GxB_RDIV_UINT32,    GxB_PAIR_UINT32,    GxB_ANY_UINT32,
+    GxB_RMINUS_INT64,   GxB_RDIV_INT64,     GxB_PAIR_INT64,     GxB_ANY_INT64,
+    GxB_RMINUS_UINT64,  GxB_RDIV_UINT64,    GxB_PAIR_UINT64,    GxB_ANY_UINT64,
+    GxB_RMINUS_FP32,    GxB_RDIV_FP32,      GxB_PAIR_FP32,      GxB_ANY_FP32,
+    GxB_RMINUS_FP64,    GxB_RDIV_FP64,      GxB_PAIR_FP64,      GxB_ANY_FP64,
 
 // Six comparison operators z=f(x,y) return the same type as their inputs.
 // Each of them compute z = (x OP y), where x, y, and z all have the same type.
@@ -863,10 +863,11 @@ GB_PUBLIC GrB_BinaryOp
 // GrB_IDENTITY_BOOL, GrB_AINV_BOOL, and GrB_MINV_BOOL all give the same result
 // (z = x).
 
-// With this convention for boolean "division", there are 10 unique binary
+// With this convention for boolean "division", there are 11 unique binary
 // operators that are purely boolean; 13 *_BOOL operators are redundant but are
 // included in GraphBLAS so that the name space of operators is complete:
 
+//      z = 1           PAIR
 //      z = x           FIRST, DIV
 //      z = y           SECOND, RDIV
 //      z = (x && y)    AND, MIN, TIMES
@@ -3336,11 +3337,14 @@ GrB_Info GrB_Matrix_extractTuples           // [I,J,X] = find (A)
 //       In other words, C<Mask> = accum (C,T) is split into Z = accum(C,T) ;
 //       C=0 ; C<Mask> = Z.
 //
-// GrB_MASK: can be GxB_DEFAULT or GrB_SCMP.  If GxB_DEFAULT, the mask is used
+// GrB_MASK: can be GxB_DEFAULT, GrB_COMP, GrB_STRUCTURE, or set to both
+//      GrB_COMP and GrB_STRUCTURE.  If GxB_DEFAULT, the mask is used
 //      normally, where Mask(i,j)=1 means C(i,j) can be modified by C<Mask>=Z,
 //      and Mask(i,j)=0 means it cannot be modified even if Z(i,j) is has been
-//      computed and differs from C(i,j).  If GrB_SCMP, this is the same as
-//      taking the logical complement of the Mask.
+//      computed and differs from C(i,j).  If GrB_COMP, this is the same as
+//      taking the logical complement of the Mask.  If GrB_STRUCTURE is set,
+//      the value of the mask is not considered, just its pattern.  The
+//      GrB_COMP and GrB_STRUCTURE settings can be combined.
 //
 // GrB_INP0: can be GxB_DEFAULT or GrB_TRAN.  If GxB_DEFAULT, the first input
 //      is used as-is.  If GrB_TRAN, it is transposed.  Only matrices are
@@ -3369,22 +3373,34 @@ GrB_Info GrB_Matrix_extractTuples           // [I,J,X] = find (A)
 //      except that floating-point roundoff may differ when working on
 //      floating-point data types.
 //
-//      GxB_AxB_GUSTAVSON:  Gustavon's method, computing C(:,j)=A*B(,j) via
-//          a gather/scatter workspace of size equal to the number of rows of A.
-//          Very good general-purpose method, but sometimes the workspace can be
-//          too large when many threads are used..
+//      GxB_AxB_SAXPY:  C(:,j)=A*B(:,j) is computed using a mix of Gustavson,
+//          Hash, and (in the future) the Heap method.  Each task in the
+//          parallel computation makes its own decision, via a heuristic.
 //
-//      GxB_AxB_HEAP: a heap-based method, computing C(:,j)=A*B(:,j) via a heap
-//          of size equal to the maximum number of entries in any column of B.
-//          Very good for hypersparse matrices, particularly when nnz(B) is
-//          less than the number of rows of A.
+//      GxB_AxB_GUSTAVSON:  This is the same as GxB_AxB_SAXPY, except that
+//          every task uses Gustavson's method, computing C(:,j)=A*B(:,j) via a
+//          gather/scatter workspace of size equal to the number of rows of A.
+//          Very good general-purpose method, but sometimes the workspace can
+//          be too large when many threads are used.
+//
+//      GxB_AxB_HEAP: a heap-based saxpy-style method, computing
+//          C(:,j)=A*B(:,j) via a heap of size equal to the maximum number of
+//          entries in any column of B.  Very good for hypersparse matrices,
+//          particularly when nnz(B) is less than the number of rows of A.
+//          The Heap method is no longer available in v3.2, so it is silently
+//          replaced with GxB_AxB_HASH.  It may reappear in a future version.
+//
+//      GxB_AxB_HASH: This is the same as GxB_AxB_SAXPY, except that every
+//          task uses the Hash method.  Like the Heap method, it is very good
+//          for hypersparse matrices and uses very little workspace (but more
+//          workspace than the Heap method).
 //
 //      GxB_AxB_DOT: computes C(i,j) = A(:,i)'*B(:,j), for each entry C(i,j).
 //          A very specialized method that works well only if the mask is
-//          present, very sparse, and not complemented, or when C is tiny.
-//          It is impossibly slow if C is large and the mask is not present,
-//          since it takes Omega(m*n) time if C is m-by-n.  Uses a 2-phase
-//          method.  The first phase is symbolic, and the 2nd phase is numeric.
+//          present, very sparse, and not complemented, or when C is a
+//          dense vector or matrix, or when C is tiny.  It is impossibly slow
+//          if C is large and the mask is not present, since it takes
+//          Omega(m*n) time if C is m-by-n.
 
 // GxB_NTHREADS and GxB_CHUNK are an enumerated value in both the
 // GrB_Desc_Field and the GxB_Option_Field.  They are defined with the same
@@ -3393,16 +3409,9 @@ GrB_Info GrB_Matrix_extractTuples           // [I,J,X] = find (A)
 #define GxB_NTHREADS 5
 #define GxB_CHUNK 7
 
-// GxB_NTHREADS_MAX is a compile-time constant that gives the upper bound on
-// the number of threads that GraphBLAS can use.  This thread count is the sum
-// of the maximum number of user threads and the number of internal OpenMP
-// threads created inside GraphBLAS by each user thread (nthreads_max, which
-// can be set by GxB_set (GxB_NTHREADS, nthreads_max)).  It is the maximum
-// permitted value of the run-time value nthreads_max.  This constant can be
-// changed at compile-time by using -DGxB_NTHREADS_MAX=4096, for example.  The
-// upper limit below should be large enough...
+// GxB_NTHREADS_MAX is no longer used, as of v3.2.0.
 #ifndef GxB_NTHREADS_MAX
-#define GxB_NTHREADS_MAX 2048
+#define GxB_NTHREADS_MAX INT32_MAX
 #endif
 
 typedef enum
@@ -3425,8 +3434,10 @@ typedef enum
 }
 GrB_Desc_Field ;
 
-// SPEC: GxB_DEFAULT, GxB_NTHREADS, GxB_CHUNK and GxB_AxB_* are extensionsi
-// to the spec.
+// SPEC: GxB_DEFAULT, GxB_NTHREADS, GxB_CHUNK and GxB_AxB_* are extensions to
+// the spec.  In the spec, setting both GrB_COMP and GrB_STRUCTURE can be done
+// with two calls to GrB_Descriptor_set.  As an extension to the spec, they can
+// also be set with a single call, using the setting GrB_COMP+GrB_STRUCTURE.
 
 typedef enum
 {
@@ -3436,8 +3447,10 @@ typedef enum
     // for GrB_OUTP only:
     GrB_REPLACE = 1,    // clear the output before assigning new values to it
 
-    // for GrB_MASK only: these two options are identical
-    GrB_SCMP = 2,       // use the structural complement of the input
+    // for GrB_MASK only:
+    GrB_COMP = 2,       // use the structural complement of the input
+    GrB_SCMP = 2,       // same as GrB_COMP (deprecated; use GrB_COMP instead)
+    GrB_STRUCTURE = 4,  // use only the pattern of the mask, not its values
 
     // for GrB_INP0 and GrB_INP1 only:
     GrB_TRAN = 3,       // use the transpose of the input
@@ -3446,7 +3459,8 @@ typedef enum
     GxB_AxB_GUSTAVSON = 1001,   // gather-scatter saxpy method
     GxB_AxB_HEAP      = 1002,   // heap-based saxpy method
     GxB_AxB_DOT       = 1003,   // dot product
-//  GxB_AxB_HASH      = 1004    // hash-based saxpy method (FUTURE)
+    GxB_AxB_HASH      = 1004,   // hash-based saxpy method
+    GxB_AxB_SAXPY     = 1005    // saxpy method (any kind)
 }
 GrB_Desc_Value ;
 
@@ -3520,6 +3534,63 @@ GrB_Info GrB_Descriptor_free    // free a descriptor
     GrB_Descriptor *descriptor  // handle of descriptor to free
 ) ;
 
+// Predefined descriptors and their values:
+
+GB_PUBLIC
+GrB_Descriptor     // OUTP         MASK           MASK       INP0      INP1
+                   //              structural     complement
+                   // ===========  ============== ========== ========  ========
+
+// GrB_NULL        // -            -              -          -         -
+GrB_DESC_T1      , // -            -              -          -         GrB_TRAN
+GrB_DESC_T0      , // -            -              -          GrB_TRAN  -       
+GrB_DESC_T0T1    , // -            -              -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_C       , // -            -              GrB_COMP   -         -       
+GrB_DESC_CT1     , // -            -              GrB_COMP   -         GrB_TRAN
+GrB_DESC_CT0     , // -            -              GrB_COMP   GrB_TRAN  -       
+GrB_DESC_CT0T1   , // -            -              GrB_COMP   GrB_TRAN  GrB_TRAN
+
+GrB_DESC_S       , // -            GrB_STRUCTURE  -          -         -       
+GrB_DESC_ST1     , // -            GrB_STRUCTURE  -          -         GrB_TRAN
+GrB_DESC_ST0     , // -            GrB_STRUCTURE  -          GrB_TRAN  -       
+GrB_DESC_ST0T1   , // -            GrB_STRUCTURE  -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_SC      , // -            GrB_STRUCTURE  GrB_COMP   -         -       
+GrB_DESC_SCT1    , // -            GrB_STRUCTURE  GrB_COMP   -         GrB_TRAN
+GrB_DESC_SCT0    , // -            GrB_STRUCTURE  GrB_COMP   GrB_TRAN  -       
+GrB_DESC_SCT0T1  , // -            GrB_STRUCTURE  GrB_COMP   GrB_TRAN  GrB_TRAN
+
+GrB_DESC_R       , // GrB_REPLACE  -              -          -         -       
+GrB_DESC_RT1     , // GrB_REPLACE  -              -          -         GrB_TRAN
+GrB_DESC_RT0     , // GrB_REPLACE  -              -          GrB_TRAN  -       
+GrB_DESC_RT0T1   , // GrB_REPLACE  -              -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_RC      , // GrB_REPLACE  -              GrB_COMP   -         -       
+GrB_DESC_RCT1    , // GrB_REPLACE  -              GrB_COMP   -         GrB_TRAN
+GrB_DESC_RCT0    , // GrB_REPLACE  -              GrB_COMP   GrB_TRAN  -       
+GrB_DESC_RCT0T1  , // GrB_REPLACE  -              GrB_COMP   GrB_TRAN  GrB_TRAN
+
+GrB_DESC_RS      , // GrB_REPLACE  GrB_STRUCTURE  -          -         -       
+GrB_DESC_RST1    , // GrB_REPLACE  GrB_STRUCTURE  -          -         GrB_TRAN
+GrB_DESC_RST0    , // GrB_REPLACE  GrB_STRUCTURE  -          GrB_TRAN  -       
+GrB_DESC_RST0T1  , // GrB_REPLACE  GrB_STRUCTURE  -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_RSC     , // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   -         -       
+GrB_DESC_RSCT1   , // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   -         GrB_TRAN
+GrB_DESC_RSCT0   , // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   GrB_TRAN  -       
+GrB_DESC_RSCT0T1 ; // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   GrB_TRAN  GrB_TRAN
+
+// GrB_NULL is the default descriptor, with all settings at their defaults:
+//
+//      OUTP: do not replace the output
+//      MASK: mask is valued and not complemented
+//      INP0: first input not transposed
+//      INP1: second input not transposed
+
+// Predefined descriptors may not be modified or freed.  Attempting to modify
+// them results in an error (GrB_INVALID_VALUE).  Attempts to free them are
+// silently ignored.
 
 //==============================================================================
 //=== SuiteSparse:GraphBLAS options ============================================
@@ -3586,7 +3657,8 @@ typedef enum            // for global options or matrix options
     GxB_API_VERSION = 16,           // API version (3 int's)
     GxB_API_DATE = 17,              // date of the API (char *)
     GxB_API_ABOUT = 18,             // about the API (char *)
-    GxB_API_URL = 19                // URL for the API (char *)
+    GxB_API_URL = 19,               // URL for the API (char *)
+    GxB_BURBLE = 20                 // development only (bool *)
 
 } GxB_Option_Field ;
 
@@ -3699,7 +3771,6 @@ GrB_Info GxB_Global_Option_get      // gets the current global default option
 //      GxB_set (GxB_FORMAT, GxB_BY_COL) ;
 //      GxB_get (GxB_FORMAT, GxB_Format_Value *s) ;
 //
-//      // see the GxB_NTHREADS_MAX discussion above
 //      GxB_set (GxB_NTHREADS, nthreads_max) ;
 //      GxB_get (GxB_NTHREADS, int *nthreads_max) ;
 //
@@ -3734,7 +3805,8 @@ GrB_Info GxB_Global_Option_get      // gets the current global default option
 //      GxB_get (GrB_Descriptor d, GrB_OUTP, GrB_Desc_Value *v) ;
 //
 //      GxB_set (GrB_Descriptor d, GrB_MASK, GxB_DEFAULT) ;
-//      GxB_set (GrB_Descriptor d, GrB_MASK, GrB_SCMP) ;
+//      GxB_set (GrB_Descriptor d, GrB_MASK, GrB_COMP) ;
+//      GxB_set (GrB_Descriptor d, GrB_MASK, GrB_STRUCTURE) ;
 //      GxB_get (GrB_Descriptor d, GrB_MASK, GrB_Desc_Value *v) ;
 //
 //      GxB_set (GrB_Descriptor d, GrB_INP0, GxB_DEFAULT) ;
@@ -3748,6 +3820,8 @@ GrB_Info GxB_Global_Option_get      // gets the current global default option
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_DEFAULT) ;
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_GUSTAVSON) ;
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_HEAP) ;
+//      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_HASH) ;
+//      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_SAXPY) ;
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_DOT) ;
 //      GxB_get (GrB_Descriptor d, GrB_AxB_METHOD, GrB_Desc_Value *v) ;
 //
@@ -4298,7 +4372,7 @@ GrB_Info GrB_Col_extract            // w<mask> = accum (w, A(I,j))
 //      and no part of C outside the submatrix is ever modified.  In
 //      GrB_assign, it is possible to modify C outside the submatrix, but only
 //      in one specific manner.  Suppose the mask M is present (or, suppose it
-//      is not present but GrB_SCMP is true).  After (optionally) complementing
+//      is not present but GrB_COMP is true).  After (optionally) complementing
 //      the mask, the value of M(i,j) can be 0 for some entry outside the
 //      C(I,J) submatrix.  If the GrB_REPLACE descriptor is true, the
 //      GrB_assign deletes this entry.  This case does not occur if GrB_REPLACE
@@ -4308,7 +4382,7 @@ GrB_Info GrB_Col_extract            // w<mask> = accum (w, A(I,j))
 
 // GxB_subassign and GrB_assign are identical if GrB_REPLACE is set to its
 // default value of false, or if the masks happen to be the same.  The two
-// masks can be the same in two cases:  either there is no mask (and GrB_SCMP
+// masks can be the same in two cases:  either there is no mask (and GrB_COMP
 // is false), or I and J are both GrB_ALL.  In this case, the two algorithms
 // are identical and have the same performance.
 
@@ -4330,7 +4404,7 @@ GrB_Info GrB_Col_extract            // w<mask> = accum (w, A(I,j))
 // matrix and vector subassign: C(I,J)<Mask> = accum (C(I,J), A)
 // matrix and vector    assign: C<Mask>(I,J) = accum (C(I,J), A)
 
-// This notation does not include the details of the GrB_SCMP and GrB_REPLACE
+// This notation does not include the details of the GrB_COMP and GrB_REPLACE
 // descriptors, but it does illustrate the difference in the Mask.  In the
 // subassign, Mask is the same size as C(I,J) and A.  If I[0]=i and J[0]=j,
 // Then Mask(0,0) controls how C(i,j) is modified by the subassign, from the
@@ -5779,7 +5853,7 @@ GrB_Info GrB_Matrix_reduce_UDT      // c = accum (c, reduce_to_scalar (A))
 // GrB_Matrix_reduce_Monoid   (w,mask,acc,mo,A,d) // w<mask> = acc (w,reduce(A))
 // GrB_Matrix_reduce_BinaryOp (w,mask,acc,op,A,d) // w<mask> = acc (w,reduce(A))
 // reduce matrix to scalar:
-// GrB_Vector_reduce_[SCALAR] (c,acc,monoid,u,d)  // c = acc (c,reduce(A))
+// GrB_Vector_reduce_[SCALAR] (c,acc,monoid,u,d)  // c = acc (c,reduce(u))
 // GrB_Matrix_reduce_[SCALAR] (c,acc,monoid,A,d)  // c = acc (c,reduce(A))
 
 #if GxB_STDC_VERSION >= 201112L
@@ -5893,9 +5967,9 @@ GrB_Info GrB_transpose              // C<Mask> = accum (C, A')
 // built-in monoids
 //------------------------------------------------------------------------------
 
-// 44 unique monoids can be constructed using built-in types and operators, all
-// of which are defined below.  Four operators (min, max, plus, times) are
-// available for each of the 10 non-Boolean types, and four purely Boolean
+// 55 monoids can be constructed using built-in types and operators, all of
+// which are defined below.  Five operators (min, max, plus, times, any) are
+// available for each of the 10 non-Boolean types, and five purely Boolean
 // monoids are available.
 
 GB_PUBLIC GrB_Monoid
@@ -5948,7 +6022,20 @@ GB_PUBLIC GrB_Monoid
     GxB_TIMES_FP32_MONOID,        // identity: 1            terminal: none
     GxB_TIMES_FP64_MONOID,        // identity: 1            terminal: none
 
+    // ANY monoids:
+    GxB_ANY_INT8_MONOID,          // identity: any value    terminal: any value
+    GxB_ANY_INT16_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_INT32_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_INT64_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_UINT8_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_UINT16_MONOID,        // identity: any value    terminal: any value
+    GxB_ANY_UINT32_MONOID,        // identity: any value    terminal: any value
+    GxB_ANY_UINT64_MONOID,        // identity: any value    terminal: any value
+    GxB_ANY_FP32_MONOID,          // identity: any value    terminal: any value
+    GxB_ANY_FP64_MONOID,          // identity: any value    terminal: any value
+
     // Boolean monoids:
+    GxB_ANY_BOOL_MONOID,          // identity: any value    terminal: any value
     GxB_LOR_BOOL_MONOID,          // identity: false        terminal: true
     GxB_LAND_BOOL_MONOID,         // identity: true         terminal: false
     GxB_LXOR_BOOL_MONOID,         // identity: false
@@ -5958,34 +6045,40 @@ GB_PUBLIC GrB_Monoid
 // built-in semirings
 //------------------------------------------------------------------------------
 
-// Using built-in types and operators, 960 unique semirings can be built.  This
-// count excludes redundant Boolean operators (for example GxB_TIMES_BOOL and
+// Using built-in types and operators, 1355 unique semirings can be built.
+// This count excludes redundant semirings (for example GxB_TIMES_BOOL and
 // GxB_LAND_BOOL are different operators but they are redundant since they
-// always return the same result):
+// always return the same result).
 
-// 760 semirings with a multiply operator TxT -> T where T is non-Boolean, from
-// the complete cross product of:
+// 1000 semirings with a multiply operator TxT -> T where T is non-Boolean,
+// from the complete cross product of:
 
-//      4 add monoids (MIN, MAX, PLUS, TIMES)
-//      19 multiply operators:
-//          (FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
+//      5 add monoids: MIN, MAX, PLUS, TIMES, ANY
+//      20 multiply operators:
+//           FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
 //           ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
-//           LOR, LAND, LXOR)
+//           LOR, LAND, LXOR, PAIR
 //      10 non-Boolean types, T
 
-// 240 semirings with a comparison operator TxT -> bool, where T is
+// 300 semirings with a comparison operator TxT -> bool, where T is
 // non-Boolean, from the complete cross product of:
 
-//      4 Boolean add monoids: (LAND, LOR, LXOR, EQ)
-//      6 multiply operators: (EQ, NE, GT, LT, GE, LE)
+//      5 Boolean add monoids: LAND, LOR, LXOR, EQ, ANY
+//      6 multiply operators: EQ, NE, GT, LT, GE, LE
 //      10 non-Boolean types, T
 
-// 40 semirings with purely Boolean types, bool x bool -> bool, from the
+// 55 semirings with purely Boolean types, bool x bool -> bool, from the
 // complete cross product of:
 
-//      4 Boolean add monoids (LAND, LOR, LXOR, EQ)
-//      10 multiply operators:
-//          (FIRST, SECOND, LOR, LAND, LXOR, EQ, GT, LT, GE, LE)
+//      5 Boolean add monoids: LAND, LOR, LXOR, EQ, ANY
+//      11 multiply operators:
+//          FIRST, SECOND, LOR, LAND, LXOR, EQ, GT, LT, GE, LE, PAIR
+
+// The ANY operator is also valid to use as a multiplicative operator in a
+// semiring, but serves no purpose in that case.  The ANY operator is meant as
+// a fast additive operator for a monoid that terminates, or short-circuits,
+// as soon as any value is found.  A valid user semiring can be constructed
+// with ANY as the multiply operator, but none are predefined below.
 
 // In the names below, each semiring has a name of the form GxB_add_mult_T
 // where add is the additive monoid, mult is the multiply operator, and T is
@@ -5997,330 +6090,342 @@ GB_PUBLIC GrB_Monoid
 GB_PUBLIC GrB_Semiring
 
 //------------------------------------------------------------------------------
-// 680 non-Boolean semirings where all types are the same, given by suffix _T
-//------------------------------------------------------------------------------
-
-// semirings with multiply op: z = FIRST (x,y), all types x,y,z the same:
-GxB_MIN_FIRST_INT8     , GxB_MAX_FIRST_INT8     , GxB_PLUS_FIRST_INT8    , GxB_TIMES_FIRST_INT8   ,
-GxB_MIN_FIRST_UINT8    , GxB_MAX_FIRST_UINT8    , GxB_PLUS_FIRST_UINT8   , GxB_TIMES_FIRST_UINT8  ,
-GxB_MIN_FIRST_INT16    , GxB_MAX_FIRST_INT16    , GxB_PLUS_FIRST_INT16   , GxB_TIMES_FIRST_INT16  ,
-GxB_MIN_FIRST_UINT16   , GxB_MAX_FIRST_UINT16   , GxB_PLUS_FIRST_UINT16  , GxB_TIMES_FIRST_UINT16 ,
-GxB_MIN_FIRST_INT32    , GxB_MAX_FIRST_INT32    , GxB_PLUS_FIRST_INT32   , GxB_TIMES_FIRST_INT32  ,
-GxB_MIN_FIRST_UINT32   , GxB_MAX_FIRST_UINT32   , GxB_PLUS_FIRST_UINT32  , GxB_TIMES_FIRST_UINT32 ,
-GxB_MIN_FIRST_INT64    , GxB_MAX_FIRST_INT64    , GxB_PLUS_FIRST_INT64   , GxB_TIMES_FIRST_INT64  ,
-GxB_MIN_FIRST_UINT64   , GxB_MAX_FIRST_UINT64   , GxB_PLUS_FIRST_UINT64  , GxB_TIMES_FIRST_UINT64 ,
-GxB_MIN_FIRST_FP32     , GxB_MAX_FIRST_FP32     , GxB_PLUS_FIRST_FP32    , GxB_TIMES_FIRST_FP32   ,
-GxB_MIN_FIRST_FP64     , GxB_MAX_FIRST_FP64     , GxB_PLUS_FIRST_FP64    , GxB_TIMES_FIRST_FP64   ,
-
-// semirings with multiply op: z = SECOND (x,y), all types x,y,z the same:
-GxB_MIN_SECOND_INT8    , GxB_MAX_SECOND_INT8    , GxB_PLUS_SECOND_INT8   , GxB_TIMES_SECOND_INT8  ,
-GxB_MIN_SECOND_UINT8   , GxB_MAX_SECOND_UINT8   , GxB_PLUS_SECOND_UINT8  , GxB_TIMES_SECOND_UINT8 ,
-GxB_MIN_SECOND_INT16   , GxB_MAX_SECOND_INT16   , GxB_PLUS_SECOND_INT16  , GxB_TIMES_SECOND_INT16 ,
-GxB_MIN_SECOND_UINT16  , GxB_MAX_SECOND_UINT16  , GxB_PLUS_SECOND_UINT16 , GxB_TIMES_SECOND_UINT16,
-GxB_MIN_SECOND_INT32   , GxB_MAX_SECOND_INT32   , GxB_PLUS_SECOND_INT32  , GxB_TIMES_SECOND_INT32 ,
-GxB_MIN_SECOND_UINT32  , GxB_MAX_SECOND_UINT32  , GxB_PLUS_SECOND_UINT32 , GxB_TIMES_SECOND_UINT32,
-GxB_MIN_SECOND_INT64   , GxB_MAX_SECOND_INT64   , GxB_PLUS_SECOND_INT64  , GxB_TIMES_SECOND_INT64 ,
-GxB_MIN_SECOND_UINT64  , GxB_MAX_SECOND_UINT64  , GxB_PLUS_SECOND_UINT64 , GxB_TIMES_SECOND_UINT64,
-GxB_MIN_SECOND_FP32    , GxB_MAX_SECOND_FP32    , GxB_PLUS_SECOND_FP32   , GxB_TIMES_SECOND_FP32  ,
-GxB_MIN_SECOND_FP64    , GxB_MAX_SECOND_FP64    , GxB_PLUS_SECOND_FP64   , GxB_TIMES_SECOND_FP64  ,
-
-// semirings with multiply op: z = MIN (x,y), all types x,y,z the same:
-GxB_MIN_MIN_INT8       , GxB_MAX_MIN_INT8       , GxB_PLUS_MIN_INT8      , GxB_TIMES_MIN_INT8     ,
-GxB_MIN_MIN_UINT8      , GxB_MAX_MIN_UINT8      , GxB_PLUS_MIN_UINT8     , GxB_TIMES_MIN_UINT8    ,
-GxB_MIN_MIN_INT16      , GxB_MAX_MIN_INT16      , GxB_PLUS_MIN_INT16     , GxB_TIMES_MIN_INT16    ,
-GxB_MIN_MIN_UINT16     , GxB_MAX_MIN_UINT16     , GxB_PLUS_MIN_UINT16    , GxB_TIMES_MIN_UINT16   ,
-GxB_MIN_MIN_INT32      , GxB_MAX_MIN_INT32      , GxB_PLUS_MIN_INT32     , GxB_TIMES_MIN_INT32    ,
-GxB_MIN_MIN_UINT32     , GxB_MAX_MIN_UINT32     , GxB_PLUS_MIN_UINT32    , GxB_TIMES_MIN_UINT32   ,
-GxB_MIN_MIN_INT64      , GxB_MAX_MIN_INT64      , GxB_PLUS_MIN_INT64     , GxB_TIMES_MIN_INT64    ,
-GxB_MIN_MIN_UINT64     , GxB_MAX_MIN_UINT64     , GxB_PLUS_MIN_UINT64    , GxB_TIMES_MIN_UINT64   ,
-GxB_MIN_MIN_FP32       , GxB_MAX_MIN_FP32       , GxB_PLUS_MIN_FP32      , GxB_TIMES_MIN_FP32     ,
-GxB_MIN_MIN_FP64       , GxB_MAX_MIN_FP64       , GxB_PLUS_MIN_FP64      , GxB_TIMES_MIN_FP64     ,
-
-// semirings with multiply op: z = MAX (x,y), all types x,y,z the same:
-GxB_MIN_MAX_INT8       , GxB_MAX_MAX_INT8       , GxB_PLUS_MAX_INT8      , GxB_TIMES_MAX_INT8     ,
-GxB_MIN_MAX_UINT8      , GxB_MAX_MAX_UINT8      , GxB_PLUS_MAX_UINT8     , GxB_TIMES_MAX_UINT8    ,
-GxB_MIN_MAX_INT16      , GxB_MAX_MAX_INT16      , GxB_PLUS_MAX_INT16     , GxB_TIMES_MAX_INT16    ,
-GxB_MIN_MAX_UINT16     , GxB_MAX_MAX_UINT16     , GxB_PLUS_MAX_UINT16    , GxB_TIMES_MAX_UINT16   ,
-GxB_MIN_MAX_INT32      , GxB_MAX_MAX_INT32      , GxB_PLUS_MAX_INT32     , GxB_TIMES_MAX_INT32    ,
-GxB_MIN_MAX_UINT32     , GxB_MAX_MAX_UINT32     , GxB_PLUS_MAX_UINT32    , GxB_TIMES_MAX_UINT32   ,
-GxB_MIN_MAX_INT64      , GxB_MAX_MAX_INT64      , GxB_PLUS_MAX_INT64     , GxB_TIMES_MAX_INT64    ,
-GxB_MIN_MAX_UINT64     , GxB_MAX_MAX_UINT64     , GxB_PLUS_MAX_UINT64    , GxB_TIMES_MAX_UINT64   ,
-GxB_MIN_MAX_FP32       , GxB_MAX_MAX_FP32       , GxB_PLUS_MAX_FP32      , GxB_TIMES_MAX_FP32     ,
-GxB_MIN_MAX_FP64       , GxB_MAX_MAX_FP64       , GxB_PLUS_MAX_FP64      , GxB_TIMES_MAX_FP64     ,
-
-// semirings with multiply op: z = PLUS (x,y), all types x,y,z the same:
-GxB_MIN_PLUS_INT8      , GxB_MAX_PLUS_INT8      , GxB_PLUS_PLUS_INT8     , GxB_TIMES_PLUS_INT8    ,
-GxB_MIN_PLUS_UINT8     , GxB_MAX_PLUS_UINT8     , GxB_PLUS_PLUS_UINT8    , GxB_TIMES_PLUS_UINT8   ,
-GxB_MIN_PLUS_INT16     , GxB_MAX_PLUS_INT16     , GxB_PLUS_PLUS_INT16    , GxB_TIMES_PLUS_INT16   ,
-GxB_MIN_PLUS_UINT16    , GxB_MAX_PLUS_UINT16    , GxB_PLUS_PLUS_UINT16   , GxB_TIMES_PLUS_UINT16  ,
-GxB_MIN_PLUS_INT32     , GxB_MAX_PLUS_INT32     , GxB_PLUS_PLUS_INT32    , GxB_TIMES_PLUS_INT32   ,
-GxB_MIN_PLUS_UINT32    , GxB_MAX_PLUS_UINT32    , GxB_PLUS_PLUS_UINT32   , GxB_TIMES_PLUS_UINT32  ,
-GxB_MIN_PLUS_INT64     , GxB_MAX_PLUS_INT64     , GxB_PLUS_PLUS_INT64    , GxB_TIMES_PLUS_INT64   ,
-GxB_MIN_PLUS_UINT64    , GxB_MAX_PLUS_UINT64    , GxB_PLUS_PLUS_UINT64   , GxB_TIMES_PLUS_UINT64  ,
-GxB_MIN_PLUS_FP32      , GxB_MAX_PLUS_FP32      , GxB_PLUS_PLUS_FP32     , GxB_TIMES_PLUS_FP32    ,
-GxB_MIN_PLUS_FP64      , GxB_MAX_PLUS_FP64      , GxB_PLUS_PLUS_FP64     , GxB_TIMES_PLUS_FP64    ,
-
-// semirings with multiply op: z = MINUS (x,y), all types x,y,z the same:
-GxB_MIN_MINUS_INT8     , GxB_MAX_MINUS_INT8     , GxB_PLUS_MINUS_INT8    , GxB_TIMES_MINUS_INT8   ,
-GxB_MIN_MINUS_UINT8    , GxB_MAX_MINUS_UINT8    , GxB_PLUS_MINUS_UINT8   , GxB_TIMES_MINUS_UINT8  ,
-GxB_MIN_MINUS_INT16    , GxB_MAX_MINUS_INT16    , GxB_PLUS_MINUS_INT16   , GxB_TIMES_MINUS_INT16  ,
-GxB_MIN_MINUS_UINT16   , GxB_MAX_MINUS_UINT16   , GxB_PLUS_MINUS_UINT16  , GxB_TIMES_MINUS_UINT16 ,
-GxB_MIN_MINUS_INT32    , GxB_MAX_MINUS_INT32    , GxB_PLUS_MINUS_INT32   , GxB_TIMES_MINUS_INT32  ,
-GxB_MIN_MINUS_UINT32   , GxB_MAX_MINUS_UINT32   , GxB_PLUS_MINUS_UINT32  , GxB_TIMES_MINUS_UINT32 ,
-GxB_MIN_MINUS_INT64    , GxB_MAX_MINUS_INT64    , GxB_PLUS_MINUS_INT64   , GxB_TIMES_MINUS_INT64  ,
-GxB_MIN_MINUS_UINT64   , GxB_MAX_MINUS_UINT64   , GxB_PLUS_MINUS_UINT64  , GxB_TIMES_MINUS_UINT64 ,
-GxB_MIN_MINUS_FP32     , GxB_MAX_MINUS_FP32     , GxB_PLUS_MINUS_FP32    , GxB_TIMES_MINUS_FP32   ,
-GxB_MIN_MINUS_FP64     , GxB_MAX_MINUS_FP64     , GxB_PLUS_MINUS_FP64    , GxB_TIMES_MINUS_FP64   ,
-
-// ADDED in V3.0: semirings with RDIV and RMINUS:
-
-// semirings with multiply op: z = RMINUS (x,y), all types x,y,z the same:
-GxB_MIN_RMINUS_INT8    , GxB_MAX_RMINUS_INT8    , GxB_PLUS_RMINUS_INT8   , GxB_TIMES_RMINUS_INT8   ,
-GxB_MIN_RMINUS_UINT8   , GxB_MAX_RMINUS_UINT8   , GxB_PLUS_RMINUS_UINT8  , GxB_TIMES_RMINUS_UINT8  ,
-GxB_MIN_RMINUS_INT16   , GxB_MAX_RMINUS_INT16   , GxB_PLUS_RMINUS_INT16  , GxB_TIMES_RMINUS_INT16  ,
-GxB_MIN_RMINUS_UINT16  , GxB_MAX_RMINUS_UINT16  , GxB_PLUS_RMINUS_UINT16 , GxB_TIMES_RMINUS_UINT16 ,
-GxB_MIN_RMINUS_INT32   , GxB_MAX_RMINUS_INT32   , GxB_PLUS_RMINUS_INT32  , GxB_TIMES_RMINUS_INT32  ,
-GxB_MIN_RMINUS_UINT32  , GxB_MAX_RMINUS_UINT32  , GxB_PLUS_RMINUS_UINT32 , GxB_TIMES_RMINUS_UINT32 ,
-GxB_MIN_RMINUS_INT64   , GxB_MAX_RMINUS_INT64   , GxB_PLUS_RMINUS_INT64  , GxB_TIMES_RMINUS_INT64  ,
-GxB_MIN_RMINUS_UINT64  , GxB_MAX_RMINUS_UINT64  , GxB_PLUS_RMINUS_UINT64 , GxB_TIMES_RMINUS_UINT64 ,
-GxB_MIN_RMINUS_FP32    , GxB_MAX_RMINUS_FP32    , GxB_PLUS_RMINUS_FP32   , GxB_TIMES_RMINUS_FP32   ,
-GxB_MIN_RMINUS_FP64    , GxB_MAX_RMINUS_FP64    , GxB_PLUS_RMINUS_FP64   , GxB_TIMES_RMINUS_FP64   ,
-
-// semirings with multiply op: z = TIMES (x,y), all types x,y,z the same:
-GxB_MIN_TIMES_INT8     , GxB_MAX_TIMES_INT8     , GxB_PLUS_TIMES_INT8    , GxB_TIMES_TIMES_INT8   ,
-GxB_MIN_TIMES_UINT8    , GxB_MAX_TIMES_UINT8    , GxB_PLUS_TIMES_UINT8   , GxB_TIMES_TIMES_UINT8  ,
-GxB_MIN_TIMES_INT16    , GxB_MAX_TIMES_INT16    , GxB_PLUS_TIMES_INT16   , GxB_TIMES_TIMES_INT16  ,
-GxB_MIN_TIMES_UINT16   , GxB_MAX_TIMES_UINT16   , GxB_PLUS_TIMES_UINT16  , GxB_TIMES_TIMES_UINT16 ,
-GxB_MIN_TIMES_INT32    , GxB_MAX_TIMES_INT32    , GxB_PLUS_TIMES_INT32   , GxB_TIMES_TIMES_INT32  ,
-GxB_MIN_TIMES_UINT32   , GxB_MAX_TIMES_UINT32   , GxB_PLUS_TIMES_UINT32  , GxB_TIMES_TIMES_UINT32 ,
-GxB_MIN_TIMES_INT64    , GxB_MAX_TIMES_INT64    , GxB_PLUS_TIMES_INT64   , GxB_TIMES_TIMES_INT64  ,
-GxB_MIN_TIMES_UINT64   , GxB_MAX_TIMES_UINT64   , GxB_PLUS_TIMES_UINT64  , GxB_TIMES_TIMES_UINT64 ,
-GxB_MIN_TIMES_FP32     , GxB_MAX_TIMES_FP32     , GxB_PLUS_TIMES_FP32    , GxB_TIMES_TIMES_FP32   ,
-GxB_MIN_TIMES_FP64     , GxB_MAX_TIMES_FP64     , GxB_PLUS_TIMES_FP64    , GxB_TIMES_TIMES_FP64   ,
-
-// semirings with multiply op: z = DIV (x,y), all types x,y,z the same:
-GxB_MIN_DIV_INT8       , GxB_MAX_DIV_INT8       , GxB_PLUS_DIV_INT8      , GxB_TIMES_DIV_INT8     ,
-GxB_MIN_DIV_UINT8      , GxB_MAX_DIV_UINT8      , GxB_PLUS_DIV_UINT8     , GxB_TIMES_DIV_UINT8    ,
-GxB_MIN_DIV_INT16      , GxB_MAX_DIV_INT16      , GxB_PLUS_DIV_INT16     , GxB_TIMES_DIV_INT16    ,
-GxB_MIN_DIV_UINT16     , GxB_MAX_DIV_UINT16     , GxB_PLUS_DIV_UINT16    , GxB_TIMES_DIV_UINT16   ,
-GxB_MIN_DIV_INT32      , GxB_MAX_DIV_INT32      , GxB_PLUS_DIV_INT32     , GxB_TIMES_DIV_INT32    ,
-GxB_MIN_DIV_UINT32     , GxB_MAX_DIV_UINT32     , GxB_PLUS_DIV_UINT32    , GxB_TIMES_DIV_UINT32   ,
-GxB_MIN_DIV_INT64      , GxB_MAX_DIV_INT64      , GxB_PLUS_DIV_INT64     , GxB_TIMES_DIV_INT64    ,
-GxB_MIN_DIV_UINT64     , GxB_MAX_DIV_UINT64     , GxB_PLUS_DIV_UINT64    , GxB_TIMES_DIV_UINT64   ,
-GxB_MIN_DIV_FP32       , GxB_MAX_DIV_FP32       , GxB_PLUS_DIV_FP32      , GxB_TIMES_DIV_FP32     ,
-GxB_MIN_DIV_FP64       , GxB_MAX_DIV_FP64       , GxB_PLUS_DIV_FP64      , GxB_TIMES_DIV_FP64     ,
-
-// semirings with multiply op: z = RDIV (x,y), all types x,y,z the same:
-GxB_MIN_RDIV_INT8      , GxB_MAX_RDIV_INT8      , GxB_PLUS_RDIV_INT8     , GxB_TIMES_RDIV_INT8    ,
-GxB_MIN_RDIV_UINT8     , GxB_MAX_RDIV_UINT8     , GxB_PLUS_RDIV_UINT8    , GxB_TIMES_RDIV_UINT8   ,
-GxB_MIN_RDIV_INT16     , GxB_MAX_RDIV_INT16     , GxB_PLUS_RDIV_INT16    , GxB_TIMES_RDIV_INT16   ,
-GxB_MIN_RDIV_UINT16    , GxB_MAX_RDIV_UINT16    , GxB_PLUS_RDIV_UINT16   , GxB_TIMES_RDIV_UINT16  ,
-GxB_MIN_RDIV_INT32     , GxB_MAX_RDIV_INT32     , GxB_PLUS_RDIV_INT32    , GxB_TIMES_RDIV_INT32   ,
-GxB_MIN_RDIV_UINT32    , GxB_MAX_RDIV_UINT32    , GxB_PLUS_RDIV_UINT32   , GxB_TIMES_RDIV_UINT32  ,
-GxB_MIN_RDIV_INT64     , GxB_MAX_RDIV_INT64     , GxB_PLUS_RDIV_INT64    , GxB_TIMES_RDIV_INT64   ,
-GxB_MIN_RDIV_UINT64    , GxB_MAX_RDIV_UINT64    , GxB_PLUS_RDIV_UINT64   , GxB_TIMES_RDIV_UINT64  ,
-GxB_MIN_RDIV_FP32      , GxB_MAX_RDIV_FP32      , GxB_PLUS_RDIV_FP32     , GxB_TIMES_RDIV_FP32    ,
-GxB_MIN_RDIV_FP64      , GxB_MAX_RDIV_FP64      , GxB_PLUS_RDIV_FP64     , GxB_TIMES_RDIV_FP64    ,
-
-// semirings with multiply op: z = ISEQ (x,y), all types x,y,z the same:
-GxB_MIN_ISEQ_INT8      , GxB_MAX_ISEQ_INT8      , GxB_PLUS_ISEQ_INT8     , GxB_TIMES_ISEQ_INT8    ,
-GxB_MIN_ISEQ_UINT8     , GxB_MAX_ISEQ_UINT8     , GxB_PLUS_ISEQ_UINT8    , GxB_TIMES_ISEQ_UINT8   ,
-GxB_MIN_ISEQ_INT16     , GxB_MAX_ISEQ_INT16     , GxB_PLUS_ISEQ_INT16    , GxB_TIMES_ISEQ_INT16   ,
-GxB_MIN_ISEQ_UINT16    , GxB_MAX_ISEQ_UINT16    , GxB_PLUS_ISEQ_UINT16   , GxB_TIMES_ISEQ_UINT16  ,
-GxB_MIN_ISEQ_INT32     , GxB_MAX_ISEQ_INT32     , GxB_PLUS_ISEQ_INT32    , GxB_TIMES_ISEQ_INT32   ,
-GxB_MIN_ISEQ_UINT32    , GxB_MAX_ISEQ_UINT32    , GxB_PLUS_ISEQ_UINT32   , GxB_TIMES_ISEQ_UINT32  ,
-GxB_MIN_ISEQ_INT64     , GxB_MAX_ISEQ_INT64     , GxB_PLUS_ISEQ_INT64    , GxB_TIMES_ISEQ_INT64   ,
-GxB_MIN_ISEQ_UINT64    , GxB_MAX_ISEQ_UINT64    , GxB_PLUS_ISEQ_UINT64   , GxB_TIMES_ISEQ_UINT64  ,
-GxB_MIN_ISEQ_FP32      , GxB_MAX_ISEQ_FP32      , GxB_PLUS_ISEQ_FP32     , GxB_TIMES_ISEQ_FP32    ,
-GxB_MIN_ISEQ_FP64      , GxB_MAX_ISEQ_FP64      , GxB_PLUS_ISEQ_FP64     , GxB_TIMES_ISEQ_FP64    ,
-
-// semirings with multiply op: z = ISNE (x,y), all types x,y,z the same:
-GxB_MIN_ISNE_INT8      , GxB_MAX_ISNE_INT8      , GxB_PLUS_ISNE_INT8     , GxB_TIMES_ISNE_INT8    ,
-GxB_MIN_ISNE_UINT8     , GxB_MAX_ISNE_UINT8     , GxB_PLUS_ISNE_UINT8    , GxB_TIMES_ISNE_UINT8   ,
-GxB_MIN_ISNE_INT16     , GxB_MAX_ISNE_INT16     , GxB_PLUS_ISNE_INT16    , GxB_TIMES_ISNE_INT16   ,
-GxB_MIN_ISNE_UINT16    , GxB_MAX_ISNE_UINT16    , GxB_PLUS_ISNE_UINT16   , GxB_TIMES_ISNE_UINT16  ,
-GxB_MIN_ISNE_INT32     , GxB_MAX_ISNE_INT32     , GxB_PLUS_ISNE_INT32    , GxB_TIMES_ISNE_INT32   ,
-GxB_MIN_ISNE_UINT32    , GxB_MAX_ISNE_UINT32    , GxB_PLUS_ISNE_UINT32   , GxB_TIMES_ISNE_UINT32  ,
-GxB_MIN_ISNE_INT64     , GxB_MAX_ISNE_INT64     , GxB_PLUS_ISNE_INT64    , GxB_TIMES_ISNE_INT64   ,
-GxB_MIN_ISNE_UINT64    , GxB_MAX_ISNE_UINT64    , GxB_PLUS_ISNE_UINT64   , GxB_TIMES_ISNE_UINT64  ,
-GxB_MIN_ISNE_FP32      , GxB_MAX_ISNE_FP32      , GxB_PLUS_ISNE_FP32     , GxB_TIMES_ISNE_FP32    ,
-GxB_MIN_ISNE_FP64      , GxB_MAX_ISNE_FP64      , GxB_PLUS_ISNE_FP64     , GxB_TIMES_ISNE_FP64    ,
-
-// semirings with multiply op: z = ISGT (x,y), all types x,y,z the same:
-GxB_MIN_ISGT_INT8      , GxB_MAX_ISGT_INT8      , GxB_PLUS_ISGT_INT8     , GxB_TIMES_ISGT_INT8    ,
-GxB_MIN_ISGT_UINT8     , GxB_MAX_ISGT_UINT8     , GxB_PLUS_ISGT_UINT8    , GxB_TIMES_ISGT_UINT8   ,
-GxB_MIN_ISGT_INT16     , GxB_MAX_ISGT_INT16     , GxB_PLUS_ISGT_INT16    , GxB_TIMES_ISGT_INT16   ,
-GxB_MIN_ISGT_UINT16    , GxB_MAX_ISGT_UINT16    , GxB_PLUS_ISGT_UINT16   , GxB_TIMES_ISGT_UINT16  ,
-GxB_MIN_ISGT_INT32     , GxB_MAX_ISGT_INT32     , GxB_PLUS_ISGT_INT32    , GxB_TIMES_ISGT_INT32   ,
-GxB_MIN_ISGT_UINT32    , GxB_MAX_ISGT_UINT32    , GxB_PLUS_ISGT_UINT32   , GxB_TIMES_ISGT_UINT32  ,
-GxB_MIN_ISGT_INT64     , GxB_MAX_ISGT_INT64     , GxB_PLUS_ISGT_INT64    , GxB_TIMES_ISGT_INT64   ,
-GxB_MIN_ISGT_UINT64    , GxB_MAX_ISGT_UINT64    , GxB_PLUS_ISGT_UINT64   , GxB_TIMES_ISGT_UINT64  ,
-GxB_MIN_ISGT_FP32      , GxB_MAX_ISGT_FP32      , GxB_PLUS_ISGT_FP32     , GxB_TIMES_ISGT_FP32    ,
-GxB_MIN_ISGT_FP64      , GxB_MAX_ISGT_FP64      , GxB_PLUS_ISGT_FP64     , GxB_TIMES_ISGT_FP64    ,
-
-// semirings with multiply op: z = ISLT (x,y), all types x,y,z the same:
-GxB_MIN_ISLT_INT8      , GxB_MAX_ISLT_INT8      , GxB_PLUS_ISLT_INT8     , GxB_TIMES_ISLT_INT8    ,
-GxB_MIN_ISLT_UINT8     , GxB_MAX_ISLT_UINT8     , GxB_PLUS_ISLT_UINT8    , GxB_TIMES_ISLT_UINT8   ,
-GxB_MIN_ISLT_INT16     , GxB_MAX_ISLT_INT16     , GxB_PLUS_ISLT_INT16    , GxB_TIMES_ISLT_INT16   ,
-GxB_MIN_ISLT_UINT16    , GxB_MAX_ISLT_UINT16    , GxB_PLUS_ISLT_UINT16   , GxB_TIMES_ISLT_UINT16  ,
-GxB_MIN_ISLT_INT32     , GxB_MAX_ISLT_INT32     , GxB_PLUS_ISLT_INT32    , GxB_TIMES_ISLT_INT32   ,
-GxB_MIN_ISLT_UINT32    , GxB_MAX_ISLT_UINT32    , GxB_PLUS_ISLT_UINT32   , GxB_TIMES_ISLT_UINT32  ,
-GxB_MIN_ISLT_INT64     , GxB_MAX_ISLT_INT64     , GxB_PLUS_ISLT_INT64    , GxB_TIMES_ISLT_INT64   ,
-GxB_MIN_ISLT_UINT64    , GxB_MAX_ISLT_UINT64    , GxB_PLUS_ISLT_UINT64   , GxB_TIMES_ISLT_UINT64  ,
-GxB_MIN_ISLT_FP32      , GxB_MAX_ISLT_FP32      , GxB_PLUS_ISLT_FP32     , GxB_TIMES_ISLT_FP32    ,
-GxB_MIN_ISLT_FP64      , GxB_MAX_ISLT_FP64      , GxB_PLUS_ISLT_FP64     , GxB_TIMES_ISLT_FP64    ,
-
-// semirings with multiply op: z = ISGE (x,y), all types x,y,z the same:
-GxB_MIN_ISGE_INT8      , GxB_MAX_ISGE_INT8      , GxB_PLUS_ISGE_INT8     , GxB_TIMES_ISGE_INT8    ,
-GxB_MIN_ISGE_UINT8     , GxB_MAX_ISGE_UINT8     , GxB_PLUS_ISGE_UINT8    , GxB_TIMES_ISGE_UINT8   ,
-GxB_MIN_ISGE_INT16     , GxB_MAX_ISGE_INT16     , GxB_PLUS_ISGE_INT16    , GxB_TIMES_ISGE_INT16   ,
-GxB_MIN_ISGE_UINT16    , GxB_MAX_ISGE_UINT16    , GxB_PLUS_ISGE_UINT16   , GxB_TIMES_ISGE_UINT16  ,
-GxB_MIN_ISGE_INT32     , GxB_MAX_ISGE_INT32     , GxB_PLUS_ISGE_INT32    , GxB_TIMES_ISGE_INT32   ,
-GxB_MIN_ISGE_UINT32    , GxB_MAX_ISGE_UINT32    , GxB_PLUS_ISGE_UINT32   , GxB_TIMES_ISGE_UINT32  ,
-GxB_MIN_ISGE_INT64     , GxB_MAX_ISGE_INT64     , GxB_PLUS_ISGE_INT64    , GxB_TIMES_ISGE_INT64   ,
-GxB_MIN_ISGE_UINT64    , GxB_MAX_ISGE_UINT64    , GxB_PLUS_ISGE_UINT64   , GxB_TIMES_ISGE_UINT64  ,
-GxB_MIN_ISGE_FP32      , GxB_MAX_ISGE_FP32      , GxB_PLUS_ISGE_FP32     , GxB_TIMES_ISGE_FP32    ,
-GxB_MIN_ISGE_FP64      , GxB_MAX_ISGE_FP64      , GxB_PLUS_ISGE_FP64     , GxB_TIMES_ISGE_FP64    ,
-
-// semirings with multiply op: z = ISLE (x,y), all types x,y,z the same:
-GxB_MIN_ISLE_INT8      , GxB_MAX_ISLE_INT8      , GxB_PLUS_ISLE_INT8     , GxB_TIMES_ISLE_INT8    ,
-GxB_MIN_ISLE_UINT8     , GxB_MAX_ISLE_UINT8     , GxB_PLUS_ISLE_UINT8    , GxB_TIMES_ISLE_UINT8   ,
-GxB_MIN_ISLE_INT16     , GxB_MAX_ISLE_INT16     , GxB_PLUS_ISLE_INT16    , GxB_TIMES_ISLE_INT16   ,
-GxB_MIN_ISLE_UINT16    , GxB_MAX_ISLE_UINT16    , GxB_PLUS_ISLE_UINT16   , GxB_TIMES_ISLE_UINT16  ,
-GxB_MIN_ISLE_INT32     , GxB_MAX_ISLE_INT32     , GxB_PLUS_ISLE_INT32    , GxB_TIMES_ISLE_INT32   ,
-GxB_MIN_ISLE_UINT32    , GxB_MAX_ISLE_UINT32    , GxB_PLUS_ISLE_UINT32   , GxB_TIMES_ISLE_UINT32  ,
-GxB_MIN_ISLE_INT64     , GxB_MAX_ISLE_INT64     , GxB_PLUS_ISLE_INT64    , GxB_TIMES_ISLE_INT64   ,
-GxB_MIN_ISLE_UINT64    , GxB_MAX_ISLE_UINT64    , GxB_PLUS_ISLE_UINT64   , GxB_TIMES_ISLE_UINT64  ,
-GxB_MIN_ISLE_FP32      , GxB_MAX_ISLE_FP32      , GxB_PLUS_ISLE_FP32     , GxB_TIMES_ISLE_FP32    ,
-GxB_MIN_ISLE_FP64      , GxB_MAX_ISLE_FP64      , GxB_PLUS_ISLE_FP64     , GxB_TIMES_ISLE_FP64    ,
-
-// semirings with multiply op: z = LOR (x,y), all types x,y,z the same:
-GxB_MIN_LOR_INT8       , GxB_MAX_LOR_INT8       , GxB_PLUS_LOR_INT8      , GxB_TIMES_LOR_INT8     ,
-GxB_MIN_LOR_UINT8      , GxB_MAX_LOR_UINT8      , GxB_PLUS_LOR_UINT8     , GxB_TIMES_LOR_UINT8    ,
-GxB_MIN_LOR_INT16      , GxB_MAX_LOR_INT16      , GxB_PLUS_LOR_INT16     , GxB_TIMES_LOR_INT16    ,
-GxB_MIN_LOR_UINT16     , GxB_MAX_LOR_UINT16     , GxB_PLUS_LOR_UINT16    , GxB_TIMES_LOR_UINT16   ,
-GxB_MIN_LOR_INT32      , GxB_MAX_LOR_INT32      , GxB_PLUS_LOR_INT32     , GxB_TIMES_LOR_INT32    ,
-GxB_MIN_LOR_UINT32     , GxB_MAX_LOR_UINT32     , GxB_PLUS_LOR_UINT32    , GxB_TIMES_LOR_UINT32   ,
-GxB_MIN_LOR_INT64      , GxB_MAX_LOR_INT64      , GxB_PLUS_LOR_INT64     , GxB_TIMES_LOR_INT64    ,
-GxB_MIN_LOR_UINT64     , GxB_MAX_LOR_UINT64     , GxB_PLUS_LOR_UINT64    , GxB_TIMES_LOR_UINT64   ,
-GxB_MIN_LOR_FP32       , GxB_MAX_LOR_FP32       , GxB_PLUS_LOR_FP32      , GxB_TIMES_LOR_FP32     ,
-GxB_MIN_LOR_FP64       , GxB_MAX_LOR_FP64       , GxB_PLUS_LOR_FP64      , GxB_TIMES_LOR_FP64     ,
-
-// semirings with multiply op: z = LAND (x,y), all types x,y,z the same:
-GxB_MIN_LAND_INT8      , GxB_MAX_LAND_INT8      , GxB_PLUS_LAND_INT8     , GxB_TIMES_LAND_INT8    ,
-GxB_MIN_LAND_UINT8     , GxB_MAX_LAND_UINT8     , GxB_PLUS_LAND_UINT8    , GxB_TIMES_LAND_UINT8   ,
-GxB_MIN_LAND_INT16     , GxB_MAX_LAND_INT16     , GxB_PLUS_LAND_INT16    , GxB_TIMES_LAND_INT16   ,
-GxB_MIN_LAND_UINT16    , GxB_MAX_LAND_UINT16    , GxB_PLUS_LAND_UINT16   , GxB_TIMES_LAND_UINT16  ,
-GxB_MIN_LAND_INT32     , GxB_MAX_LAND_INT32     , GxB_PLUS_LAND_INT32    , GxB_TIMES_LAND_INT32   ,
-GxB_MIN_LAND_UINT32    , GxB_MAX_LAND_UINT32    , GxB_PLUS_LAND_UINT32   , GxB_TIMES_LAND_UINT32  ,
-GxB_MIN_LAND_INT64     , GxB_MAX_LAND_INT64     , GxB_PLUS_LAND_INT64    , GxB_TIMES_LAND_INT64   ,
-GxB_MIN_LAND_UINT64    , GxB_MAX_LAND_UINT64    , GxB_PLUS_LAND_UINT64   , GxB_TIMES_LAND_UINT64  ,
-GxB_MIN_LAND_FP32      , GxB_MAX_LAND_FP32      , GxB_PLUS_LAND_FP32     , GxB_TIMES_LAND_FP32    ,
-GxB_MIN_LAND_FP64      , GxB_MAX_LAND_FP64      , GxB_PLUS_LAND_FP64     , GxB_TIMES_LAND_FP64    ,
-
-// semirings with multiply op: z = LXOR (x,y), all types x,y,z the same:
-GxB_MIN_LXOR_INT8      , GxB_MAX_LXOR_INT8      , GxB_PLUS_LXOR_INT8     , GxB_TIMES_LXOR_INT8    ,
-GxB_MIN_LXOR_UINT8     , GxB_MAX_LXOR_UINT8     , GxB_PLUS_LXOR_UINT8    , GxB_TIMES_LXOR_UINT8   ,
-GxB_MIN_LXOR_INT16     , GxB_MAX_LXOR_INT16     , GxB_PLUS_LXOR_INT16    , GxB_TIMES_LXOR_INT16   ,
-GxB_MIN_LXOR_UINT16    , GxB_MAX_LXOR_UINT16    , GxB_PLUS_LXOR_UINT16   , GxB_TIMES_LXOR_UINT16  ,
-GxB_MIN_LXOR_INT32     , GxB_MAX_LXOR_INT32     , GxB_PLUS_LXOR_INT32    , GxB_TIMES_LXOR_INT32   ,
-GxB_MIN_LXOR_UINT32    , GxB_MAX_LXOR_UINT32    , GxB_PLUS_LXOR_UINT32   , GxB_TIMES_LXOR_UINT32  ,
-GxB_MIN_LXOR_INT64     , GxB_MAX_LXOR_INT64     , GxB_PLUS_LXOR_INT64    , GxB_TIMES_LXOR_INT64   ,
-GxB_MIN_LXOR_UINT64    , GxB_MAX_LXOR_UINT64    , GxB_PLUS_LXOR_UINT64   , GxB_TIMES_LXOR_UINT64  ,
-GxB_MIN_LXOR_FP32      , GxB_MAX_LXOR_FP32      , GxB_PLUS_LXOR_FP32     , GxB_TIMES_LXOR_FP32    ,
-GxB_MIN_LXOR_FP64      , GxB_MAX_LXOR_FP64      , GxB_PLUS_LXOR_FP64     , GxB_TIMES_LXOR_FP64    ,
-
-//------------------------------------------------------------------------------
-// 240 semirings with comparison ops of the form TxT->bool, and Boolean monoids
-//------------------------------------------------------------------------------
-
-// semirings with multiply op: z = EQ (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_EQ_INT8        , GxB_LAND_EQ_INT8       , GxB_LXOR_EQ_INT8       , GxB_EQ_EQ_INT8         ,
-GxB_LOR_EQ_UINT8       , GxB_LAND_EQ_UINT8      , GxB_LXOR_EQ_UINT8      , GxB_EQ_EQ_UINT8        ,
-GxB_LOR_EQ_INT16       , GxB_LAND_EQ_INT16      , GxB_LXOR_EQ_INT16      , GxB_EQ_EQ_INT16        ,
-GxB_LOR_EQ_UINT16      , GxB_LAND_EQ_UINT16     , GxB_LXOR_EQ_UINT16     , GxB_EQ_EQ_UINT16       ,
-GxB_LOR_EQ_INT32       , GxB_LAND_EQ_INT32      , GxB_LXOR_EQ_INT32      , GxB_EQ_EQ_INT32        ,
-GxB_LOR_EQ_UINT32      , GxB_LAND_EQ_UINT32     , GxB_LXOR_EQ_UINT32     , GxB_EQ_EQ_UINT32       ,
-GxB_LOR_EQ_INT64       , GxB_LAND_EQ_INT64      , GxB_LXOR_EQ_INT64      , GxB_EQ_EQ_INT64        ,
-GxB_LOR_EQ_UINT64      , GxB_LAND_EQ_UINT64     , GxB_LXOR_EQ_UINT64     , GxB_EQ_EQ_UINT64       ,
-GxB_LOR_EQ_FP32        , GxB_LAND_EQ_FP32       , GxB_LXOR_EQ_FP32       , GxB_EQ_EQ_FP32         ,
-GxB_LOR_EQ_FP64        , GxB_LAND_EQ_FP64       , GxB_LXOR_EQ_FP64       , GxB_EQ_EQ_FP64         ,
-
-// semirings with multiply op: z = NE (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_NE_INT8        , GxB_LAND_NE_INT8       , GxB_LXOR_NE_INT8       , GxB_EQ_NE_INT8         ,
-GxB_LOR_NE_UINT8       , GxB_LAND_NE_UINT8      , GxB_LXOR_NE_UINT8      , GxB_EQ_NE_UINT8        ,
-GxB_LOR_NE_INT16       , GxB_LAND_NE_INT16      , GxB_LXOR_NE_INT16      , GxB_EQ_NE_INT16        ,
-GxB_LOR_NE_UINT16      , GxB_LAND_NE_UINT16     , GxB_LXOR_NE_UINT16     , GxB_EQ_NE_UINT16       ,
-GxB_LOR_NE_INT32       , GxB_LAND_NE_INT32      , GxB_LXOR_NE_INT32      , GxB_EQ_NE_INT32        ,
-GxB_LOR_NE_UINT32      , GxB_LAND_NE_UINT32     , GxB_LXOR_NE_UINT32     , GxB_EQ_NE_UINT32       ,
-GxB_LOR_NE_INT64       , GxB_LAND_NE_INT64      , GxB_LXOR_NE_INT64      , GxB_EQ_NE_INT64        ,
-GxB_LOR_NE_UINT64      , GxB_LAND_NE_UINT64     , GxB_LXOR_NE_UINT64     , GxB_EQ_NE_UINT64       ,
-GxB_LOR_NE_FP32        , GxB_LAND_NE_FP32       , GxB_LXOR_NE_FP32       , GxB_EQ_NE_FP32         ,
-GxB_LOR_NE_FP64        , GxB_LAND_NE_FP64       , GxB_LXOR_NE_FP64       , GxB_EQ_NE_FP64         ,
-
-// semirings with multiply op: z = GT (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_GT_INT8        , GxB_LAND_GT_INT8       , GxB_LXOR_GT_INT8       , GxB_EQ_GT_INT8         ,
-GxB_LOR_GT_UINT8       , GxB_LAND_GT_UINT8      , GxB_LXOR_GT_UINT8      , GxB_EQ_GT_UINT8        ,
-GxB_LOR_GT_INT16       , GxB_LAND_GT_INT16      , GxB_LXOR_GT_INT16      , GxB_EQ_GT_INT16        ,
-GxB_LOR_GT_UINT16      , GxB_LAND_GT_UINT16     , GxB_LXOR_GT_UINT16     , GxB_EQ_GT_UINT16       ,
-GxB_LOR_GT_INT32       , GxB_LAND_GT_INT32      , GxB_LXOR_GT_INT32      , GxB_EQ_GT_INT32        ,
-GxB_LOR_GT_UINT32      , GxB_LAND_GT_UINT32     , GxB_LXOR_GT_UINT32     , GxB_EQ_GT_UINT32       ,
-GxB_LOR_GT_INT64       , GxB_LAND_GT_INT64      , GxB_LXOR_GT_INT64      , GxB_EQ_GT_INT64        ,
-GxB_LOR_GT_UINT64      , GxB_LAND_GT_UINT64     , GxB_LXOR_GT_UINT64     , GxB_EQ_GT_UINT64       ,
-GxB_LOR_GT_FP32        , GxB_LAND_GT_FP32       , GxB_LXOR_GT_FP32       , GxB_EQ_GT_FP32         ,
-GxB_LOR_GT_FP64        , GxB_LAND_GT_FP64       , GxB_LXOR_GT_FP64       , GxB_EQ_GT_FP64         ,
-
-// semirings with multiply op: z = LT (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_LT_INT8        , GxB_LAND_LT_INT8       , GxB_LXOR_LT_INT8       , GxB_EQ_LT_INT8         ,
-GxB_LOR_LT_UINT8       , GxB_LAND_LT_UINT8      , GxB_LXOR_LT_UINT8      , GxB_EQ_LT_UINT8        ,
-GxB_LOR_LT_INT16       , GxB_LAND_LT_INT16      , GxB_LXOR_LT_INT16      , GxB_EQ_LT_INT16        ,
-GxB_LOR_LT_UINT16      , GxB_LAND_LT_UINT16     , GxB_LXOR_LT_UINT16     , GxB_EQ_LT_UINT16       ,
-GxB_LOR_LT_INT32       , GxB_LAND_LT_INT32      , GxB_LXOR_LT_INT32      , GxB_EQ_LT_INT32        ,
-GxB_LOR_LT_UINT32      , GxB_LAND_LT_UINT32     , GxB_LXOR_LT_UINT32     , GxB_EQ_LT_UINT32       ,
-GxB_LOR_LT_INT64       , GxB_LAND_LT_INT64      , GxB_LXOR_LT_INT64      , GxB_EQ_LT_INT64        ,
-GxB_LOR_LT_UINT64      , GxB_LAND_LT_UINT64     , GxB_LXOR_LT_UINT64     , GxB_EQ_LT_UINT64       ,
-GxB_LOR_LT_FP32        , GxB_LAND_LT_FP32       , GxB_LXOR_LT_FP32       , GxB_EQ_LT_FP32         ,
-GxB_LOR_LT_FP64        , GxB_LAND_LT_FP64       , GxB_LXOR_LT_FP64       , GxB_EQ_LT_FP64         ,
-
-// semirings with multiply op: z = GE (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_GE_INT8        , GxB_LAND_GE_INT8       , GxB_LXOR_GE_INT8       , GxB_EQ_GE_INT8         ,
-GxB_LOR_GE_UINT8       , GxB_LAND_GE_UINT8      , GxB_LXOR_GE_UINT8      , GxB_EQ_GE_UINT8        ,
-GxB_LOR_GE_INT16       , GxB_LAND_GE_INT16      , GxB_LXOR_GE_INT16      , GxB_EQ_GE_INT16        ,
-GxB_LOR_GE_UINT16      , GxB_LAND_GE_UINT16     , GxB_LXOR_GE_UINT16     , GxB_EQ_GE_UINT16       ,
-GxB_LOR_GE_INT32       , GxB_LAND_GE_INT32      , GxB_LXOR_GE_INT32      , GxB_EQ_GE_INT32        ,
-GxB_LOR_GE_UINT32      , GxB_LAND_GE_UINT32     , GxB_LXOR_GE_UINT32     , GxB_EQ_GE_UINT32       ,
-GxB_LOR_GE_INT64       , GxB_LAND_GE_INT64      , GxB_LXOR_GE_INT64      , GxB_EQ_GE_INT64        ,
-GxB_LOR_GE_UINT64      , GxB_LAND_GE_UINT64     , GxB_LXOR_GE_UINT64     , GxB_EQ_GE_UINT64       ,
-GxB_LOR_GE_FP32        , GxB_LAND_GE_FP32       , GxB_LXOR_GE_FP32       , GxB_EQ_GE_FP32         ,
-GxB_LOR_GE_FP64        , GxB_LAND_GE_FP64       , GxB_LXOR_GE_FP64       , GxB_EQ_GE_FP64         ,
-
-// semirings with multiply op: z = LE (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_LE_INT8        , GxB_LAND_LE_INT8       , GxB_LXOR_LE_INT8       , GxB_EQ_LE_INT8         ,
-GxB_LOR_LE_UINT8       , GxB_LAND_LE_UINT8      , GxB_LXOR_LE_UINT8      , GxB_EQ_LE_UINT8        ,
-GxB_LOR_LE_INT16       , GxB_LAND_LE_INT16      , GxB_LXOR_LE_INT16      , GxB_EQ_LE_INT16        ,
-GxB_LOR_LE_UINT16      , GxB_LAND_LE_UINT16     , GxB_LXOR_LE_UINT16     , GxB_EQ_LE_UINT16       ,
-GxB_LOR_LE_INT32       , GxB_LAND_LE_INT32      , GxB_LXOR_LE_INT32      , GxB_EQ_LE_INT32        ,
-GxB_LOR_LE_UINT32      , GxB_LAND_LE_UINT32     , GxB_LXOR_LE_UINT32     , GxB_EQ_LE_UINT32       ,
-GxB_LOR_LE_INT64       , GxB_LAND_LE_INT64      , GxB_LXOR_LE_INT64      , GxB_EQ_LE_INT64        ,
-GxB_LOR_LE_UINT64      , GxB_LAND_LE_UINT64     , GxB_LXOR_LE_UINT64     , GxB_EQ_LE_UINT64       ,
-GxB_LOR_LE_FP32        , GxB_LAND_LE_FP32       , GxB_LXOR_LE_FP32       , GxB_EQ_LE_FP32         ,
-GxB_LOR_LE_FP64        , GxB_LAND_LE_FP64       , GxB_LXOR_LE_FP64       , GxB_EQ_LE_FP64         ,
-
-//------------------------------------------------------------------------------
-// 40 purely Boolean semirings
-//------------------------------------------------------------------------------
-
-// purely boolean semirings (in the form GxB_(add monoid)_(multipy operator)_BOOL:
-GxB_LOR_FIRST_BOOL     , GxB_LAND_FIRST_BOOL    , GxB_LXOR_FIRST_BOOL    , GxB_EQ_FIRST_BOOL      ,
-GxB_LOR_SECOND_BOOL    , GxB_LAND_SECOND_BOOL   , GxB_LXOR_SECOND_BOOL   , GxB_EQ_SECOND_BOOL     ,
-GxB_LOR_LOR_BOOL       , GxB_LAND_LOR_BOOL      , GxB_LXOR_LOR_BOOL      , GxB_EQ_LOR_BOOL        ,
-GxB_LOR_LAND_BOOL      , GxB_LAND_LAND_BOOL     , GxB_LXOR_LAND_BOOL     , GxB_EQ_LAND_BOOL       ,
-GxB_LOR_LXOR_BOOL      , GxB_LAND_LXOR_BOOL     , GxB_LXOR_LXOR_BOOL     , GxB_EQ_LXOR_BOOL       ,
-GxB_LOR_EQ_BOOL        , GxB_LAND_EQ_BOOL       , GxB_LXOR_EQ_BOOL       , GxB_EQ_EQ_BOOL         ,
-GxB_LOR_GT_BOOL        , GxB_LAND_GT_BOOL       , GxB_LXOR_GT_BOOL       , GxB_EQ_GT_BOOL         ,
-GxB_LOR_LT_BOOL        , GxB_LAND_LT_BOOL       , GxB_LXOR_LT_BOOL       , GxB_EQ_LT_BOOL         ,
-GxB_LOR_GE_BOOL        , GxB_LAND_GE_BOOL       , GxB_LXOR_GE_BOOL       , GxB_EQ_GE_BOOL         ,
-GxB_LOR_LE_BOOL        , GxB_LAND_LE_BOOL       , GxB_LXOR_LE_BOOL       , GxB_EQ_LE_BOOL         ;
+// 1000 non-Boolean semirings where all types are the same, given by suffix _T
+//------------------------------------------------------------------------------
+
+    // semirings with multiply op: z = FIRST (x,y), all types x,y,z the same:
+    GxB_MIN_FIRST_INT8     , GxB_MAX_FIRST_INT8     , GxB_PLUS_FIRST_INT8    , GxB_TIMES_FIRST_INT8   , GxB_ANY_FIRST_INT8     , 
+    GxB_MIN_FIRST_UINT8    , GxB_MAX_FIRST_UINT8    , GxB_PLUS_FIRST_UINT8   , GxB_TIMES_FIRST_UINT8  , GxB_ANY_FIRST_UINT8    , 
+    GxB_MIN_FIRST_INT16    , GxB_MAX_FIRST_INT16    , GxB_PLUS_FIRST_INT16   , GxB_TIMES_FIRST_INT16  , GxB_ANY_FIRST_INT16    , 
+    GxB_MIN_FIRST_UINT16   , GxB_MAX_FIRST_UINT16   , GxB_PLUS_FIRST_UINT16  , GxB_TIMES_FIRST_UINT16 , GxB_ANY_FIRST_UINT16   , 
+    GxB_MIN_FIRST_INT32    , GxB_MAX_FIRST_INT32    , GxB_PLUS_FIRST_INT32   , GxB_TIMES_FIRST_INT32  , GxB_ANY_FIRST_INT32    , 
+    GxB_MIN_FIRST_UINT32   , GxB_MAX_FIRST_UINT32   , GxB_PLUS_FIRST_UINT32  , GxB_TIMES_FIRST_UINT32 , GxB_ANY_FIRST_UINT32   , 
+    GxB_MIN_FIRST_INT64    , GxB_MAX_FIRST_INT64    , GxB_PLUS_FIRST_INT64   , GxB_TIMES_FIRST_INT64  , GxB_ANY_FIRST_INT64    , 
+    GxB_MIN_FIRST_UINT64   , GxB_MAX_FIRST_UINT64   , GxB_PLUS_FIRST_UINT64  , GxB_TIMES_FIRST_UINT64 , GxB_ANY_FIRST_UINT64   , 
+    GxB_MIN_FIRST_FP32     , GxB_MAX_FIRST_FP32     , GxB_PLUS_FIRST_FP32    , GxB_TIMES_FIRST_FP32   , GxB_ANY_FIRST_FP32     , 
+    GxB_MIN_FIRST_FP64     , GxB_MAX_FIRST_FP64     , GxB_PLUS_FIRST_FP64    , GxB_TIMES_FIRST_FP64   , GxB_ANY_FIRST_FP64     , 
+
+    // semirings with multiply op: z = SECOND (x,y), all types x,y,z the same:
+    GxB_MIN_SECOND_INT8    , GxB_MAX_SECOND_INT8    , GxB_PLUS_SECOND_INT8   , GxB_TIMES_SECOND_INT8  , GxB_ANY_SECOND_INT8    , 
+    GxB_MIN_SECOND_UINT8   , GxB_MAX_SECOND_UINT8   , GxB_PLUS_SECOND_UINT8  , GxB_TIMES_SECOND_UINT8 , GxB_ANY_SECOND_UINT8   , 
+    GxB_MIN_SECOND_INT16   , GxB_MAX_SECOND_INT16   , GxB_PLUS_SECOND_INT16  , GxB_TIMES_SECOND_INT16 , GxB_ANY_SECOND_INT16   , 
+    GxB_MIN_SECOND_UINT16  , GxB_MAX_SECOND_UINT16  , GxB_PLUS_SECOND_UINT16 , GxB_TIMES_SECOND_UINT16, GxB_ANY_SECOND_UINT16  , 
+    GxB_MIN_SECOND_INT32   , GxB_MAX_SECOND_INT32   , GxB_PLUS_SECOND_INT32  , GxB_TIMES_SECOND_INT32 , GxB_ANY_SECOND_INT32   , 
+    GxB_MIN_SECOND_UINT32  , GxB_MAX_SECOND_UINT32  , GxB_PLUS_SECOND_UINT32 , GxB_TIMES_SECOND_UINT32, GxB_ANY_SECOND_UINT32  , 
+    GxB_MIN_SECOND_INT64   , GxB_MAX_SECOND_INT64   , GxB_PLUS_SECOND_INT64  , GxB_TIMES_SECOND_INT64 , GxB_ANY_SECOND_INT64   , 
+    GxB_MIN_SECOND_UINT64  , GxB_MAX_SECOND_UINT64  , GxB_PLUS_SECOND_UINT64 , GxB_TIMES_SECOND_UINT64, GxB_ANY_SECOND_UINT64  , 
+    GxB_MIN_SECOND_FP32    , GxB_MAX_SECOND_FP32    , GxB_PLUS_SECOND_FP32   , GxB_TIMES_SECOND_FP32  , GxB_ANY_SECOND_FP32    , 
+    GxB_MIN_SECOND_FP64    , GxB_MAX_SECOND_FP64    , GxB_PLUS_SECOND_FP64   , GxB_TIMES_SECOND_FP64  , GxB_ANY_SECOND_FP64    , 
+
+    // semirings with multiply op: z = PAIR (x,y), all types x,y,z the same:
+    GxB_MIN_PAIR_INT8      , GxB_MAX_PAIR_INT8      , GxB_PLUS_PAIR_INT8     , GxB_TIMES_PAIR_INT8    , GxB_ANY_PAIR_INT8      , 
+    GxB_MIN_PAIR_UINT8     , GxB_MAX_PAIR_UINT8     , GxB_PLUS_PAIR_UINT8    , GxB_TIMES_PAIR_UINT8   , GxB_ANY_PAIR_UINT8     , 
+    GxB_MIN_PAIR_INT16     , GxB_MAX_PAIR_INT16     , GxB_PLUS_PAIR_INT16    , GxB_TIMES_PAIR_INT16   , GxB_ANY_PAIR_INT16     , 
+    GxB_MIN_PAIR_UINT16    , GxB_MAX_PAIR_UINT16    , GxB_PLUS_PAIR_UINT16   , GxB_TIMES_PAIR_UINT16  , GxB_ANY_PAIR_UINT16    , 
+    GxB_MIN_PAIR_INT32     , GxB_MAX_PAIR_INT32     , GxB_PLUS_PAIR_INT32    , GxB_TIMES_PAIR_INT32   , GxB_ANY_PAIR_INT32     , 
+    GxB_MIN_PAIR_UINT32    , GxB_MAX_PAIR_UINT32    , GxB_PLUS_PAIR_UINT32   , GxB_TIMES_PAIR_UINT32  , GxB_ANY_PAIR_UINT32    , 
+    GxB_MIN_PAIR_INT64     , GxB_MAX_PAIR_INT64     , GxB_PLUS_PAIR_INT64    , GxB_TIMES_PAIR_INT64   , GxB_ANY_PAIR_INT64     , 
+    GxB_MIN_PAIR_UINT64    , GxB_MAX_PAIR_UINT64    , GxB_PLUS_PAIR_UINT64   , GxB_TIMES_PAIR_UINT64  , GxB_ANY_PAIR_UINT64    , 
+    GxB_MIN_PAIR_FP32      , GxB_MAX_PAIR_FP32      , GxB_PLUS_PAIR_FP32     , GxB_TIMES_PAIR_FP32    , GxB_ANY_PAIR_FP32      , 
+    GxB_MIN_PAIR_FP64      , GxB_MAX_PAIR_FP64      , GxB_PLUS_PAIR_FP64     , GxB_TIMES_PAIR_FP64    , GxB_ANY_PAIR_FP64      , 
+
+    // semirings with multiply op: z = MIN (x,y), all types x,y,z the same:
+    GxB_MIN_MIN_INT8       , GxB_MAX_MIN_INT8       , GxB_PLUS_MIN_INT8      , GxB_TIMES_MIN_INT8     , GxB_ANY_MIN_INT8       , 
+    GxB_MIN_MIN_UINT8      , GxB_MAX_MIN_UINT8      , GxB_PLUS_MIN_UINT8     , GxB_TIMES_MIN_UINT8    , GxB_ANY_MIN_UINT8      , 
+    GxB_MIN_MIN_INT16      , GxB_MAX_MIN_INT16      , GxB_PLUS_MIN_INT16     , GxB_TIMES_MIN_INT16    , GxB_ANY_MIN_INT16      , 
+    GxB_MIN_MIN_UINT16     , GxB_MAX_MIN_UINT16     , GxB_PLUS_MIN_UINT16    , GxB_TIMES_MIN_UINT16   , GxB_ANY_MIN_UINT16     , 
+    GxB_MIN_MIN_INT32      , GxB_MAX_MIN_INT32      , GxB_PLUS_MIN_INT32     , GxB_TIMES_MIN_INT32    , GxB_ANY_MIN_INT32      , 
+    GxB_MIN_MIN_UINT32     , GxB_MAX_MIN_UINT32     , GxB_PLUS_MIN_UINT32    , GxB_TIMES_MIN_UINT32   , GxB_ANY_MIN_UINT32     , 
+    GxB_MIN_MIN_INT64      , GxB_MAX_MIN_INT64      , GxB_PLUS_MIN_INT64     , GxB_TIMES_MIN_INT64    , GxB_ANY_MIN_INT64      , 
+    GxB_MIN_MIN_UINT64     , GxB_MAX_MIN_UINT64     , GxB_PLUS_MIN_UINT64    , GxB_TIMES_MIN_UINT64   , GxB_ANY_MIN_UINT64     , 
+    GxB_MIN_MIN_FP32       , GxB_MAX_MIN_FP32       , GxB_PLUS_MIN_FP32      , GxB_TIMES_MIN_FP32     , GxB_ANY_MIN_FP32       , 
+    GxB_MIN_MIN_FP64       , GxB_MAX_MIN_FP64       , GxB_PLUS_MIN_FP64      , GxB_TIMES_MIN_FP64     , GxB_ANY_MIN_FP64       , 
+
+    // semirings with multiply op: z = MAX (x,y), all types x,y,z the same:
+    GxB_MIN_MAX_INT8       , GxB_MAX_MAX_INT8       , GxB_PLUS_MAX_INT8      , GxB_TIMES_MAX_INT8     , GxB_ANY_MAX_INT8       , 
+    GxB_MIN_MAX_UINT8      , GxB_MAX_MAX_UINT8      , GxB_PLUS_MAX_UINT8     , GxB_TIMES_MAX_UINT8    , GxB_ANY_MAX_UINT8      , 
+    GxB_MIN_MAX_INT16      , GxB_MAX_MAX_INT16      , GxB_PLUS_MAX_INT16     , GxB_TIMES_MAX_INT16    , GxB_ANY_MAX_INT16      , 
+    GxB_MIN_MAX_UINT16     , GxB_MAX_MAX_UINT16     , GxB_PLUS_MAX_UINT16    , GxB_TIMES_MAX_UINT16   , GxB_ANY_MAX_UINT16     , 
+    GxB_MIN_MAX_INT32      , GxB_MAX_MAX_INT32      , GxB_PLUS_MAX_INT32     , GxB_TIMES_MAX_INT32    , GxB_ANY_MAX_INT32      , 
+    GxB_MIN_MAX_UINT32     , GxB_MAX_MAX_UINT32     , GxB_PLUS_MAX_UINT32    , GxB_TIMES_MAX_UINT32   , GxB_ANY_MAX_UINT32     , 
+    GxB_MIN_MAX_INT64      , GxB_MAX_MAX_INT64      , GxB_PLUS_MAX_INT64     , GxB_TIMES_MAX_INT64    , GxB_ANY_MAX_INT64      , 
+    GxB_MIN_MAX_UINT64     , GxB_MAX_MAX_UINT64     , GxB_PLUS_MAX_UINT64    , GxB_TIMES_MAX_UINT64   , GxB_ANY_MAX_UINT64     , 
+    GxB_MIN_MAX_FP32       , GxB_MAX_MAX_FP32       , GxB_PLUS_MAX_FP32      , GxB_TIMES_MAX_FP32     , GxB_ANY_MAX_FP32       , 
+    GxB_MIN_MAX_FP64       , GxB_MAX_MAX_FP64       , GxB_PLUS_MAX_FP64      , GxB_TIMES_MAX_FP64     , GxB_ANY_MAX_FP64       , 
+
+    // semirings with multiply op: z = PLUS (x,y), all types x,y,z the same:
+    GxB_MIN_PLUS_INT8      , GxB_MAX_PLUS_INT8      , GxB_PLUS_PLUS_INT8     , GxB_TIMES_PLUS_INT8    , GxB_ANY_PLUS_INT8      , 
+    GxB_MIN_PLUS_UINT8     , GxB_MAX_PLUS_UINT8     , GxB_PLUS_PLUS_UINT8    , GxB_TIMES_PLUS_UINT8   , GxB_ANY_PLUS_UINT8     , 
+    GxB_MIN_PLUS_INT16     , GxB_MAX_PLUS_INT16     , GxB_PLUS_PLUS_INT16    , GxB_TIMES_PLUS_INT16   , GxB_ANY_PLUS_INT16     , 
+    GxB_MIN_PLUS_UINT16    , GxB_MAX_PLUS_UINT16    , GxB_PLUS_PLUS_UINT16   , GxB_TIMES_PLUS_UINT16  , GxB_ANY_PLUS_UINT16    , 
+    GxB_MIN_PLUS_INT32     , GxB_MAX_PLUS_INT32     , GxB_PLUS_PLUS_INT32    , GxB_TIMES_PLUS_INT32   , GxB_ANY_PLUS_INT32     , 
+    GxB_MIN_PLUS_UINT32    , GxB_MAX_PLUS_UINT32    , GxB_PLUS_PLUS_UINT32   , GxB_TIMES_PLUS_UINT32  , GxB_ANY_PLUS_UINT32    , 
+    GxB_MIN_PLUS_INT64     , GxB_MAX_PLUS_INT64     , GxB_PLUS_PLUS_INT64    , GxB_TIMES_PLUS_INT64   , GxB_ANY_PLUS_INT64     , 
+    GxB_MIN_PLUS_UINT64    , GxB_MAX_PLUS_UINT64    , GxB_PLUS_PLUS_UINT64   , GxB_TIMES_PLUS_UINT64  , GxB_ANY_PLUS_UINT64    , 
+    GxB_MIN_PLUS_FP32      , GxB_MAX_PLUS_FP32      , GxB_PLUS_PLUS_FP32     , GxB_TIMES_PLUS_FP32    , GxB_ANY_PLUS_FP32      , 
+    GxB_MIN_PLUS_FP64      , GxB_MAX_PLUS_FP64      , GxB_PLUS_PLUS_FP64     , GxB_TIMES_PLUS_FP64    , GxB_ANY_PLUS_FP64      , 
+
+    // semirings with multiply op: z = MINUS (x,y), all types x,y,z the same:
+    GxB_MIN_MINUS_INT8     , GxB_MAX_MINUS_INT8     , GxB_PLUS_MINUS_INT8    , GxB_TIMES_MINUS_INT8   , GxB_ANY_MINUS_INT8     , 
+    GxB_MIN_MINUS_UINT8    , GxB_MAX_MINUS_UINT8    , GxB_PLUS_MINUS_UINT8   , GxB_TIMES_MINUS_UINT8  , GxB_ANY_MINUS_UINT8    , 
+    GxB_MIN_MINUS_INT16    , GxB_MAX_MINUS_INT16    , GxB_PLUS_MINUS_INT16   , GxB_TIMES_MINUS_INT16  , GxB_ANY_MINUS_INT16    , 
+    GxB_MIN_MINUS_UINT16   , GxB_MAX_MINUS_UINT16   , GxB_PLUS_MINUS_UINT16  , GxB_TIMES_MINUS_UINT16 , GxB_ANY_MINUS_UINT16   , 
+    GxB_MIN_MINUS_INT32    , GxB_MAX_MINUS_INT32    , GxB_PLUS_MINUS_INT32   , GxB_TIMES_MINUS_INT32  , GxB_ANY_MINUS_INT32    , 
+    GxB_MIN_MINUS_UINT32   , GxB_MAX_MINUS_UINT32   , GxB_PLUS_MINUS_UINT32  , GxB_TIMES_MINUS_UINT32 , GxB_ANY_MINUS_UINT32   , 
+    GxB_MIN_MINUS_INT64    , GxB_MAX_MINUS_INT64    , GxB_PLUS_MINUS_INT64   , GxB_TIMES_MINUS_INT64  , GxB_ANY_MINUS_INT64    , 
+    GxB_MIN_MINUS_UINT64   , GxB_MAX_MINUS_UINT64   , GxB_PLUS_MINUS_UINT64  , GxB_TIMES_MINUS_UINT64 , GxB_ANY_MINUS_UINT64   , 
+    GxB_MIN_MINUS_FP32     , GxB_MAX_MINUS_FP32     , GxB_PLUS_MINUS_FP32    , GxB_TIMES_MINUS_FP32   , GxB_ANY_MINUS_FP32     , 
+    GxB_MIN_MINUS_FP64     , GxB_MAX_MINUS_FP64     , GxB_PLUS_MINUS_FP64    , GxB_TIMES_MINUS_FP64   , GxB_ANY_MINUS_FP64     , 
+
+    // semirings with multiply op: z = TIMES (x,y), all types x,y,z the same:
+    GxB_MIN_TIMES_INT8     , GxB_MAX_TIMES_INT8     , GxB_PLUS_TIMES_INT8    , GxB_TIMES_TIMES_INT8   , GxB_ANY_TIMES_INT8     , 
+    GxB_MIN_TIMES_UINT8    , GxB_MAX_TIMES_UINT8    , GxB_PLUS_TIMES_UINT8   , GxB_TIMES_TIMES_UINT8  , GxB_ANY_TIMES_UINT8    , 
+    GxB_MIN_TIMES_INT16    , GxB_MAX_TIMES_INT16    , GxB_PLUS_TIMES_INT16   , GxB_TIMES_TIMES_INT16  , GxB_ANY_TIMES_INT16    , 
+    GxB_MIN_TIMES_UINT16   , GxB_MAX_TIMES_UINT16   , GxB_PLUS_TIMES_UINT16  , GxB_TIMES_TIMES_UINT16 , GxB_ANY_TIMES_UINT16   , 
+    GxB_MIN_TIMES_INT32    , GxB_MAX_TIMES_INT32    , GxB_PLUS_TIMES_INT32   , GxB_TIMES_TIMES_INT32  , GxB_ANY_TIMES_INT32    , 
+    GxB_MIN_TIMES_UINT32   , GxB_MAX_TIMES_UINT32   , GxB_PLUS_TIMES_UINT32  , GxB_TIMES_TIMES_UINT32 , GxB_ANY_TIMES_UINT32   , 
+    GxB_MIN_TIMES_INT64    , GxB_MAX_TIMES_INT64    , GxB_PLUS_TIMES_INT64   , GxB_TIMES_TIMES_INT64  , GxB_ANY_TIMES_INT64    , 
+    GxB_MIN_TIMES_UINT64   , GxB_MAX_TIMES_UINT64   , GxB_PLUS_TIMES_UINT64  , GxB_TIMES_TIMES_UINT64 , GxB_ANY_TIMES_UINT64   , 
+    GxB_MIN_TIMES_FP32     , GxB_MAX_TIMES_FP32     , GxB_PLUS_TIMES_FP32    , GxB_TIMES_TIMES_FP32   , GxB_ANY_TIMES_FP32     , 
+    GxB_MIN_TIMES_FP64     , GxB_MAX_TIMES_FP64     , GxB_PLUS_TIMES_FP64    , GxB_TIMES_TIMES_FP64   , GxB_ANY_TIMES_FP64     , 
+
+    // semirings with multiply op: z = DIV (x,y), all types x,y,z the same:
+    GxB_MIN_DIV_INT8       , GxB_MAX_DIV_INT8       , GxB_PLUS_DIV_INT8      , GxB_TIMES_DIV_INT8     , GxB_ANY_DIV_INT8       , 
+    GxB_MIN_DIV_UINT8      , GxB_MAX_DIV_UINT8      , GxB_PLUS_DIV_UINT8     , GxB_TIMES_DIV_UINT8    , GxB_ANY_DIV_UINT8      , 
+    GxB_MIN_DIV_INT16      , GxB_MAX_DIV_INT16      , GxB_PLUS_DIV_INT16     , GxB_TIMES_DIV_INT16    , GxB_ANY_DIV_INT16      , 
+    GxB_MIN_DIV_UINT16     , GxB_MAX_DIV_UINT16     , GxB_PLUS_DIV_UINT16    , GxB_TIMES_DIV_UINT16   , GxB_ANY_DIV_UINT16     , 
+    GxB_MIN_DIV_INT32      , GxB_MAX_DIV_INT32      , GxB_PLUS_DIV_INT32     , GxB_TIMES_DIV_INT32    , GxB_ANY_DIV_INT32      , 
+    GxB_MIN_DIV_UINT32     , GxB_MAX_DIV_UINT32     , GxB_PLUS_DIV_UINT32    , GxB_TIMES_DIV_UINT32   , GxB_ANY_DIV_UINT32     , 
+    GxB_MIN_DIV_INT64      , GxB_MAX_DIV_INT64      , GxB_PLUS_DIV_INT64     , GxB_TIMES_DIV_INT64    , GxB_ANY_DIV_INT64      , 
+    GxB_MIN_DIV_UINT64     , GxB_MAX_DIV_UINT64     , GxB_PLUS_DIV_UINT64    , GxB_TIMES_DIV_UINT64   , GxB_ANY_DIV_UINT64     , 
+    GxB_MIN_DIV_FP32       , GxB_MAX_DIV_FP32       , GxB_PLUS_DIV_FP32      , GxB_TIMES_DIV_FP32     , GxB_ANY_DIV_FP32       , 
+    GxB_MIN_DIV_FP64       , GxB_MAX_DIV_FP64       , GxB_PLUS_DIV_FP64      , GxB_TIMES_DIV_FP64     , GxB_ANY_DIV_FP64       , 
+
+    // semirings with multiply op: z = RDIV (x,y), all types x,y,z the same:
+    GxB_MIN_RDIV_INT8      , GxB_MAX_RDIV_INT8      , GxB_PLUS_RDIV_INT8     , GxB_TIMES_RDIV_INT8    , GxB_ANY_RDIV_INT8      , 
+    GxB_MIN_RDIV_UINT8     , GxB_MAX_RDIV_UINT8     , GxB_PLUS_RDIV_UINT8    , GxB_TIMES_RDIV_UINT8   , GxB_ANY_RDIV_UINT8     , 
+    GxB_MIN_RDIV_INT16     , GxB_MAX_RDIV_INT16     , GxB_PLUS_RDIV_INT16    , GxB_TIMES_RDIV_INT16   , GxB_ANY_RDIV_INT16     , 
+    GxB_MIN_RDIV_UINT16    , GxB_MAX_RDIV_UINT16    , GxB_PLUS_RDIV_UINT16   , GxB_TIMES_RDIV_UINT16  , GxB_ANY_RDIV_UINT16    , 
+    GxB_MIN_RDIV_INT32     , GxB_MAX_RDIV_INT32     , GxB_PLUS_RDIV_INT32    , GxB_TIMES_RDIV_INT32   , GxB_ANY_RDIV_INT32     , 
+    GxB_MIN_RDIV_UINT32    , GxB_MAX_RDIV_UINT32    , GxB_PLUS_RDIV_UINT32   , GxB_TIMES_RDIV_UINT32  , GxB_ANY_RDIV_UINT32    , 
+    GxB_MIN_RDIV_INT64     , GxB_MAX_RDIV_INT64     , GxB_PLUS_RDIV_INT64    , GxB_TIMES_RDIV_INT64   , GxB_ANY_RDIV_INT64     , 
+    GxB_MIN_RDIV_UINT64    , GxB_MAX_RDIV_UINT64    , GxB_PLUS_RDIV_UINT64   , GxB_TIMES_RDIV_UINT64  , GxB_ANY_RDIV_UINT64    , 
+    GxB_MIN_RDIV_FP32      , GxB_MAX_RDIV_FP32      , GxB_PLUS_RDIV_FP32     , GxB_TIMES_RDIV_FP32    , GxB_ANY_RDIV_FP32      , 
+    GxB_MIN_RDIV_FP64      , GxB_MAX_RDIV_FP64      , GxB_PLUS_RDIV_FP64     , GxB_TIMES_RDIV_FP64    , GxB_ANY_RDIV_FP64      , 
+
+    // semirings with multiply op: z = RMINUS (x,y), all types x,y,z the same:
+    GxB_MIN_RMINUS_INT8    , GxB_MAX_RMINUS_INT8    , GxB_PLUS_RMINUS_INT8   , GxB_TIMES_RMINUS_INT8  , GxB_ANY_RMINUS_INT8    , 
+    GxB_MIN_RMINUS_UINT8   , GxB_MAX_RMINUS_UINT8   , GxB_PLUS_RMINUS_UINT8  , GxB_TIMES_RMINUS_UINT8 , GxB_ANY_RMINUS_UINT8   , 
+    GxB_MIN_RMINUS_INT16   , GxB_MAX_RMINUS_INT16   , GxB_PLUS_RMINUS_INT16  , GxB_TIMES_RMINUS_INT16 , GxB_ANY_RMINUS_INT16   , 
+    GxB_MIN_RMINUS_UINT16  , GxB_MAX_RMINUS_UINT16  , GxB_PLUS_RMINUS_UINT16 , GxB_TIMES_RMINUS_UINT16, GxB_ANY_RMINUS_UINT16  , 
+    GxB_MIN_RMINUS_INT32   , GxB_MAX_RMINUS_INT32   , GxB_PLUS_RMINUS_INT32  , GxB_TIMES_RMINUS_INT32 , GxB_ANY_RMINUS_INT32   , 
+    GxB_MIN_RMINUS_UINT32  , GxB_MAX_RMINUS_UINT32  , GxB_PLUS_RMINUS_UINT32 , GxB_TIMES_RMINUS_UINT32, GxB_ANY_RMINUS_UINT32  , 
+    GxB_MIN_RMINUS_INT64   , GxB_MAX_RMINUS_INT64   , GxB_PLUS_RMINUS_INT64  , GxB_TIMES_RMINUS_INT64 , GxB_ANY_RMINUS_INT64   , 
+    GxB_MIN_RMINUS_UINT64  , GxB_MAX_RMINUS_UINT64  , GxB_PLUS_RMINUS_UINT64 , GxB_TIMES_RMINUS_UINT64, GxB_ANY_RMINUS_UINT64  , 
+    GxB_MIN_RMINUS_FP32    , GxB_MAX_RMINUS_FP32    , GxB_PLUS_RMINUS_FP32   , GxB_TIMES_RMINUS_FP32  , GxB_ANY_RMINUS_FP32    , 
+    GxB_MIN_RMINUS_FP64    , GxB_MAX_RMINUS_FP64    , GxB_PLUS_RMINUS_FP64   , GxB_TIMES_RMINUS_FP64  , GxB_ANY_RMINUS_FP64    , 
+
+    // semirings with multiply op: z = ISEQ (x,y), all types x,y,z the same:
+    GxB_MIN_ISEQ_INT8      , GxB_MAX_ISEQ_INT8      , GxB_PLUS_ISEQ_INT8     , GxB_TIMES_ISEQ_INT8    , GxB_ANY_ISEQ_INT8      , 
+    GxB_MIN_ISEQ_UINT8     , GxB_MAX_ISEQ_UINT8     , GxB_PLUS_ISEQ_UINT8    , GxB_TIMES_ISEQ_UINT8   , GxB_ANY_ISEQ_UINT8     , 
+    GxB_MIN_ISEQ_INT16     , GxB_MAX_ISEQ_INT16     , GxB_PLUS_ISEQ_INT16    , GxB_TIMES_ISEQ_INT16   , GxB_ANY_ISEQ_INT16     , 
+    GxB_MIN_ISEQ_UINT16    , GxB_MAX_ISEQ_UINT16    , GxB_PLUS_ISEQ_UINT16   , GxB_TIMES_ISEQ_UINT16  , GxB_ANY_ISEQ_UINT16    , 
+    GxB_MIN_ISEQ_INT32     , GxB_MAX_ISEQ_INT32     , GxB_PLUS_ISEQ_INT32    , GxB_TIMES_ISEQ_INT32   , GxB_ANY_ISEQ_INT32     , 
+    GxB_MIN_ISEQ_UINT32    , GxB_MAX_ISEQ_UINT32    , GxB_PLUS_ISEQ_UINT32   , GxB_TIMES_ISEQ_UINT32  , GxB_ANY_ISEQ_UINT32    , 
+    GxB_MIN_ISEQ_INT64     , GxB_MAX_ISEQ_INT64     , GxB_PLUS_ISEQ_INT64    , GxB_TIMES_ISEQ_INT64   , GxB_ANY_ISEQ_INT64     , 
+    GxB_MIN_ISEQ_UINT64    , GxB_MAX_ISEQ_UINT64    , GxB_PLUS_ISEQ_UINT64   , GxB_TIMES_ISEQ_UINT64  , GxB_ANY_ISEQ_UINT64    , 
+    GxB_MIN_ISEQ_FP32      , GxB_MAX_ISEQ_FP32      , GxB_PLUS_ISEQ_FP32     , GxB_TIMES_ISEQ_FP32    , GxB_ANY_ISEQ_FP32      , 
+    GxB_MIN_ISEQ_FP64      , GxB_MAX_ISEQ_FP64      , GxB_PLUS_ISEQ_FP64     , GxB_TIMES_ISEQ_FP64    , GxB_ANY_ISEQ_FP64      , 
+
+    // semirings with multiply op: z = ISNE (x,y), all types x,y,z the same:
+    GxB_MIN_ISNE_INT8      , GxB_MAX_ISNE_INT8      , GxB_PLUS_ISNE_INT8     , GxB_TIMES_ISNE_INT8    , GxB_ANY_ISNE_INT8      , 
+    GxB_MIN_ISNE_UINT8     , GxB_MAX_ISNE_UINT8     , GxB_PLUS_ISNE_UINT8    , GxB_TIMES_ISNE_UINT8   , GxB_ANY_ISNE_UINT8     , 
+    GxB_MIN_ISNE_INT16     , GxB_MAX_ISNE_INT16     , GxB_PLUS_ISNE_INT16    , GxB_TIMES_ISNE_INT16   , GxB_ANY_ISNE_INT16     , 
+    GxB_MIN_ISNE_UINT16    , GxB_MAX_ISNE_UINT16    , GxB_PLUS_ISNE_UINT16   , GxB_TIMES_ISNE_UINT16  , GxB_ANY_ISNE_UINT16    , 
+    GxB_MIN_ISNE_INT32     , GxB_MAX_ISNE_INT32     , GxB_PLUS_ISNE_INT32    , GxB_TIMES_ISNE_INT32   , GxB_ANY_ISNE_INT32     , 
+    GxB_MIN_ISNE_UINT32    , GxB_MAX_ISNE_UINT32    , GxB_PLUS_ISNE_UINT32   , GxB_TIMES_ISNE_UINT32  , GxB_ANY_ISNE_UINT32    , 
+    GxB_MIN_ISNE_INT64     , GxB_MAX_ISNE_INT64     , GxB_PLUS_ISNE_INT64    , GxB_TIMES_ISNE_INT64   , GxB_ANY_ISNE_INT64     , 
+    GxB_MIN_ISNE_UINT64    , GxB_MAX_ISNE_UINT64    , GxB_PLUS_ISNE_UINT64   , GxB_TIMES_ISNE_UINT64  , GxB_ANY_ISNE_UINT64    , 
+    GxB_MIN_ISNE_FP32      , GxB_MAX_ISNE_FP32      , GxB_PLUS_ISNE_FP32     , GxB_TIMES_ISNE_FP32    , GxB_ANY_ISNE_FP32      , 
+    GxB_MIN_ISNE_FP64      , GxB_MAX_ISNE_FP64      , GxB_PLUS_ISNE_FP64     , GxB_TIMES_ISNE_FP64    , GxB_ANY_ISNE_FP64      , 
+
+    // semirings with multiply op: z = ISGT (x,y), all types x,y,z the same:
+    GxB_MIN_ISGT_INT8      , GxB_MAX_ISGT_INT8      , GxB_PLUS_ISGT_INT8     , GxB_TIMES_ISGT_INT8    , GxB_ANY_ISGT_INT8      , 
+    GxB_MIN_ISGT_UINT8     , GxB_MAX_ISGT_UINT8     , GxB_PLUS_ISGT_UINT8    , GxB_TIMES_ISGT_UINT8   , GxB_ANY_ISGT_UINT8     , 
+    GxB_MIN_ISGT_INT16     , GxB_MAX_ISGT_INT16     , GxB_PLUS_ISGT_INT16    , GxB_TIMES_ISGT_INT16   , GxB_ANY_ISGT_INT16     , 
+    GxB_MIN_ISGT_UINT16    , GxB_MAX_ISGT_UINT16    , GxB_PLUS_ISGT_UINT16   , GxB_TIMES_ISGT_UINT16  , GxB_ANY_ISGT_UINT16    , 
+    GxB_MIN_ISGT_INT32     , GxB_MAX_ISGT_INT32     , GxB_PLUS_ISGT_INT32    , GxB_TIMES_ISGT_INT32   , GxB_ANY_ISGT_INT32     , 
+    GxB_MIN_ISGT_UINT32    , GxB_MAX_ISGT_UINT32    , GxB_PLUS_ISGT_UINT32   , GxB_TIMES_ISGT_UINT32  , GxB_ANY_ISGT_UINT32    , 
+    GxB_MIN_ISGT_INT64     , GxB_MAX_ISGT_INT64     , GxB_PLUS_ISGT_INT64    , GxB_TIMES_ISGT_INT64   , GxB_ANY_ISGT_INT64     , 
+    GxB_MIN_ISGT_UINT64    , GxB_MAX_ISGT_UINT64    , GxB_PLUS_ISGT_UINT64   , GxB_TIMES_ISGT_UINT64  , GxB_ANY_ISGT_UINT64    , 
+    GxB_MIN_ISGT_FP32      , GxB_MAX_ISGT_FP32      , GxB_PLUS_ISGT_FP32     , GxB_TIMES_ISGT_FP32    , GxB_ANY_ISGT_FP32      , 
+    GxB_MIN_ISGT_FP64      , GxB_MAX_ISGT_FP64      , GxB_PLUS_ISGT_FP64     , GxB_TIMES_ISGT_FP64    , GxB_ANY_ISGT_FP64      , 
+
+    // semirings with multiply op: z = ISLT (x,y), all types x,y,z the same:
+    GxB_MIN_ISLT_INT8      , GxB_MAX_ISLT_INT8      , GxB_PLUS_ISLT_INT8     , GxB_TIMES_ISLT_INT8    , GxB_ANY_ISLT_INT8      , 
+    GxB_MIN_ISLT_UINT8     , GxB_MAX_ISLT_UINT8     , GxB_PLUS_ISLT_UINT8    , GxB_TIMES_ISLT_UINT8   , GxB_ANY_ISLT_UINT8     , 
+    GxB_MIN_ISLT_INT16     , GxB_MAX_ISLT_INT16     , GxB_PLUS_ISLT_INT16    , GxB_TIMES_ISLT_INT16   , GxB_ANY_ISLT_INT16     , 
+    GxB_MIN_ISLT_UINT16    , GxB_MAX_ISLT_UINT16    , GxB_PLUS_ISLT_UINT16   , GxB_TIMES_ISLT_UINT16  , GxB_ANY_ISLT_UINT16    , 
+    GxB_MIN_ISLT_INT32     , GxB_MAX_ISLT_INT32     , GxB_PLUS_ISLT_INT32    , GxB_TIMES_ISLT_INT32   , GxB_ANY_ISLT_INT32     , 
+    GxB_MIN_ISLT_UINT32    , GxB_MAX_ISLT_UINT32    , GxB_PLUS_ISLT_UINT32   , GxB_TIMES_ISLT_UINT32  , GxB_ANY_ISLT_UINT32    , 
+    GxB_MIN_ISLT_INT64     , GxB_MAX_ISLT_INT64     , GxB_PLUS_ISLT_INT64    , GxB_TIMES_ISLT_INT64   , GxB_ANY_ISLT_INT64     , 
+    GxB_MIN_ISLT_UINT64    , GxB_MAX_ISLT_UINT64    , GxB_PLUS_ISLT_UINT64   , GxB_TIMES_ISLT_UINT64  , GxB_ANY_ISLT_UINT64    , 
+    GxB_MIN_ISLT_FP32      , GxB_MAX_ISLT_FP32      , GxB_PLUS_ISLT_FP32     , GxB_TIMES_ISLT_FP32    , GxB_ANY_ISLT_FP32      , 
+    GxB_MIN_ISLT_FP64      , GxB_MAX_ISLT_FP64      , GxB_PLUS_ISLT_FP64     , GxB_TIMES_ISLT_FP64    , GxB_ANY_ISLT_FP64      , 
+
+    // semirings with multiply op: z = ISGE (x,y), all types x,y,z the same:
+    GxB_MIN_ISGE_INT8      , GxB_MAX_ISGE_INT8      , GxB_PLUS_ISGE_INT8     , GxB_TIMES_ISGE_INT8    , GxB_ANY_ISGE_INT8      , 
+    GxB_MIN_ISGE_UINT8     , GxB_MAX_ISGE_UINT8     , GxB_PLUS_ISGE_UINT8    , GxB_TIMES_ISGE_UINT8   , GxB_ANY_ISGE_UINT8     , 
+    GxB_MIN_ISGE_INT16     , GxB_MAX_ISGE_INT16     , GxB_PLUS_ISGE_INT16    , GxB_TIMES_ISGE_INT16   , GxB_ANY_ISGE_INT16     , 
+    GxB_MIN_ISGE_UINT16    , GxB_MAX_ISGE_UINT16    , GxB_PLUS_ISGE_UINT16   , GxB_TIMES_ISGE_UINT16  , GxB_ANY_ISGE_UINT16    , 
+    GxB_MIN_ISGE_INT32     , GxB_MAX_ISGE_INT32     , GxB_PLUS_ISGE_INT32    , GxB_TIMES_ISGE_INT32   , GxB_ANY_ISGE_INT32     , 
+    GxB_MIN_ISGE_UINT32    , GxB_MAX_ISGE_UINT32    , GxB_PLUS_ISGE_UINT32   , GxB_TIMES_ISGE_UINT32  , GxB_ANY_ISGE_UINT32    , 
+    GxB_MIN_ISGE_INT64     , GxB_MAX_ISGE_INT64     , GxB_PLUS_ISGE_INT64    , GxB_TIMES_ISGE_INT64   , GxB_ANY_ISGE_INT64     , 
+    GxB_MIN_ISGE_UINT64    , GxB_MAX_ISGE_UINT64    , GxB_PLUS_ISGE_UINT64   , GxB_TIMES_ISGE_UINT64  , GxB_ANY_ISGE_UINT64    , 
+    GxB_MIN_ISGE_FP32      , GxB_MAX_ISGE_FP32      , GxB_PLUS_ISGE_FP32     , GxB_TIMES_ISGE_FP32    , GxB_ANY_ISGE_FP32      , 
+    GxB_MIN_ISGE_FP64      , GxB_MAX_ISGE_FP64      , GxB_PLUS_ISGE_FP64     , GxB_TIMES_ISGE_FP64    , GxB_ANY_ISGE_FP64      , 
+
+    // semirings with multiply op: z = ISLE (x,y), all types x,y,z the same:
+    GxB_MIN_ISLE_INT8      , GxB_MAX_ISLE_INT8      , GxB_PLUS_ISLE_INT8     , GxB_TIMES_ISLE_INT8    , GxB_ANY_ISLE_INT8      , 
+    GxB_MIN_ISLE_UINT8     , GxB_MAX_ISLE_UINT8     , GxB_PLUS_ISLE_UINT8    , GxB_TIMES_ISLE_UINT8   , GxB_ANY_ISLE_UINT8     , 
+    GxB_MIN_ISLE_INT16     , GxB_MAX_ISLE_INT16     , GxB_PLUS_ISLE_INT16    , GxB_TIMES_ISLE_INT16   , GxB_ANY_ISLE_INT16     , 
+    GxB_MIN_ISLE_UINT16    , GxB_MAX_ISLE_UINT16    , GxB_PLUS_ISLE_UINT16   , GxB_TIMES_ISLE_UINT16  , GxB_ANY_ISLE_UINT16    , 
+    GxB_MIN_ISLE_INT32     , GxB_MAX_ISLE_INT32     , GxB_PLUS_ISLE_INT32    , GxB_TIMES_ISLE_INT32   , GxB_ANY_ISLE_INT32     , 
+    GxB_MIN_ISLE_UINT32    , GxB_MAX_ISLE_UINT32    , GxB_PLUS_ISLE_UINT32   , GxB_TIMES_ISLE_UINT32  , GxB_ANY_ISLE_UINT32    , 
+    GxB_MIN_ISLE_INT64     , GxB_MAX_ISLE_INT64     , GxB_PLUS_ISLE_INT64    , GxB_TIMES_ISLE_INT64   , GxB_ANY_ISLE_INT64     , 
+    GxB_MIN_ISLE_UINT64    , GxB_MAX_ISLE_UINT64    , GxB_PLUS_ISLE_UINT64   , GxB_TIMES_ISLE_UINT64  , GxB_ANY_ISLE_UINT64    , 
+    GxB_MIN_ISLE_FP32      , GxB_MAX_ISLE_FP32      , GxB_PLUS_ISLE_FP32     , GxB_TIMES_ISLE_FP32    , GxB_ANY_ISLE_FP32      , 
+    GxB_MIN_ISLE_FP64      , GxB_MAX_ISLE_FP64      , GxB_PLUS_ISLE_FP64     , GxB_TIMES_ISLE_FP64    , GxB_ANY_ISLE_FP64      , 
+
+    // semirings with multiply op: z = LOR (x,y), all types x,y,z the same:
+    GxB_MIN_LOR_INT8       , GxB_MAX_LOR_INT8       , GxB_PLUS_LOR_INT8      , GxB_TIMES_LOR_INT8     , GxB_ANY_LOR_INT8       , 
+    GxB_MIN_LOR_UINT8      , GxB_MAX_LOR_UINT8      , GxB_PLUS_LOR_UINT8     , GxB_TIMES_LOR_UINT8    , GxB_ANY_LOR_UINT8      , 
+    GxB_MIN_LOR_INT16      , GxB_MAX_LOR_INT16      , GxB_PLUS_LOR_INT16     , GxB_TIMES_LOR_INT16    , GxB_ANY_LOR_INT16      , 
+    GxB_MIN_LOR_UINT16     , GxB_MAX_LOR_UINT16     , GxB_PLUS_LOR_UINT16    , GxB_TIMES_LOR_UINT16   , GxB_ANY_LOR_UINT16     , 
+    GxB_MIN_LOR_INT32      , GxB_MAX_LOR_INT32      , GxB_PLUS_LOR_INT32     , GxB_TIMES_LOR_INT32    , GxB_ANY_LOR_INT32      , 
+    GxB_MIN_LOR_UINT32     , GxB_MAX_LOR_UINT32     , GxB_PLUS_LOR_UINT32    , GxB_TIMES_LOR_UINT32   , GxB_ANY_LOR_UINT32     , 
+    GxB_MIN_LOR_INT64      , GxB_MAX_LOR_INT64      , GxB_PLUS_LOR_INT64     , GxB_TIMES_LOR_INT64    , GxB_ANY_LOR_INT64      , 
+    GxB_MIN_LOR_UINT64     , GxB_MAX_LOR_UINT64     , GxB_PLUS_LOR_UINT64    , GxB_TIMES_LOR_UINT64   , GxB_ANY_LOR_UINT64     , 
+    GxB_MIN_LOR_FP32       , GxB_MAX_LOR_FP32       , GxB_PLUS_LOR_FP32      , GxB_TIMES_LOR_FP32     , GxB_ANY_LOR_FP32       , 
+    GxB_MIN_LOR_FP64       , GxB_MAX_LOR_FP64       , GxB_PLUS_LOR_FP64      , GxB_TIMES_LOR_FP64     , GxB_ANY_LOR_FP64       , 
+
+    // semirings with multiply op: z = LAND (x,y), all types x,y,z the same:
+    GxB_MIN_LAND_INT8      , GxB_MAX_LAND_INT8      , GxB_PLUS_LAND_INT8     , GxB_TIMES_LAND_INT8    , GxB_ANY_LAND_INT8      , 
+    GxB_MIN_LAND_UINT8     , GxB_MAX_LAND_UINT8     , GxB_PLUS_LAND_UINT8    , GxB_TIMES_LAND_UINT8   , GxB_ANY_LAND_UINT8     , 
+    GxB_MIN_LAND_INT16     , GxB_MAX_LAND_INT16     , GxB_PLUS_LAND_INT16    , GxB_TIMES_LAND_INT16   , GxB_ANY_LAND_INT16     , 
+    GxB_MIN_LAND_UINT16    , GxB_MAX_LAND_UINT16    , GxB_PLUS_LAND_UINT16   , GxB_TIMES_LAND_UINT16  , GxB_ANY_LAND_UINT16    , 
+    GxB_MIN_LAND_INT32     , GxB_MAX_LAND_INT32     , GxB_PLUS_LAND_INT32    , GxB_TIMES_LAND_INT32   , GxB_ANY_LAND_INT32     , 
+    GxB_MIN_LAND_UINT32    , GxB_MAX_LAND_UINT32    , GxB_PLUS_LAND_UINT32   , GxB_TIMES_LAND_UINT32  , GxB_ANY_LAND_UINT32    , 
+    GxB_MIN_LAND_INT64     , GxB_MAX_LAND_INT64     , GxB_PLUS_LAND_INT64    , GxB_TIMES_LAND_INT64   , GxB_ANY_LAND_INT64     , 
+    GxB_MIN_LAND_UINT64    , GxB_MAX_LAND_UINT64    , GxB_PLUS_LAND_UINT64   , GxB_TIMES_LAND_UINT64  , GxB_ANY_LAND_UINT64    , 
+    GxB_MIN_LAND_FP32      , GxB_MAX_LAND_FP32      , GxB_PLUS_LAND_FP32     , GxB_TIMES_LAND_FP32    , GxB_ANY_LAND_FP32      , 
+    GxB_MIN_LAND_FP64      , GxB_MAX_LAND_FP64      , GxB_PLUS_LAND_FP64     , GxB_TIMES_LAND_FP64    , GxB_ANY_LAND_FP64      , 
+
+    // semirings with multiply op: z = LXOR (x,y), all types x,y,z the same:
+    GxB_MIN_LXOR_INT8      , GxB_MAX_LXOR_INT8      , GxB_PLUS_LXOR_INT8     , GxB_TIMES_LXOR_INT8    , GxB_ANY_LXOR_INT8      , 
+    GxB_MIN_LXOR_UINT8     , GxB_MAX_LXOR_UINT8     , GxB_PLUS_LXOR_UINT8    , GxB_TIMES_LXOR_UINT8   , GxB_ANY_LXOR_UINT8     , 
+    GxB_MIN_LXOR_INT16     , GxB_MAX_LXOR_INT16     , GxB_PLUS_LXOR_INT16    , GxB_TIMES_LXOR_INT16   , GxB_ANY_LXOR_INT16     , 
+    GxB_MIN_LXOR_UINT16    , GxB_MAX_LXOR_UINT16    , GxB_PLUS_LXOR_UINT16   , GxB_TIMES_LXOR_UINT16  , GxB_ANY_LXOR_UINT16    , 
+    GxB_MIN_LXOR_INT32     , GxB_MAX_LXOR_INT32     , GxB_PLUS_LXOR_INT32    , GxB_TIMES_LXOR_INT32   , GxB_ANY_LXOR_INT32     , 
+    GxB_MIN_LXOR_UINT32    , GxB_MAX_LXOR_UINT32    , GxB_PLUS_LXOR_UINT32   , GxB_TIMES_LXOR_UINT32  , GxB_ANY_LXOR_UINT32    , 
+    GxB_MIN_LXOR_INT64     , GxB_MAX_LXOR_INT64     , GxB_PLUS_LXOR_INT64    , GxB_TIMES_LXOR_INT64   , GxB_ANY_LXOR_INT64     , 
+    GxB_MIN_LXOR_UINT64    , GxB_MAX_LXOR_UINT64    , GxB_PLUS_LXOR_UINT64   , GxB_TIMES_LXOR_UINT64  , GxB_ANY_LXOR_UINT64    , 
+    GxB_MIN_LXOR_FP32      , GxB_MAX_LXOR_FP32      , GxB_PLUS_LXOR_FP32     , GxB_TIMES_LXOR_FP32    , GxB_ANY_LXOR_FP32      , 
+    GxB_MIN_LXOR_FP64      , GxB_MAX_LXOR_FP64      , GxB_PLUS_LXOR_FP64     , GxB_TIMES_LXOR_FP64    , GxB_ANY_LXOR_FP64      , 
+
+//------------------------------------------------------------------------------
+// 300 semirings with a comparison operator TxT -> bool, where T is non-Boolean
+//------------------------------------------------------------------------------
+
+    // semirings with multiply op: z = EQ (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_EQ_INT8        , GxB_LAND_EQ_INT8       , GxB_LXOR_EQ_INT8       , GxB_EQ_EQ_INT8         , GxB_ANY_EQ_INT8        , 
+    GxB_LOR_EQ_UINT8       , GxB_LAND_EQ_UINT8      , GxB_LXOR_EQ_UINT8      , GxB_EQ_EQ_UINT8        , GxB_ANY_EQ_UINT8       , 
+    GxB_LOR_EQ_INT16       , GxB_LAND_EQ_INT16      , GxB_LXOR_EQ_INT16      , GxB_EQ_EQ_INT16        , GxB_ANY_EQ_INT16       , 
+    GxB_LOR_EQ_UINT16      , GxB_LAND_EQ_UINT16     , GxB_LXOR_EQ_UINT16     , GxB_EQ_EQ_UINT16       , GxB_ANY_EQ_UINT16      , 
+    GxB_LOR_EQ_INT32       , GxB_LAND_EQ_INT32      , GxB_LXOR_EQ_INT32      , GxB_EQ_EQ_INT32        , GxB_ANY_EQ_INT32       , 
+    GxB_LOR_EQ_UINT32      , GxB_LAND_EQ_UINT32     , GxB_LXOR_EQ_UINT32     , GxB_EQ_EQ_UINT32       , GxB_ANY_EQ_UINT32      , 
+    GxB_LOR_EQ_INT64       , GxB_LAND_EQ_INT64      , GxB_LXOR_EQ_INT64      , GxB_EQ_EQ_INT64        , GxB_ANY_EQ_INT64       , 
+    GxB_LOR_EQ_UINT64      , GxB_LAND_EQ_UINT64     , GxB_LXOR_EQ_UINT64     , GxB_EQ_EQ_UINT64       , GxB_ANY_EQ_UINT64      , 
+    GxB_LOR_EQ_FP32        , GxB_LAND_EQ_FP32       , GxB_LXOR_EQ_FP32       , GxB_EQ_EQ_FP32         , GxB_ANY_EQ_FP32        , 
+    GxB_LOR_EQ_FP64        , GxB_LAND_EQ_FP64       , GxB_LXOR_EQ_FP64       , GxB_EQ_EQ_FP64         , GxB_ANY_EQ_FP64        , 
+
+    // semirings with multiply op: z = NE (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_NE_INT8        , GxB_LAND_NE_INT8       , GxB_LXOR_NE_INT8       , GxB_EQ_NE_INT8         , GxB_ANY_NE_INT8        , 
+    GxB_LOR_NE_UINT8       , GxB_LAND_NE_UINT8      , GxB_LXOR_NE_UINT8      , GxB_EQ_NE_UINT8        , GxB_ANY_NE_UINT8       , 
+    GxB_LOR_NE_INT16       , GxB_LAND_NE_INT16      , GxB_LXOR_NE_INT16      , GxB_EQ_NE_INT16        , GxB_ANY_NE_INT16       , 
+    GxB_LOR_NE_UINT16      , GxB_LAND_NE_UINT16     , GxB_LXOR_NE_UINT16     , GxB_EQ_NE_UINT16       , GxB_ANY_NE_UINT16      , 
+    GxB_LOR_NE_INT32       , GxB_LAND_NE_INT32      , GxB_LXOR_NE_INT32      , GxB_EQ_NE_INT32        , GxB_ANY_NE_INT32       , 
+    GxB_LOR_NE_UINT32      , GxB_LAND_NE_UINT32     , GxB_LXOR_NE_UINT32     , GxB_EQ_NE_UINT32       , GxB_ANY_NE_UINT32      , 
+    GxB_LOR_NE_INT64       , GxB_LAND_NE_INT64      , GxB_LXOR_NE_INT64      , GxB_EQ_NE_INT64        , GxB_ANY_NE_INT64       , 
+    GxB_LOR_NE_UINT64      , GxB_LAND_NE_UINT64     , GxB_LXOR_NE_UINT64     , GxB_EQ_NE_UINT64       , GxB_ANY_NE_UINT64      , 
+    GxB_LOR_NE_FP32        , GxB_LAND_NE_FP32       , GxB_LXOR_NE_FP32       , GxB_EQ_NE_FP32         , GxB_ANY_NE_FP32        , 
+    GxB_LOR_NE_FP64        , GxB_LAND_NE_FP64       , GxB_LXOR_NE_FP64       , GxB_EQ_NE_FP64         , GxB_ANY_NE_FP64        , 
+
+    // semirings with multiply op: z = GT (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_GT_INT8        , GxB_LAND_GT_INT8       , GxB_LXOR_GT_INT8       , GxB_EQ_GT_INT8         , GxB_ANY_GT_INT8        , 
+    GxB_LOR_GT_UINT8       , GxB_LAND_GT_UINT8      , GxB_LXOR_GT_UINT8      , GxB_EQ_GT_UINT8        , GxB_ANY_GT_UINT8       , 
+    GxB_LOR_GT_INT16       , GxB_LAND_GT_INT16      , GxB_LXOR_GT_INT16      , GxB_EQ_GT_INT16        , GxB_ANY_GT_INT16       , 
+    GxB_LOR_GT_UINT16      , GxB_LAND_GT_UINT16     , GxB_LXOR_GT_UINT16     , GxB_EQ_GT_UINT16       , GxB_ANY_GT_UINT16      , 
+    GxB_LOR_GT_INT32       , GxB_LAND_GT_INT32      , GxB_LXOR_GT_INT32      , GxB_EQ_GT_INT32        , GxB_ANY_GT_INT32       , 
+    GxB_LOR_GT_UINT32      , GxB_LAND_GT_UINT32     , GxB_LXOR_GT_UINT32     , GxB_EQ_GT_UINT32       , GxB_ANY_GT_UINT32      , 
+    GxB_LOR_GT_INT64       , GxB_LAND_GT_INT64      , GxB_LXOR_GT_INT64      , GxB_EQ_GT_INT64        , GxB_ANY_GT_INT64       , 
+    GxB_LOR_GT_UINT64      , GxB_LAND_GT_UINT64     , GxB_LXOR_GT_UINT64     , GxB_EQ_GT_UINT64       , GxB_ANY_GT_UINT64      , 
+    GxB_LOR_GT_FP32        , GxB_LAND_GT_FP32       , GxB_LXOR_GT_FP32       , GxB_EQ_GT_FP32         , GxB_ANY_GT_FP32        , 
+    GxB_LOR_GT_FP64        , GxB_LAND_GT_FP64       , GxB_LXOR_GT_FP64       , GxB_EQ_GT_FP64         , GxB_ANY_GT_FP64        , 
+
+    // semirings with multiply op: z = LT (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_LT_INT8        , GxB_LAND_LT_INT8       , GxB_LXOR_LT_INT8       , GxB_EQ_LT_INT8         , GxB_ANY_LT_INT8        , 
+    GxB_LOR_LT_UINT8       , GxB_LAND_LT_UINT8      , GxB_LXOR_LT_UINT8      , GxB_EQ_LT_UINT8        , GxB_ANY_LT_UINT8       , 
+    GxB_LOR_LT_INT16       , GxB_LAND_LT_INT16      , GxB_LXOR_LT_INT16      , GxB_EQ_LT_INT16        , GxB_ANY_LT_INT16       , 
+    GxB_LOR_LT_UINT16      , GxB_LAND_LT_UINT16     , GxB_LXOR_LT_UINT16     , GxB_EQ_LT_UINT16       , GxB_ANY_LT_UINT16      , 
+    GxB_LOR_LT_INT32       , GxB_LAND_LT_INT32      , GxB_LXOR_LT_INT32      , GxB_EQ_LT_INT32        , GxB_ANY_LT_INT32       , 
+    GxB_LOR_LT_UINT32      , GxB_LAND_LT_UINT32     , GxB_LXOR_LT_UINT32     , GxB_EQ_LT_UINT32       , GxB_ANY_LT_UINT32      , 
+    GxB_LOR_LT_INT64       , GxB_LAND_LT_INT64      , GxB_LXOR_LT_INT64      , GxB_EQ_LT_INT64        , GxB_ANY_LT_INT64       , 
+    GxB_LOR_LT_UINT64      , GxB_LAND_LT_UINT64     , GxB_LXOR_LT_UINT64     , GxB_EQ_LT_UINT64       , GxB_ANY_LT_UINT64      , 
+    GxB_LOR_LT_FP32        , GxB_LAND_LT_FP32       , GxB_LXOR_LT_FP32       , GxB_EQ_LT_FP32         , GxB_ANY_LT_FP32        , 
+    GxB_LOR_LT_FP64        , GxB_LAND_LT_FP64       , GxB_LXOR_LT_FP64       , GxB_EQ_LT_FP64         , GxB_ANY_LT_FP64        , 
+
+    // semirings with multiply op: z = GE (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_GE_INT8        , GxB_LAND_GE_INT8       , GxB_LXOR_GE_INT8       , GxB_EQ_GE_INT8         , GxB_ANY_GE_INT8        , 
+    GxB_LOR_GE_UINT8       , GxB_LAND_GE_UINT8      , GxB_LXOR_GE_UINT8      , GxB_EQ_GE_UINT8        , GxB_ANY_GE_UINT8       , 
+    GxB_LOR_GE_INT16       , GxB_LAND_GE_INT16      , GxB_LXOR_GE_INT16      , GxB_EQ_GE_INT16        , GxB_ANY_GE_INT16       , 
+    GxB_LOR_GE_UINT16      , GxB_LAND_GE_UINT16     , GxB_LXOR_GE_UINT16     , GxB_EQ_GE_UINT16       , GxB_ANY_GE_UINT16      , 
+    GxB_LOR_GE_INT32       , GxB_LAND_GE_INT32      , GxB_LXOR_GE_INT32      , GxB_EQ_GE_INT32        , GxB_ANY_GE_INT32       , 
+    GxB_LOR_GE_UINT32      , GxB_LAND_GE_UINT32     , GxB_LXOR_GE_UINT32     , GxB_EQ_GE_UINT32       , GxB_ANY_GE_UINT32      , 
+    GxB_LOR_GE_INT64       , GxB_LAND_GE_INT64      , GxB_LXOR_GE_INT64      , GxB_EQ_GE_INT64        , GxB_ANY_GE_INT64       , 
+    GxB_LOR_GE_UINT64      , GxB_LAND_GE_UINT64     , GxB_LXOR_GE_UINT64     , GxB_EQ_GE_UINT64       , GxB_ANY_GE_UINT64      , 
+    GxB_LOR_GE_FP32        , GxB_LAND_GE_FP32       , GxB_LXOR_GE_FP32       , GxB_EQ_GE_FP32         , GxB_ANY_GE_FP32        , 
+    GxB_LOR_GE_FP64        , GxB_LAND_GE_FP64       , GxB_LXOR_GE_FP64       , GxB_EQ_GE_FP64         , GxB_ANY_GE_FP64        , 
+
+    // semirings with multiply op: z = LE (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_LE_INT8        , GxB_LAND_LE_INT8       , GxB_LXOR_LE_INT8       , GxB_EQ_LE_INT8         , GxB_ANY_LE_INT8        , 
+    GxB_LOR_LE_UINT8       , GxB_LAND_LE_UINT8      , GxB_LXOR_LE_UINT8      , GxB_EQ_LE_UINT8        , GxB_ANY_LE_UINT8       , 
+    GxB_LOR_LE_INT16       , GxB_LAND_LE_INT16      , GxB_LXOR_LE_INT16      , GxB_EQ_LE_INT16        , GxB_ANY_LE_INT16       , 
+    GxB_LOR_LE_UINT16      , GxB_LAND_LE_UINT16     , GxB_LXOR_LE_UINT16     , GxB_EQ_LE_UINT16       , GxB_ANY_LE_UINT16      , 
+    GxB_LOR_LE_INT32       , GxB_LAND_LE_INT32      , GxB_LXOR_LE_INT32      , GxB_EQ_LE_INT32        , GxB_ANY_LE_INT32       , 
+    GxB_LOR_LE_UINT32      , GxB_LAND_LE_UINT32     , GxB_LXOR_LE_UINT32     , GxB_EQ_LE_UINT32       , GxB_ANY_LE_UINT32      , 
+    GxB_LOR_LE_INT64       , GxB_LAND_LE_INT64      , GxB_LXOR_LE_INT64      , GxB_EQ_LE_INT64        , GxB_ANY_LE_INT64       , 
+    GxB_LOR_LE_UINT64      , GxB_LAND_LE_UINT64     , GxB_LXOR_LE_UINT64     , GxB_EQ_LE_UINT64       , GxB_ANY_LE_UINT64      , 
+    GxB_LOR_LE_FP32        , GxB_LAND_LE_FP32       , GxB_LXOR_LE_FP32       , GxB_EQ_LE_FP32         , GxB_ANY_LE_FP32        , 
+    GxB_LOR_LE_FP64        , GxB_LAND_LE_FP64       , GxB_LXOR_LE_FP64       , GxB_EQ_LE_FP64         , GxB_ANY_LE_FP64        , 
+
+//------------------------------------------------------------------------------
+// 55 semirings with purely Boolean types, bool x bool -> bool
+//------------------------------------------------------------------------------
+
+    // purely boolean semirings (in the form GxB_(add monoid)_(multiply operator)_BOOL):
+    GxB_LOR_FIRST_BOOL     , GxB_LAND_FIRST_BOOL    , GxB_LXOR_FIRST_BOOL    , GxB_EQ_FIRST_BOOL      , GxB_ANY_FIRST_BOOL     , 
+    GxB_LOR_SECOND_BOOL    , GxB_LAND_SECOND_BOOL   , GxB_LXOR_SECOND_BOOL   , GxB_EQ_SECOND_BOOL     , GxB_ANY_SECOND_BOOL    , 
+    GxB_LOR_PAIR_BOOL      , GxB_LAND_PAIR_BOOL     , GxB_LXOR_PAIR_BOOL     , GxB_EQ_PAIR_BOOL       , GxB_ANY_PAIR_BOOL      , 
+    GxB_LOR_LOR_BOOL       , GxB_LAND_LOR_BOOL      , GxB_LXOR_LOR_BOOL      , GxB_EQ_LOR_BOOL        , GxB_ANY_LOR_BOOL       , 
+    GxB_LOR_LAND_BOOL      , GxB_LAND_LAND_BOOL     , GxB_LXOR_LAND_BOOL     , GxB_EQ_LAND_BOOL       , GxB_ANY_LAND_BOOL      , 
+    GxB_LOR_LXOR_BOOL      , GxB_LAND_LXOR_BOOL     , GxB_LXOR_LXOR_BOOL     , GxB_EQ_LXOR_BOOL       , GxB_ANY_LXOR_BOOL      , 
+    GxB_LOR_EQ_BOOL        , GxB_LAND_EQ_BOOL       , GxB_LXOR_EQ_BOOL       , GxB_EQ_EQ_BOOL         , GxB_ANY_EQ_BOOL        , 
+    GxB_LOR_GT_BOOL        , GxB_LAND_GT_BOOL       , GxB_LXOR_GT_BOOL       , GxB_EQ_GT_BOOL         , GxB_ANY_GT_BOOL        , 
+    GxB_LOR_LT_BOOL        , GxB_LAND_LT_BOOL       , GxB_LXOR_LT_BOOL       , GxB_EQ_LT_BOOL         , GxB_ANY_LT_BOOL        , 
+    GxB_LOR_GE_BOOL        , GxB_LAND_GE_BOOL       , GxB_LXOR_GE_BOOL       , GxB_EQ_GE_BOOL         , GxB_ANY_GE_BOOL        , 
+    GxB_LOR_LE_BOOL        , GxB_LAND_LE_BOOL       , GxB_LXOR_LE_BOOL       , GxB_EQ_LE_BOOL         , GxB_ANY_LE_BOOL        ; 
+
 
 //------------------------------------------------------------------------------
 // GxB_resize:  change the size of a matrix or vector
diff --git a/Config/README.md.in b/Config/README.md.in
index cb5b78ea8d..badf06c87c 100644
--- a/Config/README.md.in
+++ b/Config/README.md.in
@@ -1,6 +1,6 @@
 # SuiteSparse:GraphBLAS
 
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 VERSION @GraphBLAS_VERSION_MAJOR@.@GraphBLAS_VERSION_MINOR@.@GraphBLAS_VERSION_SUB@, @GraphBLAS_DATE@
@@ -85,8 +85,6 @@ Test:           Extensive tests, not meant for general usage.  To compile
                 SuiteSparse:GraphBLAS and test in MATLAB, go to this directory
                 and type make;testall in MATLAB.
 
-User:           user-defined objects at compile-time (.m4 files)
-
 build:          build directory for CMake, initially empty
 
 alternative:    an alternative to CMake; edit the alternative/Makefile and do
@@ -104,3 +102,11 @@ file.  All functions, objects, and macros with the prefix GxB are extensions to
 the spec.  Functions, objects, and macros with prefix GB must not be accessed
 by user code.  They are for internal use in GraphBLAS only.
 
+
+--------------------------------------------------------------------------------
+## For Windows users:
+
+This version is not compatible with Microsoft Visual Studio.  Use another
+compiler, or use SuiteSparse:GraphBLAS v3.1.2 instead.  See the User Guide
+for more details.
+
diff --git a/Config/user_dec1.m4 b/Config/user_dec1.m4
deleted file mode 100644
index 2153f4ff0c..0000000000
--- a/Config/user_dec1.m4
+++ /dev/null
@@ -1,25 +0,0 @@
-//==============================================================================
-// user-defined objects defined by SuiteSparse/GraphBLAS/User/*.m4
-//==============================================================================
-
-// Declarations appended to SuiteSparse/GraphBLAS/Include/GraphBLAS.h.
-
-#if defined __INTEL_COMPILER
-#pragma warning (disable: 869 )
-#elif defined __GNUC__
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-#endif
-
-#ifndef GxB_USER_INCLUDE
-#define GxB_USER_INCLUDE
-#endif
-
-#ifndef GxB_USER_H
-#define GxB_USER_H
-m4_define(`GxB_Type_define',            `extern GrB_Type $1')
-m4_define(`GxB_UnaryOp_define',         `extern GrB_UnaryOp $1')
-m4_define(`GxB_BinaryOp_define',        `extern GrB_BinaryOp $1')
-m4_define(`GxB_SelectOp_define',        `extern GxB_SelectOp $1') 
-m4_define(`GxB_Monoid_define',          `extern GrB_Monoid $1')
-m4_define(`GxB_Monoid_terminal_define', `extern GrB_Monoid $1')
-m4_define(`GxB_Semiring_define',        `extern GrB_Semiring $1')
diff --git a/Config/user_dec2.m4 b/Config/user_dec2.m4
deleted file mode 100644
index 3e2a5c88e0..0000000000
--- a/Config/user_dec2.m4
+++ /dev/null
@@ -1,3 +0,0 @@
-#endif
-
-#undef GxB_USER_INCLUDE
diff --git a/Config/user_def0.m4 b/Config/user_def0.m4
deleted file mode 100644
index 36406302f5..0000000000
--- a/Config/user_def0.m4
+++ /dev/null
@@ -1,12 +0,0 @@
-//------------------------------------------------------------------------------
-// SuiteSparse/GraphBLAS/Source/all_user_objects.c
-//------------------------------------------------------------------------------
-
-// This file is constructed automatically by cmake and m4 when GraphBLAS is
-// compiled, from the Config/user_def*.m4 and *.m4 files in User/.  Do not edit
-// this file directly.  It contains references to internally-defined functions
-// and objects inside GraphBLAS, which are not user-callable.
-
-#include "GB_mxm.h"
-#include "GB_user.h"
-
diff --git a/Config/user_def1.m4 b/Config/user_def1.m4
deleted file mode 100644
index 5de0f9ef3e..0000000000
--- a/Config/user_def1.m4
+++ /dev/null
@@ -1,258 +0,0 @@
-//------------------------------------------------------------------------------
-// SuiteSparse/GraphBLAS/Config/user_def1.m4: define user-defined objects
-//------------------------------------------------------------------------------
-
-m4_define(`GxB_Type_define', `
-    #define GB_DEF_$1_type $2
-    struct GB_Type_opaque GB_opaque_$1 =
-    {
-        GB_MAGIC,           // object is defined
-        sizeof ($2),        // size of the type
-        GB_UCT_code,        // user-defined at compile-time
-        "$2"
-    } ;
-    GrB_Type $1 = & GB_opaque_$1')
-
-m4_define(`GxB_UnaryOp_define', `
-    #define GB_DEF_$1_function $2
-    #define GB_DEF_$1_ztype GB_DEF_$3_type
-    #define GB_DEF_$1_xtype GB_DEF_$4_type
-    extern void $2
-    (
-        GB_DEF_$1_ztype *z,
-        const GB_DEF_$1_xtype *x
-    ) ;
-    struct GB_UnaryOp_opaque GB_opaque_$1 =
-    {
-        GB_MAGIC,           // object is defined
-        & GB_opaque_$4,     // type of x
-        & GB_opaque_$3,     // type of z
-        $2,                 // pointer to the C function
-        "$2",
-        GB_USER_C_opcode    // user-defined at compile-time
-    } ;
-    GrB_UnaryOp $1 = & GB_opaque_$1')
-
-m4_define(`GxB_BinaryOp_define', `
-    #define GB_DEF_$1_function $2
-    #define GB_DEF_$1_ztype GB_DEF_$3_type
-    #define GB_DEF_$1_xtype GB_DEF_$4_type
-    #define GB_DEF_$1_ytype GB_DEF_$5_type
-    extern void $2
-    (
-        GB_DEF_$1_ztype *z,
-        const GB_DEF_$1_xtype *x,
-        const GB_DEF_$1_ytype *y
-    ) ;
-    struct GB_BinaryOp_opaque GB_opaque_$1 =
-    {
-        GB_MAGIC,           // object is defined
-        & GB_opaque_$4,     // type of x
-        & GB_opaque_$5,     // type of y
-        & GB_opaque_$3,     // type of z
-        $2,                 // pointer to the C function
-        "$2",
-        GB_USER_C_opcode    // user-defined at compile-time
-    } ;
-    GrB_BinaryOp $1 = & GB_opaque_$1')
-
-m4_define(`GxB_Monoid_define', `
-    #define GB_DEF_$1_add GB_DEF_$2_function
-    #define GB_DEF_$1_zsize sizeof (GB_DEF_$2_ztype)
-    GB_DEF_$2_ztype GB_DEF_$1_identity = $3 ;
-    struct GB_Monoid_opaque GB_opaque_$1 =
-    {
-        GB_MAGIC,           // object is defined
-        & GB_opaque_$2,     // binary operator
-        & GB_DEF_$1_identity,   // identity value
-        GB_DEF_$1_zsize,    // identity size
-        GB_USER_COMPILED,   // user-defined at compile-time
-        NULL                // no terminal value
-    } ;
-    GrB_Monoid $1 = & GB_opaque_$1')
-
-m4_define(`GxB_Monoid_terminal_define', `
-    #define GB_DEF_$1_add GB_DEF_$2_function
-    #define GB_DEF_$1_zsize sizeof (GB_DEF_$2_ztype)
-    #define GB_DEF_$1_is_user_terminal
-    GB_DEF_$2_ztype GB_DEF_$1_identity = $3 ;
-    GB_DEF_$2_ztype GB_DEF_$1_user_terminal = $4 ;
-    struct GB_Monoid_opaque GB_opaque_$1 =
-    {
-        GB_MAGIC,                   // object is defined
-        & GB_opaque_$2,             // binary operator
-        & GB_DEF_$1_identity,       // identity value
-        GB_DEF_$1_zsize,            // identity and terminal size
-        GB_USER_COMPILED,           // user-defined at compile-time
-        & GB_DEF_$1_user_terminal   // terminal value
-    } ;
-    GrB_Monoid $1 = & GB_opaque_$1')
-
-m4_define(`GB_semirings', `if (0)
-    {
-        ;
-    }')
-
-m4_define(`GB_semiring', `m4_define(`GB_semirings', GB_semirings()
-    else if (GB_s == $1)
-    {
-        if (GB_AxB_method == GxB_AxB_GUSTAVSON)
-        {
-            if (GB_flipxy)
-            {
-                GB_info = GB_AxB_user_gus_$1_flipxy
-                    (*GB_Chandle, GB_M, GB_A, false, GB_B, false, GB_C_Sauna) ;
-            }
-            else
-            {
-                GB_info = GB_AxB_user_gus_$1
-                    (*GB_Chandle, GB_M, GB_A, false, GB_B, false, GB_C_Sauna) ;
-            }
-        }
-        else if (GB_AxB_method == GxB_AxB_DOT)
-        {
-
-            if (GB_Aslice == NULL)
-            {
-                if (GB_flipxy)
-                {
-                    GB_info = GB_AxB_user_dot3_$1_flipxy
-                        (*GB_Chandle, GB_M, GB_A, false, GB_B, false,
-                            GB_TaskList, GB_ntasks, GB_dot_nthreads) ;
-                }
-                else
-                {
-                    GB_info = GB_AxB_user_dot3_$1
-                        (*GB_Chandle, GB_M, GB_A, false, GB_B, false,
-                            GB_TaskList, GB_ntasks, GB_dot_nthreads) ;
-                }
-            }
-            else
-            {
-                if (GB_flipxy)
-                {
-                    GB_info = GB_AxB_user_dot2_$1_flipxy
-                        (*GB_Chandle, GB_M,
-                        GB_Aslice, false, GB_B, false, GB_B_slice,
-                        GB_C_counts, GB_dot_nthreads, GB_naslice, GB_nbslice) ;
-                }
-                else
-                {
-                    GB_info = GB_AxB_user_dot2_$1
-                        (*GB_Chandle, GB_M,
-                        GB_Aslice, false, GB_B, false, GB_B_slice,
-                        GB_C_counts, GB_dot_nthreads, GB_naslice, GB_nbslice) ;
-                }
-            }
-        }
-        else // (GB_AxB_method == GxB_AxB_HEAP)
-        {
-            if (GB_flipxy)
-            {
-                GB_info = GB_AxB_user_heap_$1_flipxy
-                    (GB_Chandle, GB_M, GB_A, false, GB_B, false,
-                    GB_List, GB_pA_pair, GB_Heap, GB_bjnz_max) ;
-            }
-            else
-            {
-                GB_info = GB_AxB_user_heap_$1
-                    (GB_Chandle, GB_M, GB_A, false, GB_B, false,
-                    GB_List, GB_pA_pair, GB_Heap, GB_bjnz_max) ;
-            }
-        }
-    } ) $2')
-
-m4_define(`GxB_Semiring_define', `GB_semiring($1,`
-    #undef GBCOMPACT
-    #define GB_ADD(z,y)    GB_DEF_$2_add (&(z), &(z), &(y))
-    #define GB_MULTIPLY_ADD(c,a,b)  \
-    {                               \
-        GB_ctype t ;                \
-        GB_MULTIPLY(t,a,b) ;        \
-        GB_ADD(c,t) ;               \
-    }
-    #define GB_identity    GB_DEF_$2_identity
-    #define GB_dot_simd    ;
-    #if defined ( GB_DEF_$2_is_user_terminal )
-        #define GB_terminal if (memcmp (&cij, &GB_DEF_$2_user_terminal, GB_DEF_$2_zsize) == 0) break ;
-    #elif defined ( GB_DEF_$2_terminal )
-        #define GB_terminal if (cij == GB_DEF_$2_terminal) break ;
-    #else
-        #define GB_terminal ;
-    #endif
-    #define GB_ctype    GB_DEF_$3_ztype
-    #define GB_geta(a,Ax,p) GB_atype a = Ax [p]
-    #define GB_getb(b,Bx,p) GB_btype b = Bx [p]
-    #define GB_AgusB    GB_AxB_user_gus_$1
-    #define GB_Adot2B   GB_AxB_user_dot2_$1
-    #define GB_Adot3B   GB_AxB_user_dot3_$1
-    #define GB_AheapB   GB_AxB_user_heap_$1
-    #define GB_MULTIPLY(z,x,y) GB_DEF_$3_function (&(z), &(x), &(y))
-    #define GB_atype    GB_DEF_$3_xtype
-    #define GB_btype    GB_DEF_$3_ytype
-    #include "GB_AxB.h"
-    #include "GB_AxB.c"
-    #undef GB_atype
-    #undef GB_btype
-    #undef GB_MULTIPLY
-    #undef GB_AgusB
-    #undef GB_Adot2B
-    #undef GB_Adot3B
-    #undef GB_AheapB
-    #define GB_AgusB    GB_AxB_user_gus_$1_flipxy
-    #define GB_Adot2B   GB_AxB_user_dot2_$1_flipxy
-    #define GB_Adot3B   GB_AxB_user_dot3_$1_flipxy
-    #define GB_AheapB   GB_AxB_user_heap_$1_flipxy
-    #define GB_MULTIPLY(z,x,y) GB_DEF_$3_function (&(z), &(y), &(x))
-    #define GB_atype    GB_DEF_$3_ytype
-    #define GB_btype    GB_DEF_$3_xtype
-    #include "GB_AxB.h"
-    #include "GB_AxB.c"
-    #undef GB_atype
-    #undef GB_btype
-    #undef GB_MULTIPLY
-    #undef GB_AgusB
-    #undef GB_Adot2B
-    #undef GB_Adot3B
-    #undef GB_AheapB
-    #undef GB_ADD
-    #undef GB_identity
-    #undef GB_dot_simd
-    #undef GB_terminal
-    #undef GB_ctype
-    #undef GB_geta
-    #undef GB_getb
-    struct GB_Semiring_opaque GB_opaque_$1 =
-    {
-        GB_MAGIC,           // object is defined
-        & GB_opaque_$2,     // add monoid
-        & GB_opaque_$3,     // multiply operator
-        GB_USER_COMPILED    // user-defined at compile-time
-    } ;
-    GrB_Semiring $1 = & GB_opaque_$1')')
-
-m4_define(`GxB_SelectOp_define', `
-    #define GB_DEF_$1_function $2
-    extern bool $2
-    (
-        GrB_Index i,
-        GrB_Index j,
-        GrB_Index nrows,
-        GrB_Index ncols,
-        const m4_ifelse(`$3', `NULL', `void', `GB_DEF_$3_type') *x,
-        const m4_ifelse(`$4', `NULL', `void', `GB_DEF_$4_type') *thunk
-    ) ;
-    struct GB_SelectOp_opaque GB_opaque_$1 =
-    {
-        GB_MAGIC,            // object is defined
-        m4_ifelse(`$3', `NULL',
-            `NULL,  // x not used',
-            `& GB_opaque_$3, // type of x')
-        m4_ifelse(`$4', `NULL',
-            `NULL,  // thunk not used',
-            `& GB_opaque_$4, // type of thunk')
-        $2,                  // pointer to the C function
-        "$2",
-        GB_USER_SELECT_C_opcode // user-defined at compile-time
-    } ;
-    GxB_SelectOp $1 = & GB_opaque_$1')
diff --git a/Config/user_def2.m4 b/Config/user_def2.m4
deleted file mode 100644
index 788a8bb40c..0000000000
--- a/Config/user_def2.m4
+++ /dev/null
@@ -1,42 +0,0 @@
-//------------------------------------------------------------------------------
-// SuiteSparse/GraphBLAS/Config/user_def2.m4: code to call user semirings
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AxB_user
-(
-    const GrB_Desc_Value GB_AxB_method,
-    const GrB_Semiring GB_s,
-
-    GrB_Matrix *GB_Chandle,
-    const GrB_Matrix GB_M,
-    const GrB_Matrix GB_A,          // not used for dot2 method
-    const GrB_Matrix GB_B,
-    bool GB_flipxy,
-
-    // for heap method only:
-    int64_t *GB_RESTRICT GB_List,
-    GB_pointer_pair *GB_RESTRICT GB_pA_pair,
-    GB_Element *GB_RESTRICT GB_Heap,
-    const int64_t GB_bjnz_max,
-
-    // for Gustavson's method only:
-    GB_Sauna GB_C_Sauna,
-
-    // for dot method only:
-    const GrB_Matrix *GB_Aslice,    // for dot2 only
-    int64_t *GB_RESTRICT GB_B_slice,   // for dot2 only
-    const int GB_dot_nthreads,      // for dot2 and dot3
-    const int GB_naslice,           // for dot2 only
-    const int GB_nbslice,           // for dot2 only
-    int64_t **GB_C_counts,          // for dot2 only
-
-    // for dot3 method only:
-    const GB_task_struct *GB_RESTRICT GB_TaskList,
-    const int GB_ntasks
-)
-{
-    GrB_Info GB_info = GrB_SUCCESS ;
-    GB_semirings()
-    return (GB_info) ;
-}
-
diff --git a/Demo/Include/demos.h b/Demo/Include/demos.h
index 5d1500cc85..c6afcfb989 100644
--- a/Demo/Include/demos.h
+++ b/Demo/Include/demos.h
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Include/demos.h: include file for all demo programs
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Demo/Include/prand.h b/Demo/Include/prand.h
index 1030fc9e58..0fb092deaa 100644
--- a/Demo/Include/prand.h
+++ b/Demo/Include/prand.h
@@ -62,6 +62,9 @@
         prand_finalize ( ) ;            // free the prand types and operators
 */
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 #include "GraphBLAS.h"
 
 // prand_init:  create the random seed type and its operators
diff --git a/Demo/Include/simple_rand.h b/Demo/Include/simple_rand.h
index dd35ade88f..2e18f738a2 100644
--- a/Demo/Include/simple_rand.h
+++ b/Demo/Include/simple_rand.h
@@ -2,7 +2,7 @@
 /* GraphBLAS/Demo/Include/simple_rand.h: a very simple random number generator*/
 /* -------------------------------------------------------------------------- */
 
-/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved*/
+/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved*/
 /* http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.        */
 
 /* -------------------------------------------------------------------------- */
diff --git a/Demo/Include/simple_timer.h b/Demo/Include/simple_timer.h
index c07feba81a..bcc84c1efe 100644
--- a/Demo/Include/simple_timer.h
+++ b/Demo/Include/simple_timer.h
@@ -2,7 +2,7 @@
 /* GraphBLAS/Demo/Include/simple_timer.h: a timer for performance measurements*/
 /* -------------------------------------------------------------------------- */
 
-/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved*/
+/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved*/
 /* http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.        */
 
 /* -------------------------------------------------------------------------- */
diff --git a/Demo/Include/usercomplex.h b/Demo/Include/usercomplex.h
index 1ac8f01c0c..4d2bda0bef 100644
--- a/Demo/Include/usercomplex.h
+++ b/Demo/Include/usercomplex.h
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Include/usercomplex.h:  complex numbers as a user-defined type
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 #ifndef USERCOMPLEX_H
 #define USERCOMPLEX_H
 
@@ -27,7 +32,7 @@ extern
 GrB_BinaryOp Complex_first , Complex_second , Complex_min ,
              Complex_max   , Complex_plus   , Complex_minus ,
              Complex_times , Complex_div    , Complex_rdiv  ,
-             Complex_rminus ;
+             Complex_rminus, Complex_pair ;
 
 //------------------------------------------------------------------------------
 // 6 binary comparison functions, z=f(x,y), where CxC -> C
@@ -87,12 +92,7 @@ extern GrB_UnaryOp Complex_complex_real, Complex_complex_imag ;
 // Complex type, scalars, monoids, and semiring
 //------------------------------------------------------------------------------
 
-#ifdef MY_COMPLEX
-// use the pre-defined type in User/my_complex.m4
-#define Complex My_Complex
-#else
 extern GrB_Type Complex ;
-#endif
 
 extern GrB_Monoid   Complex_plus_monoid, Complex_times_monoid ;
 extern GrB_Semiring Complex_plus_times ;
diff --git a/Demo/MATLAB/Contents.m b/Demo/MATLAB/Contents.m
index c0d0dcb675..cbb52fcf06 100644
--- a/Demo/MATLAB/Contents.m
+++ b/Demo/MATLAB/Contents.m
@@ -11,3 +11,6 @@
 %   ipagerank    - compute the pagerank of nodes in a graph using an integer semiring
 %   rowscale     - row scale an adjacency matrix by out-degree
 %   dpagerank2   - compute the pagerank of nodes in a graph using a real semiring
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
diff --git a/Demo/MATLAB/adj_to_edges.m b/Demo/MATLAB/adj_to_edges.m
index ea17a883a8..70da047a09 100644
--- a/Demo/MATLAB/adj_to_edges.m
+++ b/Demo/MATLAB/adj_to_edges.m
@@ -8,6 +8,9 @@
 % E has size n-by-nnz(A), where n=size(A,1).  A must be square, and
 % its diagonal is ignored.  A is symmetrized with A=A+A'.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 [m n] = size (A) ;
 if (m ~= n)
     error ('A must be square') ;
diff --git a/Demo/MATLAB/check_adj.m b/Demo/MATLAB/check_adj.m
index aaf1335b2b..ca775f0ae0 100644
--- a/Demo/MATLAB/check_adj.m
+++ b/Demo/MATLAB/check_adj.m
@@ -4,6 +4,9 @@ function check_adj (A)
 %
 % A must be square, symmetric, binary, with no entries on the diagonal
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 [m n] = size (A) ;
 if (m ~= n)
     error ('A must be square') ;
diff --git a/Demo/MATLAB/dpagerank.m b/Demo/MATLAB/dpagerank.m
index d7c53a96f0..20a13cf927 100644
--- a/Demo/MATLAB/dpagerank.m
+++ b/Demo/MATLAB/dpagerank.m
@@ -11,6 +11,9 @@
 %
 % See also ipagerank.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 % original problem in real arithmetic
 n = size (A,1) ;        % number of nodes
 c = 0.85 ;              % probability of walking to random neighbor
diff --git a/Demo/MATLAB/dpagerank2.m b/Demo/MATLAB/dpagerank2.m
index 41c98365c7..919fa6477e 100644
--- a/Demo/MATLAB/dpagerank2.m
+++ b/Demo/MATLAB/dpagerank2.m
@@ -21,6 +21,9 @@
 %
 % See also ipagerank.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 if (nargin < 2)
     tol = 1e-5 ;        % stopping criterion
 end
diff --git a/Demo/MATLAB/edges_to_adj.m b/Demo/MATLAB/edges_to_adj.m
index fce3680f02..f6a8e9b869 100644
--- a/Demo/MATLAB/edges_to_adj.m
+++ b/Demo/MATLAB/edges_to_adj.m
@@ -8,6 +8,9 @@
 %
 % C is a symmetric binary matrix with no self edges.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 % check input E
 [i j x] = find (E) ;
 if (any (x ~= 1))
diff --git a/Demo/MATLAB/ipagerank.m b/Demo/MATLAB/ipagerank.m
index 123017ae9d..9ca2e77158 100644
--- a/Demo/MATLAB/ipagerank.m
+++ b/Demo/MATLAB/ipagerank.m
@@ -11,6 +11,9 @@
 %
 % See also dpagerank.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 % original problem in real arithmetic
 n = size (A,1) ;        % number of nodes
 c = 0.85 ;              % probability of walking to random neighbor
diff --git a/Demo/MATLAB/kron_demo.m b/Demo/MATLAB/kron_demo.m
index 071f0415bc..53f0f29210 100644
--- a/Demo/MATLAB/kron_demo.m
+++ b/Demo/MATLAB/kron_demo.m
@@ -3,6 +3,9 @@
 % Usage:
 % [C err] = kron_demo (A,B)
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 % write A to a file
 Afile = fopen ('A.tsv', 'w') ;
 [i j x] = find (A) ;
diff --git a/Demo/MATLAB/kron_test.m b/Demo/MATLAB/kron_test.m
index a3cc9f65ec..71911ddb3f 100644
--- a/Demo/MATLAB/kron_test.m
+++ b/Demo/MATLAB/kron_test.m
@@ -1,5 +1,8 @@
 %KRON_TEST test kron_demo.m
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear
 A = sprand (10, 20, 0.1) ;
 A (10,20) = 42 ;
diff --git a/Demo/MATLAB/rowscale.m b/Demo/MATLAB/rowscale.m
index dbe1ee6bd5..7b0560bf4a 100644
--- a/Demo/MATLAB/rowscale.m
+++ b/Demo/MATLAB/rowscale.m
@@ -2,6 +2,9 @@
 %ROWSCALE row scale an adjacency matrix by out-degree
 % C = rowscale (A)
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 % scale the adjacency matrix by out-degree
 dout = sum (A,2) ;              % dout(i) is the out-degree of node i
 is_nonempty = (dout > 0) ;      % find vertices with outgoing edges
diff --git a/Demo/MATLAB/tri_matlab.m b/Demo/MATLAB/tri_matlab.m
index cff4c8007d..df7846f361 100644
--- a/Demo/MATLAB/tri_matlab.m
+++ b/Demo/MATLAB/tri_matlab.m
@@ -1,4 +1,8 @@
 %TRI_MATLAB run tricount tests in MATLAB
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear
 diary tri_matlab_out.txt
 
diff --git a/Demo/MATLAB/tricount.m b/Demo/MATLAB/tricount.m
index 64f083d3a3..2f937b99bf 100644
--- a/Demo/MATLAB/tricount.m
+++ b/Demo/MATLAB/tricount.m
@@ -42,7 +42,7 @@
 % sparse column form, so the MATLAB equivalent of the Sandia method is
 % sum(sum((U*U).*U)).
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
diff --git a/Demo/Output/bfs_demo.out b/Demo/Output/bfs_demo.out
index 7988edf6a2..7e82bf2854 100644
--- a/Demo/Output/bfs_demo.out
+++ b/Demo/Output/bfs_demo.out
@@ -2,88 +2,88 @@ Wathen: nx 4 ny 4 n 65 nz 817 method 0, time: 0.000 sec
 number of nodes: 65
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000292
+BFS time in seconds:       0.000103
 nodes reachable from node 0: 65 out of 65
 max BFS level: 5
 
 method 5: same but check each result
-BFS time in seconds:       0.000179
+BFS time in seconds:       0.000053
 nodes reachable from node 0: 65 out of 65
 max BFS level: 5
 
 method 6: apply unary operator
-BFS time in seconds:       0.000249
+BFS time in seconds:       0.000045
 nodes reachable from node 0: 65 out of 65
 max BFS level: 5
 
 method 6: same but check each result
-BFS time in seconds:       0.000234
+BFS time in seconds:       0.000035
 nodes reachable from node 0: 65 out of 65
 max BFS level: 5
 random 5 by 5, nz: 21, method 1 time 0.000 sec
 number of nodes: 5
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000213
+BFS time in seconds:       0.000066
 nodes reachable from node 0: 5 out of 5
 max BFS level: 3
 
 method 5: same but check each result
-BFS time in seconds:       0.000117
+BFS time in seconds:       0.000024
 nodes reachable from node 0: 5 out of 5
 max BFS level: 3
 
 method 6: apply unary operator
-BFS time in seconds:       0.000156
+BFS time in seconds:       0.000029
 nodes reachable from node 0: 5 out of 5
 max BFS level: 3
 
 method 6: same but check each result
-BFS time in seconds:       0.000143
+BFS time in seconds:       0.000022
 nodes reachable from node 0: 5 out of 5
 max BFS level: 3
 matrix 3 by 3, 3 entries, from stdin
 number of nodes: 3
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000228
+BFS time in seconds:       0.000048
 nodes reachable from node 0: 1 out of 3
 max BFS level: 1
 
 method 5: same but check each result
-BFS time in seconds:       0.000120
+BFS time in seconds:       0.000018
 nodes reachable from node 0: 1 out of 3
 max BFS level: 1
 
 method 6: apply unary operator
-BFS time in seconds:       0.000133
+BFS time in seconds:       0.000022
 nodes reachable from node 0: 1 out of 3
 max BFS level: 1
 
 method 6: same but check each result
-BFS time in seconds:       0.000122
+BFS time in seconds:       0.000015
 nodes reachable from node 0: 1 out of 3
 max BFS level: 1
 matrix 4 by 4, 8 entries, from stdin
 number of nodes: 4
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000204
+BFS time in seconds:       0.000050
 nodes reachable from node 0: 2 out of 4
 max BFS level: 2
 
 method 5: same but check each result
-BFS time in seconds:       0.000120
+BFS time in seconds:       0.000019
 nodes reachable from node 0: 2 out of 4
 max BFS level: 2
 
 method 6: apply unary operator
-BFS time in seconds:       0.000152
+BFS time in seconds:       0.000025
 nodes reachable from node 0: 2 out of 4
 max BFS level: 2
 
 method 6: same but check each result
-BFS time in seconds:       0.000194
+BFS time in seconds:       0.000018
 nodes reachable from node 0: 2 out of 4
 max BFS level: 2
 
@@ -99,22 +99,22 @@ matrix 4 by 4, 14 entries, from stdin
 number of nodes: 4
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000222
+BFS time in seconds:       0.000065
 nodes reachable from node 0: 4 out of 4
 max BFS level: 2
 
 method 5: same but check each result
-BFS time in seconds:       0.000162
+BFS time in seconds:       0.000019
 nodes reachable from node 0: 4 out of 4
 max BFS level: 2
 
 method 6: apply unary operator
-BFS time in seconds:       0.000156
+BFS time in seconds:       0.000026
 nodes reachable from node 0: 4 out of 4
 max BFS level: 2
 
 method 6: same but check each result
-BFS time in seconds:       0.000143
+BFS time in seconds:       0.000017
 nodes reachable from node 0: 4 out of 4
 max BFS level: 2
 
@@ -130,329 +130,329 @@ matrix 7 by 7, 16 entries, from stdin
 number of nodes: 7
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000164
+BFS time in seconds:       0.000052
 nodes reachable from node 0: 7 out of 7
 max BFS level: 5
 
 method 5: same but check each result
-BFS time in seconds:       0.000109
+BFS time in seconds:       0.000030
 nodes reachable from node 0: 7 out of 7
 max BFS level: 5
 
 method 6: apply unary operator
-BFS time in seconds:       0.000159
+BFS time in seconds:       0.000039
 nodes reachable from node 0: 7 out of 7
 max BFS level: 5
 
 method 6: same but check each result
-BFS time in seconds:       0.000144
+BFS time in seconds:       0.000030
 nodes reachable from node 0: 7 out of 7
 max BFS level: 5
 matrix 304 by 304, 876 entries, from stdin
 number of nodes: 304
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000448
+BFS time in seconds:       0.000144
 nodes reachable from node 0: 304 out of 304
 max BFS level: 17
 
 method 5: same but check each result
-BFS time in seconds:       0.000379
+BFS time in seconds:       0.000100
 nodes reachable from node 0: 304 out of 304
 max BFS level: 17
 
 method 6: apply unary operator
-BFS time in seconds:       0.000555
+BFS time in seconds:       0.000116
 nodes reachable from node 0: 304 out of 304
 max BFS level: 17
 
 method 6: same but check each result
-BFS time in seconds:       0.000773
+BFS time in seconds:       0.000107
 nodes reachable from node 0: 304 out of 304
 max BFS level: 17
 matrix 48 by 48, 400 entries, from stdin
 number of nodes: 48
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000177
+BFS time in seconds:       0.000059
 nodes reachable from node 0: 48 out of 48
 max BFS level: 4
 
 method 5: same but check each result
-BFS time in seconds:       0.000104
+BFS time in seconds:       0.000030
 nodes reachable from node 0: 48 out of 48
 max BFS level: 4
 
 method 6: apply unary operator
-BFS time in seconds:       0.000139
+BFS time in seconds:       0.000035
 nodes reachable from node 0: 48 out of 48
 max BFS level: 4
 
 method 6: same but check each result
-BFS time in seconds:       0.000132
+BFS time in seconds:       0.000030
 nodes reachable from node 0: 48 out of 48
 max BFS level: 4
 matrix 4884 by 4884, 290378 entries, from stdin
 number of nodes: 4884
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.003146
+BFS time in seconds:       0.001652
 nodes reachable from node 0: 4810 out of 4884
 max BFS level: 44
 
 method 5: same but check each result
-BFS time in seconds:       0.003059
+BFS time in seconds:       0.001546
 nodes reachable from node 0: 4810 out of 4884
 max BFS level: 44
 
 method 6: apply unary operator
-BFS time in seconds:       0.003773
+BFS time in seconds:       0.001583
 nodes reachable from node 0: 4810 out of 4884
 max BFS level: 44
 
 method 6: same but check each result
-BFS time in seconds:       0.003574
+BFS time in seconds:       0.001540
 nodes reachable from node 0: 4810 out of 4884
 max BFS level: 44
 matrix 183 by 183, 1585 entries, from stdin
 number of nodes: 183
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000297
+BFS time in seconds:       0.000083
 nodes reachable from node 0: 183 out of 183
 max BFS level: 4
 
 method 5: same but check each result
-BFS time in seconds:       0.000212
+BFS time in seconds:       0.000042
 nodes reachable from node 0: 183 out of 183
 max BFS level: 4
 
 method 6: apply unary operator
-BFS time in seconds:       0.000271
+BFS time in seconds:       0.000048
 nodes reachable from node 0: 183 out of 183
 max BFS level: 4
 
 method 6: same but check each result
-BFS time in seconds:       0.000258
+BFS time in seconds:       0.000038
 nodes reachable from node 0: 183 out of 183
 max BFS level: 4
 matrix 63 by 63, 246 entries, from stdin
 number of nodes: 63
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000208
+BFS time in seconds:       0.000065
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 
 method 5: same but check each result
-BFS time in seconds:       0.000149
+BFS time in seconds:       0.000036
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 
 method 6: apply unary operator
-BFS time in seconds:       0.000217
+BFS time in seconds:       0.000043
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 
 method 6: same but check each result
-BFS time in seconds:       0.000208
+BFS time in seconds:       0.000036
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 matrix 63 by 63, 246 entries, from stdin
 number of nodes: 63
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000205
+BFS time in seconds:       0.000063
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 
 method 5: same but check each result
-BFS time in seconds:       0.000149
+BFS time in seconds:       0.000035
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 
 method 6: apply unary operator
-BFS time in seconds:       0.000216
+BFS time in seconds:       0.000047
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 
 method 6: same but check each result
-BFS time in seconds:       0.000207
+BFS time in seconds:       0.000035
 nodes reachable from node 0: 63 out of 63
 max BFS level: 6
 matrix 78 by 78, 204 entries, from stdin
 number of nodes: 78
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000313
+BFS time in seconds:       0.000087
 nodes reachable from node 0: 78 out of 78
 max BFS level: 10
 
 method 5: same but check each result
-BFS time in seconds:       0.000235
+BFS time in seconds:       0.000061
 nodes reachable from node 0: 78 out of 78
 max BFS level: 10
 
 method 6: apply unary operator
-BFS time in seconds:       0.000348
+BFS time in seconds:       0.000071
 nodes reachable from node 0: 78 out of 78
 max BFS level: 10
 
 method 6: same but check each result
-BFS time in seconds:       0.000411
+BFS time in seconds:       0.000057
 nodes reachable from node 0: 78 out of 78
 max BFS level: 10
 matrix 982 by 982, 99840 entries, from stdin
 number of nodes: 982
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000806
+BFS time in seconds:       0.000507
 nodes reachable from node 0: 933 out of 982
 max BFS level: 5
 
 method 5: same but check each result
-BFS time in seconds:       0.000582
+BFS time in seconds:       0.000453
 nodes reachable from node 0: 933 out of 982
 max BFS level: 5
 
 method 6: apply unary operator
-BFS time in seconds:       0.000678
+BFS time in seconds:       0.000453
 nodes reachable from node 0: 933 out of 982
 max BFS level: 5
 
 method 6: same but check each result
-BFS time in seconds:       0.000641
+BFS time in seconds:       0.000441
 nodes reachable from node 0: 933 out of 982
 max BFS level: 5
 matrix 67 by 67, 576 entries, from stdin
 number of nodes: 67
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.000201
+BFS time in seconds:       0.000068
 nodes reachable from node 0: 67 out of 67
 max BFS level: 5
 
 method 5: same but check each result
-BFS time in seconds:       0.000124
+BFS time in seconds:       0.000034
 nodes reachable from node 0: 67 out of 67
 max BFS level: 5
 
 method 6: apply unary operator
-BFS time in seconds:       0.000174
+BFS time in seconds:       0.000044
 nodes reachable from node 0: 67 out of 67
 max BFS level: 5
 
 method 6: same but check each result
-BFS time in seconds:       0.000163
+BFS time in seconds:       0.000034
 nodes reachable from node 0: 67 out of 67
 max BFS level: 5
-Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.084 sec
+Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.114 sec
 number of nodes: 120801
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.092873
+BFS time in seconds:       0.062683
 nodes reachable from node 0: 120801 out of 120801
 max BFS level: 201
 
 method 5: same but check each result
-BFS time in seconds:       0.089280
+BFS time in seconds:       0.048605
 nodes reachable from node 0: 120801 out of 120801
 max BFS level: 201
 
 method 6: apply unary operator
-BFS time in seconds:       0.120832
+BFS time in seconds:       0.044805
 nodes reachable from node 0: 120801 out of 120801
 max BFS level: 201
 
 method 6: same but check each result
-BFS time in seconds:       0.123596
+BFS time in seconds:       0.047597
 nodes reachable from node 0: 120801 out of 120801
 max BFS level: 201
-random 10000 by 10000, nz: 199777, method 0 time 0.046 sec
+random 10000 by 10000, nz: 199777, method 0 time 0.023 sec
 number of nodes: 10000
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.006886
+BFS time in seconds:       0.001472
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
 
 method 5: same but check each result
-BFS time in seconds:       0.006159
+BFS time in seconds:       0.001356
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
 
 method 6: apply unary operator
-BFS time in seconds:       0.006411
+BFS time in seconds:       0.001365
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
 
 method 6: same but check each result
-BFS time in seconds:       0.006403
+BFS time in seconds:       0.001336
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
-random 10000 by 10000, nz: 199777, method 1 time 0.034 sec
+random 10000 by 10000, nz: 199777, method 1 time 0.019 sec
 number of nodes: 10000
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.006534
+BFS time in seconds:       0.001486
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
 
 method 5: same but check each result
-BFS time in seconds:       0.005678
+BFS time in seconds:       0.001357
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
 
 method 6: apply unary operator
-BFS time in seconds:       0.005877
+BFS time in seconds:       0.001401
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
 
 method 6: same but check each result
-BFS time in seconds:       0.005922
+BFS time in seconds:       0.001358
 nodes reachable from node 0: 10000 out of 10000
 max BFS level: 5
-random 100000 by 100000, nz: 19980256, method 0 time 2.134 sec
+random 100000 by 100000, nz: 19980256, method 0 time 2.289 sec
 number of nodes: 100000
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.112415
+BFS time in seconds:       0.057052
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
 
 method 5: same but check each result
-BFS time in seconds:       0.108608
+BFS time in seconds:       0.048007
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
 
 method 6: apply unary operator
-BFS time in seconds:       0.111228
+BFS time in seconds:       0.046741
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
 
 method 6: same but check each result
-BFS time in seconds:       0.110731
+BFS time in seconds:       0.047456
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
-random 100000 by 100000, nz: 19980256, method 1 time 1.370 sec
+random 100000 by 100000, nz: 19980256, method 1 time 1.723 sec
 number of nodes: 100000
 
 method 5: vector assign and reduce:
-BFS time in seconds:       0.113270
+BFS time in seconds:       0.057582
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
 
 method 5: same but check each result
-BFS time in seconds:       0.103206
+BFS time in seconds:       0.049565
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
 
 method 6: apply unary operator
-BFS time in seconds:       0.100382
+BFS time in seconds:       0.046042
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
 
 method 6: same but check each result
-BFS time in seconds:       0.100796
+BFS time in seconds:       0.046488
 nodes reachable from node 0: 100000 out of 100000
 max BFS level: 4
diff --git a/Demo/Output/mis_demo.out b/Demo/Output/mis_demo.out
index 76f953fce5..a21186fe04 100644
--- a/Demo/Output/mis_demo.out
+++ b/Demo/Output/mis_demo.out
@@ -1,220 +1,220 @@
 Wathen: nx 4 ny 4 n 65 nz 752 method 0, time: 0.000 sec
-MIS time in seconds:       0.007081
-MIS time in seconds:       0.001925
+MIS time in seconds:       0.006143
+MIS time in seconds:       0.000209
 independent set found: 13 of 65 nodes
 independent set found: 13 of 65 nodes
 isize: 13 13
-MIS time in seconds:       0.003826
-MIS time in seconds:       0.001550
+MIS time in seconds:       0.000217
+MIS time in seconds:       0.000185
 independent set found: 14 of 65 nodes
 independent set found: 14 of 65 nodes
 isize: 14 14
 random 5 by 5, nz: 18, method 1 time 0.000 sec
-MIS time in seconds:       0.005930
-MIS time in seconds:       0.002834
+MIS time in seconds:       0.005617
+MIS time in seconds:       0.000083
 independent set found: 1 of 5 nodes
 independent set found: 1 of 5 nodes
 isize: 1 1
-MIS time in seconds:       0.001173
-MIS time in seconds:       0.001132
+MIS time in seconds:       0.000131
+MIS time in seconds:       0.000117
 independent set found: 2 of 5 nodes
 independent set found: 2 of 5 nodes
 isize: 2 2
 matrix 3 by 3, 0 entries, from stdin
-MIS time in seconds:       0.003483
-MIS time in seconds:       0.000171
+MIS time in seconds:       0.003173
+MIS time in seconds:       0.000034
 independent set found: 3 of 3 nodes
 independent set found: 3 of 3 nodes
 isize: 3 3
-MIS time in seconds:       0.000176
-MIS time in seconds:       0.000156
+MIS time in seconds:       0.000039
+MIS time in seconds:       0.000031
 independent set found: 3 of 3 nodes
 independent set found: 3 of 3 nodes
 isize: 3 3
 matrix 4 by 4, 4 entries, from stdin
-MIS time in seconds:       0.005901
-MIS time in seconds:       0.000815
+MIS time in seconds:       0.000671
+MIS time in seconds:       0.000080
 independent set found: 2 of 4 nodes
 independent set found: 2 of 4 nodes
 isize: 2 2
-MIS time in seconds:       0.000681
-MIS time in seconds:       0.000625
+MIS time in seconds:       0.000093
+MIS time in seconds:       0.000074
 independent set found: 2 of 4 nodes
 independent set found: 2 of 4 nodes
 isize: 2 2
 matrix 4 by 4, 10 entries, from stdin
-MIS time in seconds:       0.006479
-MIS time in seconds:       0.003322
+MIS time in seconds:       0.004128
+MIS time in seconds:       0.000124
 independent set found: 2 of 4 nodes
 independent set found: 2 of 4 nodes
 isize: 2 2
-MIS time in seconds:       0.000645
-MIS time in seconds:       0.000597
+MIS time in seconds:       0.000089
+MIS time in seconds:       0.000073
 independent set found: 2 of 4 nodes
 independent set found: 2 of 4 nodes
 isize: 2 2
 matrix 7 by 7, 16 entries, from stdin
-MIS time in seconds:       0.006091
-MIS time in seconds:       0.001187
+MIS time in seconds:       0.002746
+MIS time in seconds:       0.000122
 independent set found: 4 of 7 nodes
 independent set found: 4 of 7 nodes
 isize: 4 4
-MIS time in seconds:       0.001010
-MIS time in seconds:       0.000953
+MIS time in seconds:       0.000128
+MIS time in seconds:       0.000114
 independent set found: 3 of 7 nodes
 independent set found: 3 of 7 nodes
 isize: 3 3
 matrix 304 by 304, 876 entries, from stdin
-MIS time in seconds:       0.004515
-MIS time in seconds:       0.001625
+MIS time in seconds:       0.009878
+MIS time in seconds:       0.000203
 independent set found: 215 of 304 nodes
 independent set found: 215 of 304 nodes
 isize: 215 215
-MIS time in seconds:       0.003551
-MIS time in seconds:       0.003702
+MIS time in seconds:       0.000198
+MIS time in seconds:       0.000185
 independent set found: 216 of 304 nodes
 independent set found: 216 of 304 nodes
 isize: 216 216
 matrix 48 by 48, 352 entries, from stdin
-MIS time in seconds:       0.004477
-MIS time in seconds:       0.001085
+MIS time in seconds:       0.001811
+MIS time in seconds:       0.000139
 independent set found: 12 of 48 nodes
 independent set found: 12 of 48 nodes
 isize: 12 12
-MIS time in seconds:       0.001466
-MIS time in seconds:       0.001048
+MIS time in seconds:       0.000147
+MIS time in seconds:       0.000134
 independent set found: 11 of 48 nodes
 independent set found: 11 of 48 nodes
 isize: 11 11
 matrix 4884 by 4884, 285494 entries, from stdin
-MIS time in seconds:       0.023170
-MIS time in seconds:       0.022117
+MIS time in seconds:       0.007108
+MIS time in seconds:       0.007478
 independent set found: 286 of 4884 nodes
 independent set found: 286 of 4884 nodes
 isize: 286 286
-MIS time in seconds:       0.023330
-MIS time in seconds:       0.016372
+MIS time in seconds:       0.003445
+MIS time in seconds:       0.003873
 independent set found: 284 of 4884 nodes
 independent set found: 284 of 4884 nodes
 isize: 284 284
 matrix 183 by 183, 1402 entries, from stdin
-MIS time in seconds:       0.008866
-MIS time in seconds:       0.003989
+MIS time in seconds:       0.001380
+MIS time in seconds:       0.000339
 independent set found: 91 of 183 nodes
 independent set found: 91 of 183 nodes
 isize: 91 91
-MIS time in seconds:       0.003754
-MIS time in seconds:       0.001883
+MIS time in seconds:       0.000254
+MIS time in seconds:       0.000231
 independent set found: 87 of 183 nodes
 independent set found: 87 of 183 nodes
 isize: 87 87
 matrix 63 by 63, 246 entries, from stdin
-MIS time in seconds:       0.005071
-MIS time in seconds:       0.001605
+MIS time in seconds:       0.003807
+MIS time in seconds:       0.000196
 independent set found: 23 of 63 nodes
 independent set found: 23 of 63 nodes
 isize: 23 23
-MIS time in seconds:       0.001287
-MIS time in seconds:       0.001258
+MIS time in seconds:       0.000167
+MIS time in seconds:       0.000142
 independent set found: 23 of 63 nodes
 independent set found: 23 of 63 nodes
 isize: 23 23
 matrix 63 by 63, 246 entries, from stdin
-MIS time in seconds:       0.004725
-MIS time in seconds:       0.001605
+MIS time in seconds:       0.000688
+MIS time in seconds:       0.000184
 independent set found: 25 of 63 nodes
 independent set found: 25 of 63 nodes
 isize: 25 25
-MIS time in seconds:       0.001624
-MIS time in seconds:       0.003202
+MIS time in seconds:       0.000152
+MIS time in seconds:       0.000139
 independent set found: 26 of 63 nodes
 independent set found: 26 of 63 nodes
 isize: 26 26
 matrix 78 by 78, 204 entries, from stdin
-MIS time in seconds:       0.004953
-MIS time in seconds:       0.001548
+MIS time in seconds:       0.003376
+MIS time in seconds:       0.000188
 independent set found: 51 of 78 nodes
 independent set found: 51 of 78 nodes
 isize: 51 51
-MIS time in seconds:       0.003901
-MIS time in seconds:       0.001590
+MIS time in seconds:       0.000192
+MIS time in seconds:       0.000179
 independent set found: 50 of 78 nodes
 independent set found: 50 of 78 nodes
 isize: 50 50
 matrix 982 by 982, 99840 entries, from stdin
-MIS time in seconds:       0.005524
-MIS time in seconds:       0.010821
+MIS time in seconds:       0.002870
+MIS time in seconds:       0.002080
 independent set found: 355 of 982 nodes
 independent set found: 355 of 982 nodes
 isize: 355 355
-MIS time in seconds:       0.005706
-MIS time in seconds:       0.005738
+MIS time in seconds:       0.002066
+MIS time in seconds:       0.002033
 independent set found: 362 of 982 nodes
 independent set found: 362 of 982 nodes
 isize: 362 362
 matrix 67 by 67, 574 entries, from stdin
-MIS time in seconds:       0.007276
-MIS time in seconds:       0.004133
+MIS time in seconds:       0.003863
+MIS time in seconds:       0.001124
 independent set found: 17 of 67 nodes
 independent set found: 17 of 67 nodes
 isize: 17 17
-MIS time in seconds:       0.004775
-MIS time in seconds:       0.004031
+MIS time in seconds:       0.000197
+MIS time in seconds:       0.000184
 independent set found: 17 of 67 nodes
 independent set found: 17 of 67 nodes
 isize: 17 17
-Wathen: nx 200 ny 200 n 120801 nz 1762400 method 0, time: 0.109 sec
-MIS time in seconds:       0.079212
-MIS time in seconds:       0.069833
+Wathen: nx 200 ny 200 n 120801 nz 1762400 method 0, time: 0.130 sec
+MIS time in seconds:       0.052224
+MIS time in seconds:       0.043820
 independent set found: 18220 of 120801 nodes
 independent set found: 18220 of 120801 nodes
 isize: 18220 18220
-MIS time in seconds:       0.069278
-MIS time in seconds:       0.070092
+MIS time in seconds:       0.047010
+MIS time in seconds:       0.043788
 independent set found: 18241 of 120801 nodes
 independent set found: 18241 of 120801 nodes
 isize: 18241 18241
-random 10000 by 10000, nz: 199768, method 0 time 0.039 sec
-MIS time in seconds:       0.035086
-MIS time in seconds:       0.046511
+random 10000 by 10000, nz: 199768, method 0 time 0.023 sec
+MIS time in seconds:       0.007380
+MIS time in seconds:       0.006517
 independent set found: 1677 of 10000 nodes
 independent set found: 1677 of 10000 nodes
 isize: 1677 1677
-MIS time in seconds:       0.048538
-MIS time in seconds:       0.031348
+MIS time in seconds:       0.005862
+MIS time in seconds:       0.005842
 independent set found: 1664 of 10000 nodes
 independent set found: 1664 of 10000 nodes
 isize: 1664 1664
-random 10000 by 10000, nz: 199768, method 1 time 0.028 sec
-MIS time in seconds:       0.028670
-MIS time in seconds:       0.030404
+random 10000 by 10000, nz: 199768, method 1 time 0.019 sec
+MIS time in seconds:       0.006514
+MIS time in seconds:       0.006300
 independent set found: 1677 of 10000 nodes
 independent set found: 1677 of 10000 nodes
 isize: 1677 1677
-MIS time in seconds:       0.030488
-MIS time in seconds:       0.030705
+MIS time in seconds:       0.007062
+MIS time in seconds:       0.006443
 independent set found: 1664 of 10000 nodes
 independent set found: 1664 of 10000 nodes
 isize: 1664 1664
-random 100000 by 100000, nz: 19980330, method 0 time 2.136 sec
-MIS time in seconds:       0.258537
-MIS time in seconds:       0.234718
+random 100000 by 100000, nz: 19980330, method 0 time 2.185 sec
+MIS time in seconds:       0.215470
+MIS time in seconds:       0.213781
 independent set found: 2799 of 100000 nodes
 independent set found: 2799 of 100000 nodes
 isize: 2799 2799
-MIS time in seconds:       0.276934
-MIS time in seconds:       0.244382
+MIS time in seconds:       0.222207
+MIS time in seconds:       0.220219
 independent set found: 2815 of 100000 nodes
 independent set found: 2815 of 100000 nodes
 isize: 2815 2815
-random 100000 by 100000, nz: 19980330, method 1 time 1.372 sec
-MIS time in seconds:       0.241853
-MIS time in seconds:       0.232144
+random 100000 by 100000, nz: 19980330, method 1 time 1.743 sec
+MIS time in seconds:       0.215240
+MIS time in seconds:       0.213371
 independent set found: 2799 of 100000 nodes
 independent set found: 2799 of 100000 nodes
 isize: 2799 2799
-MIS time in seconds:       0.261282
-MIS time in seconds:       0.238317
+MIS time in seconds:       0.222707
+MIS time in seconds:       0.223993
 independent set found: 2815 of 100000 nodes
 independent set found: 2815 of 100000 nodes
 isize: 2815 2815
diff --git a/Demo/Output/openmp_demo.out b/Demo/Output/openmp_demo.out
index 13daf62aa1..b12e3c03cf 100644
--- a/Demo/Output/openmp_demo.out
+++ b/Demo/Output/openmp_demo.out
@@ -5,19 +5,19 @@ User threads in this program are OpenMP threads.
 
 ================= worker 0 starts:
 
-================= worker 7 starts:
-
-================= worker 1 starts:
+================= worker 4 starts:
 
 ================= worker 3 starts:
 
 ================= worker 5 starts:
 
-================= worker 2 starts:
+================= worker 1 starts:
+
+================= worker 7 starts:
 
 ================= worker 6 starts:
 
-================= worker 4 starts:
+================= worker 2 starts:
 
 ----------------- worker 0 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
@@ -25,128 +25,67 @@ function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1000 out of range; must be < 6
 
 
------------------ worker 3 intentional error:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1003 out of range; must be < 6
-
-
------------------ worker 1 intentional error:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1001 out of range; must be < 6
-
-
 ----------------- worker 5 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1005 out of range; must be < 6
 
 
------------------ worker 2 intentional error:
+----------------- worker 3 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1002 out of range; must be < 6
-
+Row index 1003 out of range; must be < 6
 
------------------ worker 1 is done:
 
-  6x6 GraphBLAS double matrix, sparse by row:
-  A, 36 entries
+----------------- worker 4 intentional error:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1004 out of range; must be < 6
 
-    (0,0)    101001
-    (0,1)    102001
-    (0,2)    103001
-    (0,3)    104001
-    (0,4)    105001
-    (0,5)    106001
-    (1,0)    201001
-    (1,1)    202001
-    (1,2)    203001
-    (1,3)    204001
-    (1,4)    205001
-    (1,5)    206001
-    (2,0)    301001
-    (2,1)    302001
-    (2,2)    303001
-    (2,3)    304001
-    (2,4)    305001
-    (2,5)    306001
-    (3,0)    401001
-    (3,1)    402001
-    (3,2)    403001
-    (3,3)    404001
-    (3,4)    405001
-    (3,5)    406001
-    (4,0)    501001
-    (4,1)    502001
-    (4,2)    503001
-    (4,3)    504001
-    (4,4)    505001
-    (4,5)    506001
-    ...
 
------------------ worker 2 is done:
+----------------- worker 0 is done:
 
   6x6 GraphBLAS double matrix, sparse by row:
   A, 36 entries
 
-    (0,0)    101002
-    (0,1)    102002
-    (0,2)    103002
-    (0,3)    104002
-    (0,4)    105002
-    (0,5)    106002
-    (1,0)    201002
-    (1,1)    202002
-    (1,2)    203002
-    (1,3)    204002
-    (1,4)    205002
-    (1,5)    206002
-    (2,0)    301002
-    (2,1)    302002
-    (2,2)    303002
-    (2,3)    304002
-    (2,4)    305002
-    (2,5)    306002
-    (3,0)    401002
-    (3,1)    402002
-    (3,2)    403002
-    (3,3)    404002
-    (3,4)    405002
-    (3,5)    406002
-    (4,0)    501002
-    (4,1)    502002
-    (4,2)    503002
-    (4,3)    504002
-    (4,4)    505002
-    (4,5)    506002
+    (0,0)    101000
+    (0,1)    102000
+    (0,2)    103000
+    (0,3)    104000
+    (0,4)    105000
+    (0,5)    106000
+    (1,0)    201000
+    (1,1)    202000
+    (1,2)    203000
+    (1,3)    204000
+    (1,4)    205000
+    (1,5)    206000
+    (2,0)    301000
+    (2,1)    302000
+    (2,2)    303000
+    (2,3)    304000
+    (2,4)    305000
+    (2,5)    306000
+    (3,0)    401000
+    (3,1)    402000
+    (3,2)    403000
+    (3,3)    404000
+    (3,4)    405000
+    (3,5)    406000
+    (4,0)    501000
+    (4,1)    502000
+    (4,2)    503000
+    (4,3)    504000
+    (4,4)    505000
+    (4,5)    506000
     ...
 
------------------ worker 1 error should be same:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1001 out of range; must be < 6
-
-
------------------ worker 4 intentional error:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1004 out of range; must be < 6
-
-
 ----------------- worker 7 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1007 out of range; must be < 6
 
 
------------------ worker 2 error should be same:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1002 out of range; must be < 6
-
-
 ----------------- worker 5 is done:
 
   6x6 GraphBLAS double matrix, sparse by row:
@@ -184,43 +123,6 @@ Row index 1002 out of range; must be < 6
     (4,5)    506005
     ...
 
------------------ worker 4 is done:
-
-  6x6 GraphBLAS double matrix, sparse by row:
-  A, 36 entries
-
-    (0,0)    101004
-    (0,1)    102004
-    (0,2)    103004
-    (0,3)    104004
-    (0,4)    105004
-    (0,5)    106004
-    (1,0)    201004
-    (1,1)    202004
-    (1,2)    203004
-    (1,3)    204004
-    (1,4)    205004
-    (1,5)    206004
-    (2,0)    301004
-    (2,1)    302004
-    (2,2)    303004
-    (2,3)    304004
-    (2,4)    305004
-    (2,5)    306004
-    (3,0)    401004
-    (3,1)    402004
-    (3,2)    403004
-    (3,3)    404004
-    (3,4)    405004
-    (3,5)    406004
-    (4,0)    501004
-    (4,1)    502004
-    (4,2)    503004
-    (4,3)    504004
-    (4,4)    505004
-    (4,5)    506004
-    ...
-
 ----------------- worker 3 is done:
 
   6x6 GraphBLAS double matrix, sparse by row:
@@ -258,16 +160,28 @@ Row index 1002 out of range; must be < 6
     (4,5)    506003
     ...
 
------------------ worker 5 error should be same:
+----------------- worker 0 error should be same:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1005 out of range; must be < 6
+Row index 1000 out of range; must be < 6
 
 
------------------ worker 4 error should be same:
+----------------- worker 6 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1004 out of range; must be < 6
+Row index 1006 out of range; must be < 6
+
+
+----------------- worker 1 intentional error:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1001 out of range; must be < 6
+
+
+----------------- worker 5 error should be same:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1005 out of range; must be < 6
 
 
 ----------------- worker 7 is done:
@@ -307,59 +221,41 @@ Row index 1004 out of range; must be < 6
     (4,5)    506007
     ...
 
------------------ worker 6 intentional error:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1006 out of range; must be < 6
-
-
------------------ worker 7 error should be same:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1007 out of range; must be < 6
-
-
------------------ worker 3 error should be same:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1003 out of range; must be < 6
-
-
------------------ worker 0 is done:
+----------------- worker 1 is done:
 
   6x6 GraphBLAS double matrix, sparse by row:
   A, 36 entries
 
-    (0,0)    101000
-    (0,1)    102000
-    (0,2)    103000
-    (0,3)    104000
-    (0,4)    105000
-    (0,5)    106000
-    (1,0)    201000
-    (1,1)    202000
-    (1,2)    203000
-    (1,3)    204000
-    (1,4)    205000
-    (1,5)    206000
-    (2,0)    301000
-    (2,1)    302000
-    (2,2)    303000
-    (2,3)    304000
-    (2,4)    305000
-    (2,5)    306000
-    (3,0)    401000
-    (3,1)    402000
-    (3,2)    403000
-    (3,3)    404000
-    (3,4)    405000
-    (3,5)    406000
-    (4,0)    501000
-    (4,1)    502000
-    (4,2)    503000
-    (4,3)    504000
-    (4,4)    505000
-    (4,5)    506000
+    (0,0)    101001
+    (0,1)    102001
+    (0,2)    103001
+    (0,3)    104001
+    (0,4)    105001
+    (0,5)    106001
+    (1,0)    201001
+    (1,1)    202001
+    (1,2)    203001
+    (1,3)    204001
+    (1,4)    205001
+    (1,5)    206001
+    (2,0)    301001
+    (2,1)    302001
+    (2,2)    303001
+    (2,3)    304001
+    (2,4)    305001
+    (2,5)    306001
+    (3,0)    401001
+    (3,1)    402001
+    (3,2)    403001
+    (3,3)    404001
+    (3,4)    405001
+    (3,5)    406001
+    (4,0)    501001
+    (4,1)    502001
+    (4,2)    503001
+    (4,3)    504001
+    (4,4)    505001
+    (4,5)    506001
     ...
 
 ----------------- worker 6 is done:
@@ -399,10 +295,16 @@ Row index 1003 out of range; must be < 6
     (4,5)    506006
     ...
 
------------------ worker 0 error should be same:
+----------------- worker 7 error should be same:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1000 out of range; must be < 6
+Row index 1007 out of range; must be < 6
+
+
+----------------- worker 3 error should be same:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1003 out of range; must be < 6
 
 
 ----------------- worker 6 error should be same:
@@ -411,6 +313,104 @@ function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1006 out of range; must be < 6
 
 
+----------------- worker 4 is done:
+
+  6x6 GraphBLAS double matrix, sparse by row:
+  A, 36 entries
+
+    (0,0)    101004
+    (0,1)    102004
+    (0,2)    103004
+    (0,3)    104004
+    (0,4)    105004
+    (0,5)    106004
+    (1,0)    201004
+    (1,1)    202004
+    (1,2)    203004
+    (1,3)    204004
+    (1,4)    205004
+    (1,5)    206004
+    (2,0)    301004
+    (2,1)    302004
+    (2,2)    303004
+    (2,3)    304004
+    (2,4)    305004
+    (2,5)    306004
+    (3,0)    401004
+    (3,1)    402004
+    (3,2)    403004
+    (3,3)    404004
+    (3,4)    405004
+    (3,5)    406004
+    (4,0)    501004
+    (4,1)    502004
+    (4,2)    503004
+    (4,3)    504004
+    (4,4)    505004
+    (4,5)    506004
+    ...
+
+----------------- worker 1 error should be same:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1001 out of range; must be < 6
+
+
+----------------- worker 4 error should be same:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1004 out of range; must be < 6
+
+
+----------------- worker 2 intentional error:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1002 out of range; must be < 6
+
+
+----------------- worker 2 is done:
+
+  6x6 GraphBLAS double matrix, sparse by row:
+  A, 36 entries
+
+    (0,0)    101002
+    (0,1)    102002
+    (0,2)    103002
+    (0,3)    104002
+    (0,4)    105002
+    (0,5)    106002
+    (1,0)    201002
+    (1,1)    202002
+    (1,2)    203002
+    (1,3)    204002
+    (1,4)    205002
+    (1,5)    206002
+    (2,0)    301002
+    (2,1)    302002
+    (2,2)    303002
+    (2,3)    304002
+    (2,4)    305002
+    (2,5)    306002
+    (3,0)    401002
+    (3,1)    402002
+    (3,2)    403002
+    (3,3)    404002
+    (3,4)    405002
+    (3,5)    406002
+    (4,0)    501002
+    (4,1)    502002
+    (4,2)    503002
+    (4,3)    504002
+    (4,4)    505002
+    (4,5)    506002
+    ...
+
+----------------- worker 2 error should be same:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1002 out of range; must be < 6
+
+
 ---- Master prints matrix 0
 
   6x6 GraphBLAS double matrix, sparse by row:
@@ -709,7 +709,7 @@ Row index 1006 out of range; must be < 6
 
 
 ---- Master thread prints an error message:
-master -1 : Error: GraphBLAS error: GrB_NULL_POINTER
+Error: GraphBLAS error: GrB_NULL_POINTER
 function: GrB_Matrix_new (&A, type, nrows, ncols)
 Required argument is null: [A]
 
diff --git a/Demo/Output/pagerank_demo.out b/Demo/Output/pagerank_demo.out
index c67ba4f081..ec04b6b52e 100644
--- a/Demo/Output/pagerank_demo.out
+++ b/Demo/Output/pagerank_demo.out
@@ -1,9 +1,9 @@
 
-pagerank_demo:
+pagerank_demo: nthreads: 8
 ntuples: 3
 nrows 3 ncols 3
-time to prune self-edges:     0.000001
-time to build the graph with GrB_Matrix_build:     0.000080
+time to prune self-edges:     0.000000
+time to build the graph with GrB_Matrix_build:     0.000033
 leave A as-is
 
 Matrix from file:
@@ -14,20 +14,19 @@ Matrix from file:
     (0,0)   1
     (1,1)   1
     (2,2)   1
-n 3 edges 3  dpagerank time :       0.001839 iters: 20
-n 3 edges 3  ipagerank time :       0.001615 iters: 20
-dpagerank2: pagerank objects defined at run-time
-n 3 edges 3  dpagerank time :       0.000218 iters: 1
+n 3 edges 3  dpagerank time :       0.000128 iters: 20
+n 3 edges 3  ipagerank time :       0.000080 iters: 20
+n 3 edges 3  dpagerank time :       0.000031 iters: 1
 Top 3 nodes:
     0 d:[     0 :   3.38861694e-01] i:[     0 :   2.86096486e+08] x:[     0 :   3.33333333e-01]
     1 d:[     2 :   3.33331362e-01] i:[     2 :   2.81427298e+08] x:[     1 :   3.33333333e-01]mismatch
     2 d:[     1 :   3.27806943e-01] i:[     1 :   2.76763104e+08] x:[     2 :   3.33333333e-01]mismatch
 
-pagerank_demo:
+pagerank_demo: nthreads: 8
 ntuples: 8
 nrows 4 ncols 4
 time to prune self-edges:     0.000001
-time to build the graph with GrB_Matrix_build:     0.000116
+time to build the graph with GrB_Matrix_build:     0.000033
 leave A as-is
 
 Matrix from file:
@@ -43,21 +42,20 @@ Matrix from file:
     (2,3)   1
     (3,2)   1
     (3,3)   1
-n 4 edges 8  dpagerank time :       0.001893 iters: 20
-n 4 edges 8  ipagerank time :       0.001678 iters: 20
-dpagerank2: pagerank objects defined at run-time
-n 4 edges 8  dpagerank time :       0.000238 iters: 1
+n 4 edges 8  dpagerank time :       0.000152 iters: 20
+n 4 edges 8  ipagerank time :       0.000108 iters: 20
+n 4 edges 8  dpagerank time :       0.000029 iters: 1
 Top 4 nodes:
     0 d:[     0 :   2.53865987e-01] i:[     0 :   2.04675869e+08] x:[     0 :   2.50000000e-01]
     1 d:[     1 :   2.53865987e-01] i:[     1 :   2.04675869e+08] x:[     1 :   2.50000000e-01]
     2 d:[     2 :   2.46134013e-01] i:[     2 :   1.98442073e+08] x:[     2 :   2.50000000e-01]
     3 d:[     3 :   2.46134013e-01] i:[     3 :   1.98442073e+08] x:[     3 :   2.50000000e-01]
 
-pagerank_demo:
+pagerank_demo: nthreads: 8
 ntuples: 10
 nrows 4 ncols 4
-time to prune self-edges:     0.000001
-time to build the graph with GrB_Matrix_build:     0.000078
+time to prune self-edges:     0.000000
+time to build the graph with GrB_Matrix_build:     0.000033
 leave A as-is
 
 Matrix from file:
@@ -75,21 +73,20 @@ Matrix from file:
     (3,0)   1
     (3,1)   1
     (3,3)   1
-n 4 edges 10  dpagerank time :       0.001313 iters: 20
-n 4 edges 10  ipagerank time :       0.001190 iters: 20
-dpagerank2: pagerank objects defined at run-time
-n 4 edges 10  dpagerank time :       0.001596 iters: 12
+n 4 edges 10  dpagerank time :       0.000144 iters: 20
+n 4 edges 10  ipagerank time :       0.000110 iters: 20
+n 4 edges 10  dpagerank time :       0.000108 iters: 12
 Top 4 nodes:
     0 d:[     0 :   2.83169320e-01] i:[     0 :   2.28301264e+08] x:[     0 :   2.83169438e-01]
     1 d:[     1 :   2.79491864e-01] i:[     1 :   2.25336367e+08] x:[     1 :   2.79491122e-01]
     2 d:[     2 :   2.74516447e-01] i:[     2 :   2.21325008e+08] x:[     2 :   2.74518319e-01]
     3 d:[     3 :   1.62822368e-01] i:[     3 :   1.31273234e+08] x:[     3 :   1.62821121e-01]
 
-pagerank_demo:
+pagerank_demo: nthreads: 8
 ntuples: 224
 nrows 48 ncols 48
-time to prune self-edges:     0.000001
-time to build the graph with GrB_Matrix_build:     0.000177
+time to prune self-edges:     0.000000
+time to build the graph with GrB_Matrix_build:     0.000049
 leave A as-is
 
 Matrix from file:
@@ -119,10 +116,9 @@ Matrix from file:
     (8,8)   1
     (9,1)   1
     ...
-n 48 edges 224  dpagerank time :       0.001844 iters: 20
-n 48 edges 224  ipagerank time :       0.001657 iters: 20
-dpagerank2: pagerank objects defined at run-time
-n 48 edges 224  dpagerank time :       0.002612 iters: 16
+n 48 edges 224  dpagerank time :       0.000188 iters: 20
+n 48 edges 224  ipagerank time :       0.000180 iters: 20
+n 48 edges 224  dpagerank time :       0.000289 iters: 16
 Top 48 nodes:
     0 d:[     2 :   2.00526870e-01] i:[     2 :   2.04212630e+08] x:[     2 :   2.01807617e-01]
     1 d:[     0 :   1.93646215e-01] i:[     0 :   1.97205508e+08] x:[     0 :   1.92211713e-01]
@@ -173,11 +169,11 @@ Top 48 nodes:
    46 d:[    47 :   3.45092025e-03] i:[    47 :   3.51434800e+06] x:[    47 :   3.45092025e-03]
    47 d:[    45 :   3.41530055e-03] i:[    45 :   3.47807400e+06] x:[    45 :   3.41530055e-03]
 
-pagerank_demo:
+pagerank_demo: nthreads: 8
 ntuples: 147631
 nrows 4884 ncols 4884
 time to prune self-edges:     0.000001
-time to build the graph with GrB_Matrix_build:     0.019238
+time to build the graph with GrB_Matrix_build:     0.006729
 leave A as-is
 
 Matrix from file:
@@ -211,10 +207,9 @@ Matrix from file:
     (8,8)   1
     (9,3)   1
     ...
-n 4884 edges 147631  dpagerank time :       0.015467 iters: 20
-n 4884 edges 147631  ipagerank time :       0.016940 iters: 20
-dpagerank2: pagerank objects defined at run-time
-n 4884 edges 147631  dpagerank time :       0.200301 iters: 46
+n 4884 edges 147631  dpagerank time :       0.008657 iters: 20
+n 4884 edges 147631  ipagerank time :       0.008946 iters: 20
+n 4884 edges 147631  dpagerank time :       0.042111 iters: 46
 Top 4884 nodes:
     0 d:[     0 :   2.91912462e-02] i:[     0 :   3.10526730e+07] x:[     0 :   3.32228314e-02]
     1 d:[     1 :   5.00313528e-03] i:[     1 :   5.32204600e+06] x:[     1 :   4.99132478e-03]
@@ -5101,11 +5096,11 @@ Top 4884 nodes:
  4882 d:[  4881 :   3.35779202e-05] i:[  4881 :   3.57160000e+04] x:[  4881 :   3.35779202e-05]
  4883 d:[  4882 :   3.20757501e-05] i:[  4882 :   3.41180000e+04] x:[  4882 :   3.20757501e-05]
 
-pagerank_demo:
+pagerank_demo: nthreads: 8
 ntuples: 1069
 nrows 183 ncols 183
-time to prune self-edges:     0.000001
-time to build the graph with GrB_Matrix_build:     0.000270
+time to prune self-edges:     0.000000
+time to build the graph with GrB_Matrix_build:     0.000138
 leave A as-is
 
 Matrix from file:
@@ -5144,10 +5139,9 @@ Matrix from file:
     (0,90)   1
     (0,91)   1
     ...
-n 183 edges 1069  dpagerank time :       0.002135 iters: 20
-n 183 edges 1069  ipagerank time :       0.001954 iters: 20
-dpagerank2: pagerank objects defined at run-time
-n 183 edges 1069  dpagerank time :       0.006172 iters: 37
+n 183 edges 1069  dpagerank time :       0.000408 iters: 20
+n 183 edges 1069  ipagerank time :       0.000374 iters: 20
+n 183 edges 1069  dpagerank time :       0.001947 iters: 37
 Top 183 nodes:
     0 d:[   136 :   1.44427493e-01] i:[   136 :   1.51941845e+08] x:[   136 :   1.44252744e-01]
     1 d:[     0 :   6.91824797e-02] i:[     0 :   7.27819210e+07] x:[     0 :   6.91214864e-02]
@@ -5333,11 +5327,11 @@ Top 183 nodes:
   181 d:[    71 :   9.32960149e-04] i:[    71 :   9.81500000e+05] x:[    71 :   9.32960149e-04]
   182 d:[    52 :   8.65660337e-04] i:[    52 :   9.10699000e+05] x:[    52 :   8.65660337e-04]
 
-pagerank_demo:
+pagerank_demo: nthreads: 8
 ntuples: 299
 nrows 67 ncols 67
-time to prune self-edges:     0.000001
-time to build the graph with GrB_Matrix_build:     0.000108
+time to prune self-edges:     0.000000
+time to build the graph with GrB_Matrix_build:     0.000077
 leave A as-is
 
 Matrix from file:
@@ -5376,10 +5370,9 @@ Matrix from file:
     (7,4)   1
     (7,6)   1
     ...
-n 67 edges 294  dpagerank time :       0.001391 iters: 20
-n 67 edges 294  ipagerank time :       0.001280 iters: 20
-dpagerank2: pagerank objects defined at run-time
-n 67 edges 294  dpagerank time :       0.001450 iters: 11
+n 67 edges 294  dpagerank time :       0.000244 iters: 20
+n 67 edges 294  ipagerank time :       0.000195 iters: 20
+n 67 edges 294  dpagerank time :       0.000278 iters: 11
 Top 67 nodes:
     0 d:[    19 :   3.94516997e-02] i:[    19 :   4.03104180e+07] x:[    19 :   3.94480927e-02]
     1 d:[    30 :   3.14483462e-02] i:[    30 :   3.21328600e+07] x:[    30 :   3.14475525e-02]
diff --git a/Demo/Output/pthread_demo.out b/Demo/Output/pthread_demo.out
index 20dc05608d..f595670914 100644
--- a/Demo/Output/pthread_demo.out
+++ b/Demo/Output/pthread_demo.out
@@ -1,35 +1,38 @@
 Demo: ../build/pthread_demo:
+pthread demo, nthreads: 8
 GraphBLAS is using an OpenMP critical section
 to synchronize user threads.
 User threads in this program are POSIX pthreads.
 
-================= worker 0 starts:
-
 ================= worker 1 starts:
 
 ================= worker 2 starts:
 
+================= worker 0 starts:
+
 ================= worker 3 starts:
 
+================= worker 4 starts:
+
 ----------------- worker 1 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1001 out of range; must be < 6
 
 
------------------ worker 0 intentional error:
+----------------- worker 2 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1000 out of range; must be < 6
+Row index 1002 out of range; must be < 6
 
 
------------------ worker 2 intentional error:
+----------------- worker 0 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1002 out of range; must be < 6
+Row index 1000 out of range; must be < 6
 
 
-================= worker 4 starts:
+================= worker 5 starts:
 
 ----------------- worker 3 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
@@ -37,7 +40,7 @@ function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1003 out of range; must be < 6
 
 
-================= worker 5 starts:
+================= worker 6 starts:
 
 ----------------- worker 4 intentional error:
 GraphBLAS error: GrB_INVALID_INDEX
@@ -45,95 +48,50 @@ function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1004 out of range; must be < 6
 
 
------------------ worker 2 is done:
+----------------- worker 1 is done:
 
   6x6 GraphBLAS double matrix, sparse by row:
   A, 36 entries
 
-    (0,0)    101002
-    (0,1)    102002
-    (0,2)    103002
-    (0,3)    104002
-    (0,4)    105002
-    (0,5)    106002
-================= worker 6 starts:
-
-    (1,0)    201002
-    (1,1)    202002
-    (1,2)    203002
-    (1,3)    204002
-    (1,4)    205002
-    (1,5)    206002
-    (2,0)    301002
-    (2,1)    302002
-    (2,2)    303002
-    (2,3)    304002
-    (2,4)    305002
-    (2,5)    306002
-    (3,0)    401002
-    (3,1)    402002
-    (3,2)    403002
-    (3,3)    404002
-    (3,4)    405002
-    (3,5)    406002
-    (4,0)    501002
-    (4,1)    502002
-    (4,2)    503002
-    (4,3)    504002
-    (4,4)    505002
-    (4,5)    506002
+    (0,0)    101001
+    (0,1)    102001
+    (0,2)    103001
+    (0,3)    104001
+    (0,4)    105001
+    (0,5)    106001
+    (1,0)    201001
+    (1,1)    202001
+    (1,2)    203001
+    (1,3)    204001
+    (1,4)    205001
+    (1,5)    206001
+    (2,0)    301001
+    (2,1)    302001
+    (2,2)    303001
+    (2,3)    304001
+    (2,4)    305001
+    (2,5)    306001
+    (3,0)    401001
+    (3,1)    402001
+    (3,2)    403001
+    (3,3)    404001
+    (3,4)    405001
+    (3,5)    406001
+    (4,0)    501001
+    (4,1)    502001
+    (4,2)    503001
+    (4,3)    504001
+    (4,4)    505001
+    (4,5)    506001
     ...
 
-================= worker 7 starts:
-
------------------ worker 2 error should be same:
+----------------- worker 1 error should be same:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1002 out of range; must be < 6
-
-
------------------ worker 3 is done:
-
-  6x6 GraphBLAS double matrix, sparse by row:
-  A, 36 entries
-
-    (0,0)    101003
-    (0,1)    102003
-    (0,2)    103003
-    (0,3)    104003
-    (0,4)    105003
-    (0,5)    106003
-    (1,0)    201003
-    (1,1)    202003
-    (1,2)    203003
-    (1,3)    204003
-    (1,4)    205003
-    (1,5)    206003
-    (2,0)    301003
-    (2,1)    302003
-    (2,2)    303003
-    (2,3)    304003
-    (2,4)    305003
-    (2,5)    306003
-    (3,0)    401003
-    (3,1)    402003
-    (3,2)    403003
-    (3,3)    404003
-    (3,4)    405003
-    (3,5)    406003
-    (4,0)    501003
-    (4,1)    502003
-    (4,2)    503003
-    (4,3)    504003
-    (4,4)    505003
-    (4,5)    506003
-    ...
+Row index 1001 out of range; must be < 6
 
------------------ worker 3 error should be same:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1003 out of range; must be < 6
 
+================= worker 7 starts:
 
 ----------------- worker 0 is done:
 
@@ -178,47 +136,53 @@ function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1000 out of range; must be < 6
 
 
------------------ worker 1 is done:
+----------------- worker 3 is done:
 
   6x6 GraphBLAS double matrix, sparse by row:
   A, 36 entries
 
-    (0,0)    101001
-    (0,1)    102001
-    (0,2)    103001
-    (0,3)    104001
-    (0,4)    105001
-    (0,5)    106001
-    (1,0)    201001
-    (1,1)    202001
-    (1,2)    203001
-    (1,3)    204001
-    (1,4)    205001
-    (1,5)    206001
-    (2,0)    301001
-    (2,1)    302001
-    (2,2)    303001
-    (2,3)    304001
-    (2,4)    305001
-    (2,5)    306001
-    (3,0)    401001
-    (3,1)    402001
-    (3,2)    403001
-    (3,3)    404001
-    (3,4)    405001
-    (3,5)    406001
-    (4,0)    501001
-    (4,1)    502001
-    (4,2)    503001
-    (4,3)    504001
-    (4,4)    505001
-    (4,5)    506001
+    (0,0)    101003
+    (0,1)    102003
+    (0,2)    103003
+    (0,3)    104003
+    (0,4)    105003
+    (0,5)    106003
+    (1,0)    201003
+    (1,1)    202003
+    (1,2)    203003
+    (1,3)    204003
+    (1,4)    205003
+    (1,5)    206003
+    (2,0)    301003
+    (2,1)    302003
+    (2,2)    303003
+    (2,3)    304003
+    (2,4)    305003
+    (2,5)    306003
+    (3,0)    401003
+    (3,1)    402003
+    (3,2)    403003
+    (3,3)    404003
+    (3,4)    405003
+    (3,5)    406003
+    (4,0)    501003
+    (4,1)    502003
+    (4,2)    503003
+    (4,3)    504003
+    (4,4)    505003
+    (4,5)    506003
     ...
 
------------------ worker 1 error should be same:
+----------------- worker 3 error should be same:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1001 out of range; must be < 6
+Row index 1003 out of range; must be < 6
+
+
+----------------- worker 5 intentional error:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1005 out of range; must be < 6
 
 
 ----------------- worker 4 is done:
@@ -258,16 +222,47 @@ Row index 1001 out of range; must be < 6
     (4,5)    506004
     ...
 
------------------ worker 4 error should be same:
-GraphBLAS error: GrB_INVALID_INDEX
-function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1004 out of range; must be < 6
+----------------- worker 2 is done:
 
+  6x6 GraphBLAS double matrix, sparse by row:
+  A, 36 entries
 
------------------ worker 5 intentional error:
+    (0,0)    101002
+    (0,1)    102002
+    (0,2)    103002
+    (0,3)    104002
+    (0,4)    105002
+    (0,5)    106002
+    (1,0)    201002
+    (1,1)    202002
+    (1,2)    203002
+    (1,3)    204002
+    (1,4)    205002
+    (1,5)    206002
+    (2,0)    301002
+    (2,1)    302002
+    (2,2)    303002
+    (2,3)    304002
+    (2,4)    305002
+    (2,5)    306002
+    (3,0)    401002
+    (3,1)    402002
+    (3,2)    403002
+    (3,3)    404002
+    (3,4)    405002
+    (3,5)    406002
+    (4,0)    501002
+    (4,1)    502002
+    (4,2)    503002
+    (4,3)    504002
+    (4,4)    505002
+    (4,5)    506002
+    ...
+
+----------------- worker 2 error should be same:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1005 out of range; must be < 6
+Row index 1002 out of range; must be < 6
 
 
 ----------------- worker 6 intentional error:
@@ -325,47 +320,10 @@ function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1005 out of range; must be < 6
 
 
------------------ worker 6 is done:
-
-  6x6 GraphBLAS double matrix, sparse by row:
-  A, 36 entries
-
-    (0,0)    101006
-    (0,1)    102006
-    (0,2)    103006
-    (0,3)    104006
-    (0,4)    105006
-    (0,5)    106006
-    (1,0)    201006
-    (1,1)    202006
-    (1,2)    203006
-    (1,3)    204006
-    (1,4)    205006
-    (1,5)    206006
-    (2,0)    301006
-    (2,1)    302006
-    (2,2)    303006
-    (2,3)    304006
-    (2,4)    305006
-    (2,5)    306006
-    (3,0)    401006
-    (3,1)    402006
-    (3,2)    403006
-    (3,3)    404006
-    (3,4)    405006
-    (3,5)    406006
-    (4,0)    501006
-    (4,1)    502006
-    (4,2)    503006
-    (4,3)    504006
-    (4,4)    505006
-    (4,5)    506006
-    ...
-
------------------ worker 6 error should be same:
+----------------- worker 4 error should be same:
 GraphBLAS error: GrB_INVALID_INDEX
 function: GrB_Matrix_setElement_INT32 (C, row, col, x)
-Row index 1006 out of range; must be < 6
+Row index 1004 out of range; must be < 6
 
 
 ----------------- worker 7 is done:
@@ -411,6 +369,49 @@ function: GrB_Matrix_setElement_INT32 (C, row, col, x)
 Row index 1007 out of range; must be < 6
 
 
+----------------- worker 6 is done:
+
+  6x6 GraphBLAS double matrix, sparse by row:
+  A, 36 entries
+
+    (0,0)    101006
+    (0,1)    102006
+    (0,2)    103006
+    (0,3)    104006
+    (0,4)    105006
+    (0,5)    106006
+    (1,0)    201006
+    (1,1)    202006
+    (1,2)    203006
+    (1,3)    204006
+    (1,4)    205006
+    (1,5)    206006
+    (2,0)    301006
+    (2,1)    302006
+    (2,2)    303006
+    (2,3)    304006
+    (2,4)    305006
+    (2,5)    306006
+    (3,0)    401006
+    (3,1)    402006
+    (3,2)    403006
+    (3,3)    404006
+    (3,4)    405006
+    (3,5)    406006
+    (4,0)    501006
+    (4,1)    502006
+    (4,2)    503006
+    (4,3)    504006
+    (4,4)    505006
+    (4,5)    506006
+    ...
+
+----------------- worker 6 error should be same:
+GraphBLAS error: GrB_INVALID_INDEX
+function: GrB_Matrix_setElement_INT32 (C, row, col, x)
+Row index 1006 out of range; must be < 6
+
+
 ---- Master prints matrix 0
 
   6x6 GraphBLAS double matrix, sparse by row:
diff --git a/Demo/Output/simple_demo.out b/Demo/Output/simple_demo.out
index 282e716dce..cd420f309c 100644
--- a/Demo/Output/simple_demo.out
+++ b/Demo/Output/simple_demo.out
@@ -1,5 +1,5 @@
-time to call simple_tic 1 million times: 0.0453296
-time to generate 10 million random numbers: 0.326611
+time to call simple_tic 1 million times: 0.0337404
+time to generate 10 million random numbers: 0.178885
 first 10 random numbers:
     0.257524
     0.688456
@@ -11,4 +11,4 @@ first 10 random numbers:
     0.566362
     0.674999
     0.681490
-time to generate 10 million random uint64: 0.176716
+time to generate 10 million random uint64: 0.106817
diff --git a/Demo/Output/tri_demo.out b/Demo/Output/tri_demo.out
index c1196ce408..3aeaced60f 100644
--- a/Demo/Output/tri_demo.out
+++ b/Demo/Output/tri_demo.out
@@ -1,2800 +1,1960 @@
 --------------------------------------------------------------
-Wathen: nx 4 ny 4 n 65 nz 752 method 0, time: 0.001 sec
+Wathen: nx 4 ny 4 n 65 nz 752 method 0, time: 0.000 sec
 
-total time to read A matrix:       0.000781 sec
+total time to read A matrix:       0.000251 sec
 
 n 65 # edges 376
-U=triu(A) time:        0.000069 sec
-L=tril(A) time:        0.000031 sec
+U=triu(A) time:        0.000031 sec
+L=tril(A) time:        0.000010 sec
 
 ------------------------------------- dot product method:
 # triangles 872
-L*U' time (dot):         0.000076 sec
-tricount time:         0.000115 sec (dot product method)
-tri+prep time:         0.000214 sec (incl time to compute L and U)
-compute C time:        0.000076 sec
-reduce (C) time:       0.000038 sec
-rate       1.75 million edges/sec (incl time for U=triu(A))
-rate       3.28 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000037 sec (nthreads: 2 speedup 2.05718)
-tricount time:         0.000052 sec (dot product method)
-tri+prep time:         0.000152 sec (incl time to compute L and U)
-compute C time:        0.000037 sec
-reduce (C) time:       0.000015 sec
-rate       2.47 million edges/sec (incl time for U=triu(A))
-rate       7.19 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000032 sec (nthreads: 4 speedup 2.35925)
-tricount time:         0.000046 sec (dot product method)
-tri+prep time:         0.000146 sec (incl time to compute L and U)
-compute C time:        0.000032 sec
-reduce (C) time:       0.000014 sec
-rate       2.58 million edges/sec (incl time for U=triu(A))
-rate       8.16 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000032 sec (nthreads: 8 speedup 2.38705)
-tricount time:         0.000046 sec (dot product method)
-tri+prep time:         0.000146 sec (incl time to compute L and U)
-compute C time:        0.000032 sec
-reduce (C) time:       0.000014 sec
-rate       2.58 million edges/sec (incl time for U=triu(A))
-rate       8.20 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000031 sec (nthreads: 16 speedup 2.42413)
-tricount time:         0.000045 sec (dot product method)
-tri+prep time:         0.000145 sec (incl time to compute L and U)
-compute C time:        0.000031 sec
-reduce (C) time:       0.000013 sec
-rate       2.60 million edges/sec (incl time for U=triu(A))
-rate       8.43 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000031 sec (nthreads: 32 speedup 2.46572)
-tricount time:         0.000044 sec (dot product method)
-tri+prep time:         0.000144 sec (incl time to compute L and U)
-compute C time:        0.000031 sec
-reduce (C) time:       0.000013 sec
-rate       2.61 million edges/sec (incl time for U=triu(A))
-rate       8.48 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000044 sec
-tricount time:         0.000059 sec (dot product method)
-tri+prep time:         0.000159 sec (incl time to compute L and U)
-compute C time:        0.000044 sec
-reduce (C) time:       0.000015 sec
-rate       2.37 million edges/sec (incl time for U=triu(A))
-rate       6.40 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000034 sec (nthreads: 2 speedup 1.29542)
-tricount time:         0.000048 sec (dot product method)
-tri+prep time:         0.000147 sec (incl time to compute L and U)
-compute C time:        0.000034 sec
-reduce (C) time:       0.000014 sec
-rate       2.55 million edges/sec (incl time for U=triu(A))
-rate       7.90 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000032 sec (nthreads: 4 speedup 1.37174)
-tricount time:         0.000045 sec (dot product method)
-tri+prep time:         0.000145 sec (incl time to compute L and U)
-compute C time:        0.000032 sec
-reduce (C) time:       0.000013 sec
-rate       2.59 million edges/sec (incl time for U=triu(A))
-rate       8.30 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000033 sec (nthreads: 8 speedup 1.33492)
-tricount time:         0.000046 sec (dot product method)
-tri+prep time:         0.000146 sec (incl time to compute L and U)
-compute C time:        0.000033 sec
-reduce (C) time:       0.000013 sec
-rate       2.58 million edges/sec (incl time for U=triu(A))
-rate       8.16 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000032 sec (nthreads: 16 speedup 1.3672)
-tricount time:         0.000045 sec (dot product method)
-tri+prep time:         0.000145 sec (incl time to compute L and U)
-compute C time:        0.000032 sec
-reduce (C) time:       0.000013 sec
-rate       2.59 million edges/sec (incl time for U=triu(A))
-rate       8.27 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000034 sec (nthreads: 32 speedup 1.28516)
-tricount time:         0.000048 sec (dot product method)
-tri+prep time:         0.000148 sec (incl time to compute L and U)
-compute C time:        0.000034 sec
-reduce (C) time:       0.000014 sec
-rate       2.54 million edges/sec (incl time for U=triu(A))
-rate       7.84 million edges/sec (just tricount itself)
-
------------------------------------ saxpy method:
-C<L>=L*L time (saxpy):         0.000051 sec
-tricount time:         0.000055 sec (saxpy method)
-tri+prep time:         0.000086 sec (incl time to compute L)
-compute C time:        0.000051 sec
-reduce (C) time:       0.000004 sec
-rate       4.36 million edges/sec (incl time for L=tril(A))
-rate       6.78 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000026 sec (nthreads: 2 speedup 1.98067)
-tricount time:         0.000030 sec (saxpy method)
-tri+prep time:         0.000060 sec (incl time to compute L)
-compute C time:        0.000026 sec
+L*U' time (dot):         0.000048 sec
+tricount time:         0.000058 sec (dot product method)
+tri+prep time:         0.000099 sec (incl time to compute L and U)
+compute C time:        0.000048 sec
+reduce (C) time:       0.000010 sec
+rate     3.79 million edges/sec (incl time for U=triu(A))
+rate     6.47 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000014 sec (nthreads: 2 speedup 3.51141)
+tricount time:         0.000018 sec (dot product method)
+tri+prep time:         0.000059 sec (incl time to compute L and U)
+compute C time:        0.000014 sec
 reduce (C) time:       0.000004 sec
-rate       6.22 million edges/sec (incl time for L=tril(A))
-rate      12.66 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000022 sec (nthreads: 4 speedup 2.35482)
-tricount time:         0.000026 sec (saxpy method)
-tri+prep time:         0.000056 sec (incl time to compute L)
-compute C time:        0.000022 sec
+rate     6.36 million edges/sec (incl time for U=triu(A))
+rate    20.96 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000013 sec (nthreads: 4 speedup 3.66527)
+tricount time:         0.000019 sec (dot product method)
+tri+prep time:         0.000060 sec (incl time to compute L and U)
+compute C time:        0.000013 sec
+reduce (C) time:       0.000005 sec
+rate     6.30 million edges/sec (incl time for U=triu(A))
+rate    20.32 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000015 sec (nthreads: 8 speedup 3.24119)
+tricount time:         0.000019 sec (dot product method)
+tri+prep time:         0.000060 sec (incl time to compute L and U)
+compute C time:        0.000015 sec
 reduce (C) time:       0.000004 sec
-rate       6.68 million edges/sec (incl time for L=tril(A))
-rate      14.71 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000021 sec (nthreads: 8 speedup 2.45174)
-tricount time:         0.000025 sec (saxpy method)
-tri+prep time:         0.000055 sec (incl time to compute L)
-compute C time:        0.000021 sec
+rate     6.27 million edges/sec (incl time for U=triu(A))
+rate    20.03 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000019 sec
+tricount time:         0.000025 sec (dot product method)
+tri+prep time:         0.000067 sec (incl time to compute L and U)
+compute C time:        0.000019 sec
+reduce (C) time:       0.000006 sec
+rate     5.65 million edges/sec (incl time for U=triu(A))
+rate    14.83 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000016 sec (nthreads: 2 speedup 1.23569)
+tricount time:         0.000020 sec (dot product method)
+tri+prep time:         0.000061 sec (incl time to compute L and U)
+compute C time:        0.000016 sec
 reduce (C) time:       0.000004 sec
-rate       6.79 million edges/sec (incl time for L=tril(A))
-rate      15.25 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000020 sec (nthreads: 16 speedup 2.51236)
-tricount time:         0.000024 sec (saxpy method)
-tri+prep time:         0.000055 sec (incl time to compute L)
-compute C time:        0.000020 sec
+rate     6.18 million edges/sec (incl time for U=triu(A))
+rate    19.09 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000013 sec (nthreads: 4 speedup 1.4938)
+tricount time:         0.000018 sec (dot product method)
+tri+prep time:         0.000060 sec (incl time to compute L and U)
+compute C time:        0.000013 sec
+reduce (C) time:       0.000005 sec
+rate     6.31 million edges/sec (incl time for U=triu(A))
+rate    20.46 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000015 sec (nthreads: 8 speedup 1.31382)
+tricount time:         0.000019 sec (dot product method)
+tri+prep time:         0.000060 sec (incl time to compute L and U)
+compute C time:        0.000015 sec
 reduce (C) time:       0.000004 sec
-rate       6.85 million edges/sec (incl time for L=tril(A))
-rate      15.58 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000020 sec (nthreads: 32 speedup 2.54103)
-tricount time:         0.000024 sec (saxpy method)
-tri+prep time:         0.000055 sec (incl time to compute L)
+rate     6.27 million edges/sec (incl time for U=triu(A))
+rate    20.06 million edges/sec (just tricount itself)
+
+----------------------------------- saxpy method:
+C<L>=L*L time (saxpy):         0.000046 sec
+tricount time:         0.000048 sec (saxpy method)
+tri+prep time:         0.000058 sec (incl time to compute L)
+compute C time:        0.000046 sec
+reduce (C) time:       0.000002 sec
+rate     6.51 million edges/sec (incl time for L=tril(A))
+rate     7.83 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000020 sec (nthreads: 2 speedup 2.36859)
+tricount time:         0.000021 sec (saxpy method)
+tri+prep time:         0.000031 sec (incl time to compute L)
 compute C time:        0.000020 sec
-reduce (C) time:       0.000004 sec
-rate       6.88 million edges/sec (incl time for L=tril(A))
-rate      15.73 million edges/sec (just tricount itself)
+reduce (C) time:       0.000001 sec
+rate    12.31 million edges/sec (incl time for L=tril(A))
+rate    18.09 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000019 sec (nthreads: 4 speedup 2.3942)
+tricount time:         0.000021 sec (saxpy method)
+tri+prep time:         0.000031 sec (incl time to compute L)
+compute C time:        0.000019 sec
+reduce (C) time:       0.000002 sec
+rate    12.18 million edges/sec (incl time for L=tril(A))
+rate    17.80 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000025 sec (nthreads: 8 speedup 1.85853)
+tricount time:         0.000026 sec (saxpy method)
+tri+prep time:         0.000036 sec (incl time to compute L)
+compute C time:        0.000025 sec
+reduce (C) time:       0.000001 sec
+rate    10.40 million edges/sec (incl time for L=tril(A))
+rate    14.23 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 random 5 by 5, nz: 18, method 1 time 0.000 sec
 
-total time to read A matrix:       0.000227 sec
+total time to read A matrix:       0.000102 sec
 
 n 5 # edges 9
-U=triu(A) time:        0.000069 sec
-L=tril(A) time:        0.000023 sec
+U=triu(A) time:        0.000035 sec
+L=tril(A) time:        0.000005 sec
 
 ------------------------------------- dot product method:
 # triangles 7
-L*U' time (dot):         0.000039 sec
-tricount time:         0.000068 sec (dot product method)
-tri+prep time:         0.000161 sec (incl time to compute L and U)
-compute C time:        0.000039 sec
-reduce (C) time:       0.000030 sec
-rate       0.06 million edges/sec (incl time for U=triu(A))
-rate       0.13 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000013 sec (nthreads: 2 speedup 2.96188)
-tricount time:         0.000022 sec (dot product method)
-tri+prep time:         0.000115 sec (incl time to compute L and U)
-compute C time:        0.000013 sec
-reduce (C) time:       0.000009 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.41 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 4 speedup 3.95451)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000111 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000008 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.50 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 8 speedup 4.06088)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000110 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000008 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.52 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 16 speedup 3.98864)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000110 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000008 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.52 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 32 speedup 4.15204)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000109 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000007 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.54 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000112 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000009 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.46 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 2 speedup 1.11159)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000110 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000008 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.53 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 4 speedup 1.14139)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000110 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000008 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.53 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 1.1352)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000110 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000008 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.53 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 16 speedup 1.17036)
-tricount time:         0.000016 sec (dot product method)
-tri+prep time:         0.000109 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000007 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.55 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000012 sec (nthreads: 32 speedup 0.903981)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000112 sec (incl time to compute L and U)
-compute C time:        0.000012 sec
+L*U' time (dot):         0.000035 sec
+tricount time:         0.000043 sec (dot product method)
+tri+prep time:         0.000084 sec (incl time to compute L and U)
+compute C time:        0.000035 sec
 reduce (C) time:       0.000008 sec
-rate       0.08 million edges/sec (incl time for U=triu(A))
-rate       0.46 million edges/sec (just tricount itself)
+rate     0.11 million edges/sec (incl time for U=triu(A))
+rate     0.21 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000005 sec (nthreads: 2 speedup 6.64765)
+tricount time:         0.000008 sec (dot product method)
+tri+prep time:         0.000049 sec (incl time to compute L and U)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000003 sec
+rate     0.18 million edges/sec (incl time for U=triu(A))
+rate     1.12 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000005 sec (nthreads: 4 speedup 7.46184)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000048 sec (incl time to compute L and U)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000002 sec
+rate     0.19 million edges/sec (incl time for U=triu(A))
+rate     1.33 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 8 speedup 13.0022)
+tricount time:         0.000004 sec (dot product method)
+tri+prep time:         0.000045 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000002 sec
+rate     0.20 million edges/sec (incl time for U=triu(A))
+rate     2.06 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000005 sec
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000048 sec (incl time to compute L and U)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000002 sec
+rate     0.19 million edges/sec (incl time for U=triu(A))
+rate     1.29 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 1.25108)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000002 sec
+rate     0.19 million edges/sec (incl time for U=triu(A))
+rate     1.49 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 1.09584)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000002 sec
+rate     0.19 million edges/sec (incl time for U=triu(A))
+rate     1.42 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 8 speedup 2.01637)
+tricount time:         0.000004 sec (dot product method)
+tri+prep time:         0.000045 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
+reduce (C) time:       0.000002 sec
+rate     0.20 million edges/sec (incl time for U=triu(A))
+rate     2.24 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000083 sec
-tricount time:         0.000084 sec (saxpy method)
-tri+prep time:         0.000107 sec (incl time to compute L)
-compute C time:        0.000083 sec
-reduce (C) time:       0.000001 sec
-rate       0.08 million edges/sec (incl time for L=tril(A))
-rate       0.11 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000035 sec (nthreads: 2 speedup 2.36573)
-tricount time:         0.000036 sec (saxpy method)
-tri+prep time:         0.000059 sec (incl time to compute L)
-compute C time:        0.000035 sec
-reduce (C) time:       0.000001 sec
-rate       0.15 million edges/sec (incl time for L=tril(A))
-rate       0.25 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000038 sec (nthreads: 4 speedup 2.17449)
-tricount time:         0.000039 sec (saxpy method)
-tri+prep time:         0.000062 sec (incl time to compute L)
-compute C time:        0.000038 sec
-reduce (C) time:       0.000001 sec
-rate       0.14 million edges/sec (incl time for L=tril(A))
-rate       0.23 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000046 sec (nthreads: 8 speedup 1.79437)
-tricount time:         0.000047 sec (saxpy method)
-tri+prep time:         0.000070 sec (incl time to compute L)
-compute C time:        0.000046 sec
-reduce (C) time:       0.000001 sec
-rate       0.13 million edges/sec (incl time for L=tril(A))
-rate       0.19 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000074 sec (nthreads: 16 speedup 1.12451)
-tricount time:         0.000075 sec (saxpy method)
-tri+prep time:         0.000098 sec (incl time to compute L)
-compute C time:        0.000074 sec
-reduce (C) time:       0.000001 sec
-rate       0.09 million edges/sec (incl time for L=tril(A))
-rate       0.12 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000210 sec (nthreads: 32 speedup 0.393857)
-tricount time:         0.000212 sec (saxpy method)
-tri+prep time:         0.000235 sec (incl time to compute L)
-compute C time:        0.000210 sec
-reduce (C) time:       0.000001 sec
-rate       0.04 million edges/sec (incl time for L=tril(A))
-rate       0.04 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000018 sec
+tricount time:         0.000019 sec (saxpy method)
+tri+prep time:         0.000024 sec (incl time to compute L)
+compute C time:        0.000018 sec
+reduce (C) time:       0.000000 sec
+rate     0.37 million edges/sec (incl time for L=tril(A))
+rate     0.48 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 2 speedup 3.96474)
+tricount time:         0.000005 sec (saxpy method)
+tri+prep time:         0.000010 sec (incl time to compute L)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000000 sec
+rate     0.87 million edges/sec (incl time for L=tril(A))
+rate     1.84 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000004 sec (nthreads: 4 speedup 5.08173)
+tricount time:         0.000004 sec (saxpy method)
+tri+prep time:         0.000009 sec (incl time to compute L)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000000 sec
+rate     0.96 million edges/sec (incl time for L=tril(A))
+rate     2.33 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 8 speedup 3.7378)
+tricount time:         0.000005 sec (saxpy method)
+tri+prep time:         0.000011 sec (incl time to compute L)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000000 sec
+rate     0.84 million edges/sec (incl time for L=tril(A))
+rate     1.70 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 3 by 3, 0 entries, from stdin
 
-total time to read A matrix:       0.000494 sec
+total time to read A matrix:       0.000138 sec
 
 n 3 # edges 0
-U=triu(A) time:        0.000065 sec
-L=tril(A) time:        0.000060 sec
+U=triu(A) time:        0.000033 sec
+L=tril(A) time:        0.000005 sec
 
 ------------------------------------- dot product method:
 # triangles 0
-L*U' time (dot):         0.000037 sec
-tricount time:         0.000041 sec (dot product method)
-tri+prep time:         0.000166 sec (incl time to compute L and U)
-compute C time:        0.000037 sec
-reduce (C) time:       0.000004 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000011 sec (nthreads: 2 speedup 3.26355)
-tricount time:         0.000012 sec (dot product method)
-tri+prep time:         0.000137 sec (incl time to compute L and U)
-compute C time:        0.000011 sec
-reduce (C) time:       0.000001 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 4 speedup 3.99644)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000135 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000001 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 4.35695)
-tricount time:         0.000009 sec (dot product method)
-tri+prep time:         0.000134 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000001 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 16 speedup 8.56816)
-tricount time:         0.000005 sec (dot product method)
-tri+prep time:         0.000130 sec (incl time to compute L and U)
-compute C time:        0.000004 sec
+L*U' time (dot):         0.000025 sec
+tricount time:         0.000027 sec (dot product method)
+tri+prep time:         0.000065 sec (incl time to compute L and U)
+compute C time:        0.000025 sec
+reduce (C) time:       0.000003 sec
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 2 speedup 8.4299)
+tricount time:         0.000003 sec (dot product method)
+tri+prep time:         0.000041 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 32 speedup 9.1579)
-tricount time:         0.000004 sec (dot product method)
-tri+prep time:         0.000129 sec (incl time to compute L and U)
-compute C time:        0.000004 sec
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 4 speedup 11.9314)
+tricount time:         0.000002 sec (dot product method)
+tri+prep time:         0.000040 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec
-tricount time:         0.000005 sec (dot product method)
-tri+prep time:         0.000130 sec (incl time to compute L and U)
-compute C time:        0.000004 sec
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 8 speedup 13.4563)
+tricount time:         0.000002 sec (dot product method)
+tri+prep time:         0.000040 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 1.12559)
-tricount time:         0.000004 sec (dot product method)
-tri+prep time:         0.000129 sec (incl time to compute L and U)
-compute C time:        0.000004 sec
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec
+tricount time:         0.000003 sec (dot product method)
+tri+prep time:         0.000040 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 1.06175)
-tricount time:         0.000004 sec (dot product method)
-tri+prep time:         0.000129 sec (incl time to compute L and U)
-compute C time:        0.000004 sec
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 2 speedup 1.1864)
+tricount time:         0.000002 sec (dot product method)
+tri+prep time:         0.000040 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 8 speedup 1.13744)
-tricount time:         0.000004 sec (dot product method)
-tri+prep time:         0.000129 sec (incl time to compute L and U)
-compute C time:        0.000004 sec
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 4 speedup 1.17933)
+tricount time:         0.000002 sec (dot product method)
+tri+prep time:         0.000040 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 16 speedup 0.911892)
-tricount time:         0.000005 sec (dot product method)
-tri+prep time:         0.000130 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 8 speedup 1.18819)
+tricount time:         0.000002 sec (dot product method)
+tri+prep time:         0.000040 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 32 speedup 0.849426)
-tricount time:         0.000006 sec (dot product method)
-tri+prep time:         0.000131 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000001 sec
-rate       0.00 million edges/sec (incl time for U=triu(A))
-rate       0.00 million edges/sec (just tricount itself)
+rate     0.00 million edges/sec (incl time for U=triu(A))
+rate     0.00 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000032 sec
-tricount time:         0.000032 sec (saxpy method)
-tri+prep time:         0.000093 sec (incl time to compute L)
-compute C time:        0.000032 sec
-reduce (C) time:       0.000001 sec
-rate       0.00 million edges/sec (incl time for L=tril(A))
-rate       0.00 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000007 sec (nthreads: 2 speedup 4.83128)
-tricount time:         0.000007 sec (saxpy method)
-tri+prep time:         0.000067 sec (incl time to compute L)
-compute C time:        0.000007 sec
-reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for L=tril(A))
-rate       0.00 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 4 speedup 6.37424)
-tricount time:         0.000005 sec (saxpy method)
-tri+prep time:         0.000065 sec (incl time to compute L)
-compute C time:        0.000005 sec
+C<L>=L*L time (saxpy):         0.000016 sec
+tricount time:         0.000016 sec (saxpy method)
+tri+prep time:         0.000021 sec (incl time to compute L)
+compute C time:        0.000016 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for L=tril(A))
-rate       0.00 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 8 speedup 6.72478)
-tricount time:         0.000005 sec (saxpy method)
-tri+prep time:         0.000065 sec (incl time to compute L)
-compute C time:        0.000005 sec
+rate     0.00 million edges/sec (incl time for L=tril(A))
+rate     0.00 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000004 sec (nthreads: 2 speedup 4.13975)
+tricount time:         0.000004 sec (saxpy method)
+tri+prep time:         0.000009 sec (incl time to compute L)
+compute C time:        0.000004 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for L=tril(A))
-rate       0.00 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 16 speedup 6.33389)
-tricount time:         0.000005 sec (saxpy method)
-tri+prep time:         0.000065 sec (incl time to compute L)
-compute C time:        0.000005 sec
+rate     0.00 million edges/sec (incl time for L=tril(A))
+rate     0.00 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000003 sec (nthreads: 4 speedup 4.78922)
+tricount time:         0.000004 sec (saxpy method)
+tri+prep time:         0.000009 sec (incl time to compute L)
+compute C time:        0.000003 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for L=tril(A))
-rate       0.00 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 32 speedup 6.6533)
+rate     0.00 million edges/sec (incl time for L=tril(A))
+rate     0.00 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 8 speedup 3.47362)
 tricount time:         0.000005 sec (saxpy method)
-tri+prep time:         0.000065 sec (incl time to compute L)
+tri+prep time:         0.000010 sec (incl time to compute L)
 compute C time:        0.000005 sec
 reduce (C) time:       0.000000 sec
-rate       0.00 million edges/sec (incl time for L=tril(A))
-rate       0.00 million edges/sec (just tricount itself)
+rate     0.00 million edges/sec (incl time for L=tril(A))
+rate     0.00 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 4 by 4, 4 entries, from stdin
 
-total time to read A matrix:       0.000469 sec
+total time to read A matrix:       0.000148 sec
 
 n 4 # edges 2
-U=triu(A) time:        0.000110 sec
-L=tril(A) time:        0.000023 sec
+U=triu(A) time:        0.000038 sec
+L=tril(A) time:        0.000005 sec
 
 ------------------------------------- dot product method:
 # triangles 0
-L*U' time (dot):         0.000036 sec
-tricount time:         0.000069 sec (dot product method)
-tri+prep time:         0.000203 sec (incl time to compute L and U)
-compute C time:        0.000036 sec
-reduce (C) time:       0.000033 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.03 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000012 sec (nthreads: 2 speedup 3.0776)
-tricount time:         0.000025 sec (dot product method)
-tri+prep time:         0.000159 sec (incl time to compute L and U)
-compute C time:        0.000012 sec
-reduce (C) time:       0.000013 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.08 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 4 speedup 3.86328)
-tricount time:         0.000021 sec (dot product method)
-tri+prep time:         0.000155 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000012 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 3.93326)
-tricount time:         0.000021 sec (dot product method)
-tri+prep time:         0.000155 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000012 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 16 speedup 4.02226)
-tricount time:         0.000021 sec (dot product method)
-tri+prep time:         0.000154 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000012 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 32 speedup 4.02593)
-tricount time:         0.000020 sec (dot product method)
-tri+prep time:         0.000154 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000011 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000011 sec
-tricount time:         0.000024 sec (dot product method)
-tri+prep time:         0.000158 sec (incl time to compute L and U)
-compute C time:        0.000011 sec
-reduce (C) time:       0.000013 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.08 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 2 speedup 1.18349)
-tricount time:         0.000021 sec (dot product method)
-tri+prep time:         0.000155 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000012 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.09 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 4 speedup 1.19032)
-tricount time:         0.000021 sec (dot product method)
-tri+prep time:         0.000155 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000011 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 1.28583)
-tricount time:         0.000020 sec (dot product method)
-tri+prep time:         0.000154 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000011 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 16 speedup 1.2805)
-tricount time:         0.000020 sec (dot product method)
-tri+prep time:         0.000154 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000011 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000012 sec (nthreads: 32 speedup 0.951587)
-tricount time:         0.000024 sec (dot product method)
-tri+prep time:         0.000158 sec (incl time to compute L and U)
-compute C time:        0.000012 sec
-reduce (C) time:       0.000012 sec
-rate       0.01 million edges/sec (incl time for U=triu(A))
-rate       0.08 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000028 sec
+tricount time:         0.000037 sec (dot product method)
+tri+prep time:         0.000079 sec (incl time to compute L and U)
+compute C time:        0.000028 sec
+reduce (C) time:       0.000009 sec
+rate     0.03 million edges/sec (incl time for U=triu(A))
+rate     0.05 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000005 sec (nthreads: 2 speedup 5.56541)
+tricount time:         0.000010 sec (dot product method)
+tri+prep time:         0.000052 sec (incl time to compute L and U)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000005 sec
+rate     0.04 million edges/sec (incl time for U=triu(A))
+rate     0.21 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000005 sec (nthreads: 4 speedup 5.54574)
+tricount time:         0.000008 sec (dot product method)
+tri+prep time:         0.000051 sec (incl time to compute L and U)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000003 sec
+rate     0.04 million edges/sec (incl time for U=triu(A))
+rate     0.24 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 8 speedup 12.1662)
+tricount time:         0.000005 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
+reduce (C) time:       0.000002 sec
+rate     0.04 million edges/sec (incl time for U=triu(A))
+rate     0.43 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec
+tricount time:         0.000005 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000003 sec
+rate     0.04 million edges/sec (incl time for U=triu(A))
+rate     0.40 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 0.644123)
+tricount time:         0.000008 sec (dot product method)
+tri+prep time:         0.000051 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000004 sec
+rate     0.04 million edges/sec (incl time for U=triu(A))
+rate     0.25 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000005 sec (nthreads: 4 speedup 0.545989)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000050 sec (incl time to compute L and U)
+compute C time:        0.000005 sec
+reduce (C) time:       0.000003 sec
+rate     0.04 million edges/sec (incl time for U=triu(A))
+rate     0.27 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 8 speedup 1.134)
+tricount time:         0.000004 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
+reduce (C) time:       0.000002 sec
+rate     0.04 million edges/sec (incl time for U=triu(A))
+rate     0.45 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000078 sec
-tricount time:         0.000080 sec (saxpy method)
-tri+prep time:         0.000103 sec (incl time to compute L)
-compute C time:        0.000078 sec
-reduce (C) time:       0.000001 sec
-rate       0.02 million edges/sec (incl time for L=tril(A))
-rate       0.03 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000045 sec (nthreads: 2 speedup 1.7373)
-tricount time:         0.000046 sec (saxpy method)
-tri+prep time:         0.000070 sec (incl time to compute L)
-compute C time:        0.000045 sec
-reduce (C) time:       0.000001 sec
-rate       0.03 million edges/sec (incl time for L=tril(A))
-rate       0.04 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000044 sec (nthreads: 4 speedup 1.78566)
-tricount time:         0.000045 sec (saxpy method)
-tri+prep time:         0.000068 sec (incl time to compute L)
-compute C time:        0.000044 sec
+C<L>=L*L time (saxpy):         0.000040 sec
+tricount time:         0.000041 sec (saxpy method)
+tri+prep time:         0.000046 sec (incl time to compute L)
+compute C time:        0.000040 sec
 reduce (C) time:       0.000001 sec
-rate       0.03 million edges/sec (incl time for L=tril(A))
-rate       0.04 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000064 sec (nthreads: 8 speedup 1.21826)
-tricount time:         0.000065 sec (saxpy method)
-tri+prep time:         0.000089 sec (incl time to compute L)
-compute C time:        0.000064 sec
+rate     0.04 million edges/sec (incl time for L=tril(A))
+rate     0.05 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000018 sec (nthreads: 2 speedup 2.21528)
+tricount time:         0.000019 sec (saxpy method)
+tri+prep time:         0.000024 sec (incl time to compute L)
+compute C time:        0.000018 sec
 reduce (C) time:       0.000001 sec
-rate       0.02 million edges/sec (incl time for L=tril(A))
-rate       0.03 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000096 sec (nthreads: 16 speedup 0.812331)
-tricount time:         0.000097 sec (saxpy method)
-tri+prep time:         0.000121 sec (incl time to compute L)
-compute C time:        0.000096 sec
+rate     0.08 million edges/sec (incl time for L=tril(A))
+rate     0.11 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000019 sec (nthreads: 4 speedup 2.15104)
+tricount time:         0.000019 sec (saxpy method)
+tri+prep time:         0.000024 sec (incl time to compute L)
+compute C time:        0.000019 sec
 reduce (C) time:       0.000001 sec
-rate       0.02 million edges/sec (incl time for L=tril(A))
-rate       0.02 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000175 sec (nthreads: 32 speedup 0.446016)
-tricount time:         0.000177 sec (saxpy method)
-tri+prep time:         0.000200 sec (incl time to compute L)
-compute C time:        0.000175 sec
+rate     0.08 million edges/sec (incl time for L=tril(A))
+rate     0.10 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000024 sec (nthreads: 8 speedup 1.64287)
+tricount time:         0.000025 sec (saxpy method)
+tri+prep time:         0.000030 sec (incl time to compute L)
+compute C time:        0.000024 sec
 reduce (C) time:       0.000001 sec
-rate       0.01 million edges/sec (incl time for L=tril(A))
-rate       0.01 million edges/sec (just tricount itself)
+rate     0.07 million edges/sec (incl time for L=tril(A))
+rate     0.08 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 4 by 4, 10 entries, from stdin
 
-total time to read A matrix:       0.000276 sec
+total time to read A matrix:       0.000188 sec
 
 n 4 # edges 5
-U=triu(A) time:        0.000032 sec
-L=tril(A) time:        0.000011 sec
+U=triu(A) time:        0.000038 sec
+L=tril(A) time:        0.000005 sec
 
 ------------------------------------- dot product method:
 # triangles 2
-L*U' time (dot):         0.000019 sec
-tricount time:         0.000074 sec (dot product method)
-tri+prep time:         0.000118 sec (incl time to compute L and U)
-compute C time:        0.000019 sec
-reduce (C) time:       0.000055 sec
-rate       0.04 million edges/sec (incl time for U=triu(A))
-rate       0.07 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000007 sec (nthreads: 2 speedup 2.57759)
-tricount time:         0.000011 sec (dot product method)
-tri+prep time:         0.000055 sec (incl time to compute L and U)
-compute C time:        0.000007 sec
-reduce (C) time:       0.000004 sec
-rate       0.09 million edges/sec (incl time for U=triu(A))
-rate       0.44 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 4 speedup 3.97088)
-tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000052 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000004 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.60 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 8 speedup 4.4361)
-tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000051 sec (incl time to compute L and U)
+L*U' time (dot):         0.000030 sec
+tricount time:         0.000038 sec (dot product method)
+tri+prep time:         0.000080 sec (incl time to compute L and U)
+compute C time:        0.000030 sec
+reduce (C) time:       0.000008 sec
+rate     0.06 million edges/sec (incl time for U=triu(A))
+rate     0.13 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 8.52063)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000049 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000003 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.65 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 16 speedup 4.46366)
+reduce (C) time:       0.000002 sec
+rate     0.10 million edges/sec (incl time for U=triu(A))
+rate     0.87 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 4 speedup 11.9581)
+tricount time:         0.000004 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000002 sec
+rate     0.11 million edges/sec (incl time for U=triu(A))
+rate     1.12 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000002 sec (nthreads: 8 speedup 13.6524)
+tricount time:         0.000004 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000002 sec
+reduce (C) time:       0.000002 sec
+rate     0.11 million edges/sec (incl time for U=triu(A))
+rate     1.23 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec
 tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000051 sec (incl time to compute L and U)
+tri+prep time:         0.000050 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
 reduce (C) time:       0.000003 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.65 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 32 speedup 3.79627)
-tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000052 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000003 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.60 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec
-tricount time:         0.000009 sec (dot product method)
-tri+prep time:         0.000052 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000004 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.57 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 1.15362)
-tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000051 sec (incl time to compute L and U)
+rate     0.10 million edges/sec (incl time for U=triu(A))
+rate     0.66 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 1.01534)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000049 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000003 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.64 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 1.1235)
-tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000051 sec (incl time to compute L and U)
+reduce (C) time:       0.000002 sec
+rate     0.10 million edges/sec (incl time for U=triu(A))
+rate     0.81 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 1.07426)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000049 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000003 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.64 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 8 speedup 1.17186)
-tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000051 sec (incl time to compute L and U)
+reduce (C) time:       0.000002 sec
+rate     0.10 million edges/sec (incl time for U=triu(A))
+rate     0.86 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 8 speedup 1.02727)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000049 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000003 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.66 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 16 speedup 1.11724)
-tricount time:         0.000008 sec (dot product method)
-tri+prep time:         0.000051 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000003 sec
-rate       0.10 million edges/sec (incl time for U=triu(A))
-rate       0.63 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000006 sec (nthreads: 32 speedup 0.875096)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000053 sec (incl time to compute L and U)
-compute C time:        0.000006 sec
-reduce (C) time:       0.000004 sec
-rate       0.09 million edges/sec (incl time for U=triu(A))
-rate       0.52 million edges/sec (just tricount itself)
+reduce (C) time:       0.000002 sec
+rate     0.10 million edges/sec (incl time for U=triu(A))
+rate     0.83 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000042 sec
-tricount time:         0.000042 sec (saxpy method)
-tri+prep time:         0.000054 sec (incl time to compute L)
-compute C time:        0.000042 sec
+C<L>=L*L time (saxpy):         0.000025 sec
+tricount time:         0.000026 sec (saxpy method)
+tri+prep time:         0.000031 sec (incl time to compute L)
+compute C time:        0.000025 sec
 reduce (C) time:       0.000001 sec
-rate       0.09 million edges/sec (incl time for L=tril(A))
-rate       0.12 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000016 sec (nthreads: 2 speedup 2.62628)
-tricount time:         0.000016 sec (saxpy method)
-tri+prep time:         0.000028 sec (incl time to compute L)
-compute C time:        0.000016 sec
-reduce (C) time:       0.000000 sec
-rate       0.18 million edges/sec (incl time for L=tril(A))
-rate       0.31 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000017 sec (nthreads: 4 speedup 2.51981)
-tricount time:         0.000017 sec (saxpy method)
-tri+prep time:         0.000028 sec (incl time to compute L)
-compute C time:        0.000017 sec
+rate     0.16 million edges/sec (incl time for L=tril(A))
+rate     0.19 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000007 sec (nthreads: 2 speedup 3.8032)
+tricount time:         0.000007 sec (saxpy method)
+tri+prep time:         0.000012 sec (incl time to compute L)
+compute C time:        0.000007 sec
 reduce (C) time:       0.000000 sec
-rate       0.18 million edges/sec (incl time for L=tril(A))
-rate       0.29 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000023 sec (nthreads: 8 speedup 1.79777)
-tricount time:         0.000024 sec (saxpy method)
-tri+prep time:         0.000035 sec (incl time to compute L)
-compute C time:        0.000023 sec
+rate     0.41 million edges/sec (incl time for L=tril(A))
+rate     0.71 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000006 sec (nthreads: 4 speedup 4.31124)
+tricount time:         0.000006 sec (saxpy method)
+tri+prep time:         0.000011 sec (incl time to compute L)
+compute C time:        0.000006 sec
 reduce (C) time:       0.000000 sec
-rate       0.14 million edges/sec (incl time for L=tril(A))
-rate       0.21 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000035 sec (nthreads: 16 speedup 1.19239)
-tricount time:         0.000035 sec (saxpy method)
-tri+prep time:         0.000047 sec (incl time to compute L)
-compute C time:        0.000035 sec
+rate     0.44 million edges/sec (incl time for L=tril(A))
+rate     0.81 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000005 sec (nthreads: 8 speedup 5.42519)
+tricount time:         0.000005 sec (saxpy method)
+tri+prep time:         0.000010 sec (incl time to compute L)
+compute C time:        0.000005 sec
 reduce (C) time:       0.000000 sec
-rate       0.11 million edges/sec (incl time for L=tril(A))
-rate       0.14 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000062 sec (nthreads: 32 speedup 0.676284)
-tricount time:         0.000062 sec (saxpy method)
-tri+prep time:         0.000074 sec (incl time to compute L)
-compute C time:        0.000062 sec
-reduce (C) time:       0.000001 sec
-rate       0.07 million edges/sec (incl time for L=tril(A))
-rate       0.08 million edges/sec (just tricount itself)
+rate     0.49 million edges/sec (incl time for L=tril(A))
+rate     0.98 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 7 by 7, 16 entries, from stdin
 
-total time to read A matrix:       0.000460 sec
+total time to read A matrix:       0.000156 sec
 
 n 7 # edges 8
-U=triu(A) time:        0.000028 sec
-L=tril(A) time:        0.000011 sec
+U=triu(A) time:        0.000019 sec
+L=tril(A) time:        0.000005 sec
 
 ------------------------------------- dot product method:
 # triangles 0
-L*U' time (dot):         0.000016 sec
-tricount time:         0.000032 sec (dot product method)
-tri+prep time:         0.000071 sec (incl time to compute L and U)
-compute C time:        0.000016 sec
-reduce (C) time:       0.000016 sec
-rate       0.11 million edges/sec (incl time for U=triu(A))
-rate       0.25 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000006 sec (nthreads: 2 speedup 2.53277)
-tricount time:         0.000013 sec (dot product method)
-tri+prep time:         0.000052 sec (incl time to compute L and U)
-compute C time:        0.000006 sec
-reduce (C) time:       0.000007 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.63 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 4 speedup 3.07408)
-tricount time:         0.000011 sec (dot product method)
+L*U' time (dot):         0.000020 sec
+tricount time:         0.000026 sec (dot product method)
 tri+prep time:         0.000050 sec (incl time to compute L and U)
+compute C time:        0.000020 sec
+reduce (C) time:       0.000007 sec
+rate     0.16 million edges/sec (incl time for U=triu(A))
+rate     0.30 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000005 sec (nthreads: 2 speedup 3.78177)
+tricount time:         0.000009 sec (dot product method)
+tri+prep time:         0.000032 sec (incl time to compute L and U)
 compute C time:        0.000005 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.72 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 8 speedup 3.46949)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000049 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.76 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 16 speedup 3.61295)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000049 sec (incl time to compute L and U)
+reduce (C) time:       0.000003 sec
+rate     0.25 million edges/sec (incl time for U=triu(A))
+rate     0.92 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 5.03558)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000031 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.79 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 32 speedup 3.7105)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000048 sec (incl time to compute L and U)
+reduce (C) time:       0.000003 sec
+rate     0.26 million edges/sec (incl time for U=triu(A))
+rate     1.17 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 8 speedup 5.07063)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000031 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000005 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.82 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec
-tricount time:         0.000011 sec (dot product method)
-tri+prep time:         0.000049 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.75 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000005 sec (nthreads: 2 speedup 1.04826)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000049 sec (incl time to compute L and U)
-compute C time:        0.000005 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.78 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 1.11248)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000049 sec (incl time to compute L and U)
+reduce (C) time:       0.000003 sec
+rate     0.26 million edges/sec (incl time for U=triu(A))
+rate     1.19 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec
+tricount time:         0.000008 sec (dot product method)
+tri+prep time:         0.000032 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.80 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 8 speedup 1.11425)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000049 sec (incl time to compute L and U)
+reduce (C) time:       0.000004 sec
+rate     0.25 million edges/sec (incl time for U=triu(A))
+rate     1.03 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 0.982245)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000031 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.79 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000004 sec (nthreads: 16 speedup 1.14716)
-tricount time:         0.000010 sec (dot product method)
-tri+prep time:         0.000049 sec (incl time to compute L and U)
+reduce (C) time:       0.000003 sec
+rate     0.26 million edges/sec (incl time for U=triu(A))
+rate     1.09 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 0.984639)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000031 sec (incl time to compute L and U)
 compute C time:        0.000004 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.81 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000006 sec (nthreads: 32 speedup 0.775863)
-tricount time:         0.000012 sec (dot product method)
-tri+prep time:         0.000051 sec (incl time to compute L and U)
-compute C time:        0.000006 sec
-reduce (C) time:       0.000006 sec
-rate       0.16 million edges/sec (incl time for U=triu(A))
-rate       0.65 million edges/sec (just tricount itself)
+reduce (C) time:       0.000003 sec
+rate     0.26 million edges/sec (incl time for U=triu(A))
+rate     1.09 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 8 speedup 1.49371)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000029 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000003 sec
+rate     0.27 million edges/sec (incl time for U=triu(A))
+rate     1.42 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000024 sec
-tricount time:         0.000025 sec (saxpy method)
-tri+prep time:         0.000036 sec (incl time to compute L)
-compute C time:        0.000024 sec
-reduce (C) time:       0.000001 sec
-rate       0.22 million edges/sec (incl time for L=tril(A))
-rate       0.32 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000018 sec (nthreads: 2 speedup 1.35603)
-tricount time:         0.000018 sec (saxpy method)
-tri+prep time:         0.000029 sec (incl time to compute L)
-compute C time:        0.000018 sec
+C<L>=L*L time (saxpy):         0.000022 sec
+tricount time:         0.000022 sec (saxpy method)
+tri+prep time:         0.000027 sec (incl time to compute L)
+compute C time:        0.000022 sec
 reduce (C) time:       0.000000 sec
-rate       0.27 million edges/sec (incl time for L=tril(A))
-rate       0.43 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000018 sec (nthreads: 4 speedup 1.36013)
-tricount time:         0.000018 sec (saxpy method)
-tri+prep time:         0.000029 sec (incl time to compute L)
-compute C time:        0.000018 sec
+rate     0.30 million edges/sec (incl time for L=tril(A))
+rate     0.36 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000012 sec (nthreads: 2 speedup 1.85364)
+tricount time:         0.000012 sec (saxpy method)
+tri+prep time:         0.000017 sec (incl time to compute L)
+compute C time:        0.000012 sec
 reduce (C) time:       0.000000 sec
-rate       0.27 million edges/sec (incl time for L=tril(A))
-rate       0.44 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000024 sec (nthreads: 8 speedup 1.02044)
-tricount time:         0.000024 sec (saxpy method)
-tri+prep time:         0.000035 sec (incl time to compute L)
-compute C time:        0.000024 sec
+rate     0.48 million edges/sec (incl time for L=tril(A))
+rate     0.67 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000012 sec (nthreads: 4 speedup 1.75144)
+tricount time:         0.000013 sec (saxpy method)
+tri+prep time:         0.000017 sec (incl time to compute L)
+compute C time:        0.000012 sec
 reduce (C) time:       0.000000 sec
-rate       0.23 million edges/sec (incl time for L=tril(A))
-rate       0.33 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000035 sec (nthreads: 16 speedup 0.693543)
-tricount time:         0.000036 sec (saxpy method)
-tri+prep time:         0.000047 sec (incl time to compute L)
-compute C time:        0.000035 sec
+rate     0.46 million edges/sec (incl time for L=tril(A))
+rate     0.63 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000026 sec (nthreads: 8 speedup 0.816327)
+tricount time:         0.000027 sec (saxpy method)
+tri+prep time:         0.000031 sec (incl time to compute L)
+compute C time:        0.000026 sec
 reduce (C) time:       0.000000 sec
-rate       0.17 million edges/sec (incl time for L=tril(A))
-rate       0.22 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000057 sec (nthreads: 32 speedup 0.43115)
-tricount time:         0.000057 sec (saxpy method)
-tri+prep time:         0.000068 sec (incl time to compute L)
-compute C time:        0.000057 sec
-reduce (C) time:       0.000001 sec
-rate       0.12 million edges/sec (incl time for L=tril(A))
-rate       0.14 million edges/sec (just tricount itself)
+rate     0.25 million edges/sec (incl time for L=tril(A))
+rate     0.30 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 304 by 304, 876 entries, from stdin
 
-total time to read A matrix:       0.000746 sec
+total time to read A matrix:       0.000336 sec
 
 n 304 # edges 438
-U=triu(A) time:        0.000033 sec
-L=tril(A) time:        0.000015 sec
+U=triu(A) time:        0.000027 sec
+L=tril(A) time:        0.000008 sec
 
 ------------------------------------- dot product method:
 # triangles 0
 L*U' time (dot):         0.000026 sec
-tricount time:         0.000048 sec (dot product method)
-tri+prep time:         0.000097 sec (incl time to compute L and U)
+tricount time:         0.000036 sec (dot product method)
+tri+prep time:         0.000071 sec (incl time to compute L and U)
 compute C time:        0.000026 sec
-reduce (C) time:       0.000022 sec
-rate       4.53 million edges/sec (incl time for U=triu(A))
-rate       9.14 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000016 sec (nthreads: 2 speedup 1.58962)
-tricount time:         0.000028 sec (dot product method)
-tri+prep time:         0.000076 sec (incl time to compute L and U)
+reduce (C) time:       0.000010 sec
+rate     6.15 million edges/sec (incl time for U=triu(A))
+rate    12.12 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000016 sec (nthreads: 2 speedup 1.58173)
+tricount time:         0.000025 sec (dot product method)
+tri+prep time:         0.000060 sec (incl time to compute L and U)
 compute C time:        0.000016 sec
-reduce (C) time:       0.000011 sec
-rate       5.73 million edges/sec (incl time for U=triu(A))
-rate      15.87 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000014 sec (nthreads: 4 speedup 1.82526)
+reduce (C) time:       0.000009 sec
+rate     7.29 million edges/sec (incl time for U=triu(A))
+rate    17.49 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000016 sec (nthreads: 4 speedup 1.61796)
 tricount time:         0.000024 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000014 sec
-reduce (C) time:       0.000010 sec
-rate       5.98 million edges/sec (incl time for U=triu(A))
-rate      17.97 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000014 sec (nthreads: 8 speedup 1.8724)
+tri+prep time:         0.000059 sec (incl time to compute L and U)
+compute C time:        0.000016 sec
+reduce (C) time:       0.000008 sec
+rate     7.40 million edges/sec (incl time for U=triu(A))
+rate    18.15 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000015 sec (nthreads: 8 speedup 1.67222)
 tricount time:         0.000024 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000014 sec
-reduce (C) time:       0.000010 sec
-rate       6.01 million edges/sec (incl time for U=triu(A))
-rate      18.23 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000014 sec (nthreads: 16 speedup 1.89276)
-tricount time:         0.000023 sec (dot product method)
-tri+prep time:         0.000072 sec (incl time to compute L and U)
-compute C time:        0.000014 sec
-reduce (C) time:       0.000010 sec
-rate       6.06 million edges/sec (incl time for U=triu(A))
-rate      18.69 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000014 sec (nthreads: 32 speedup 1.89218)
-tricount time:         0.000023 sec (dot product method)
-tri+prep time:         0.000072 sec (incl time to compute L and U)
-compute C time:        0.000014 sec
-reduce (C) time:       0.000010 sec
-rate       6.07 million edges/sec (incl time for U=triu(A))
-rate      18.79 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000014 sec
-tricount time:         0.000023 sec (dot product method)
-tri+prep time:         0.000072 sec (incl time to compute L and U)
-compute C time:        0.000014 sec
-reduce (C) time:       0.000009 sec
-rate       6.10 million edges/sec (incl time for U=triu(A))
-rate      19.09 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000013 sec (nthreads: 2 speedup 1.03769)
-tricount time:         0.000022 sec (dot product method)
-tri+prep time:         0.000071 sec (incl time to compute L and U)
-compute C time:        0.000013 sec
-reduce (C) time:       0.000009 sec
-rate       6.18 million edges/sec (incl time for U=triu(A))
-rate      19.91 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000013 sec (nthreads: 4 speedup 1.04903)
-tricount time:         0.000022 sec (dot product method)
-tri+prep time:         0.000071 sec (incl time to compute L and U)
-compute C time:        0.000013 sec
+tri+prep time:         0.000059 sec (incl time to compute L and U)
+compute C time:        0.000015 sec
 reduce (C) time:       0.000009 sec
-rate       6.19 million edges/sec (incl time for U=triu(A))
-rate      20.02 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000013 sec (nthreads: 8 speedup 1.02099)
-tricount time:         0.000022 sec (dot product method)
-tri+prep time:         0.000071 sec (incl time to compute L and U)
-compute C time:        0.000013 sec
+rate     7.41 million edges/sec (incl time for U=triu(A))
+rate    18.22 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000015 sec
+tricount time:         0.000024 sec (dot product method)
+tri+prep time:         0.000059 sec (incl time to compute L and U)
+compute C time:        0.000015 sec
 reduce (C) time:       0.000009 sec
-rate       6.17 million edges/sec (incl time for U=triu(A))
-rate      19.76 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000013 sec (nthreads: 16 speedup 1.04879)
+rate     7.44 million edges/sec (incl time for U=triu(A))
+rate    18.39 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000014 sec (nthreads: 2 speedup 1.07105)
 tricount time:         0.000022 sec (dot product method)
-tri+prep time:         0.000071 sec (incl time to compute L and U)
-compute C time:        0.000013 sec
+tri+prep time:         0.000058 sec (incl time to compute L and U)
+compute C time:        0.000014 sec
 reduce (C) time:       0.000009 sec
-rate       6.20 million edges/sec (incl time for U=triu(A))
-rate      20.07 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000014 sec (nthreads: 32 speedup 0.951763)
+rate     7.61 million edges/sec (incl time for U=triu(A))
+rate    19.51 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000015 sec (nthreads: 4 speedup 1.00587)
 tricount time:         0.000024 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000014 sec
+tri+prep time:         0.000059 sec (incl time to compute L and U)
+compute C time:        0.000015 sec
 reduce (C) time:       0.000009 sec
-rate       6.02 million edges/sec (incl time for U=triu(A))
-rate      18.36 million edges/sec (just tricount itself)
+rate     7.44 million edges/sec (incl time for U=triu(A))
+rate    18.38 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000015 sec (nthreads: 8 speedup 1.00443)
+tricount time:         0.000023 sec (dot product method)
+tri+prep time:         0.000058 sec (incl time to compute L and U)
+compute C time:        0.000015 sec
+reduce (C) time:       0.000008 sec
+rate     7.54 million edges/sec (incl time for U=triu(A))
+rate    19.06 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000037 sec
-tricount time:         0.000038 sec (saxpy method)
-tri+prep time:         0.000053 sec (incl time to compute L)
-compute C time:        0.000037 sec
-reduce (C) time:       0.000001 sec
-rate       8.26 million edges/sec (incl time for L=tril(A))
-rate      11.63 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000030 sec (nthreads: 2 speedup 1.22817)
-tricount time:         0.000031 sec (saxpy method)
-tri+prep time:         0.000046 sec (incl time to compute L)
-compute C time:        0.000030 sec
-reduce (C) time:       0.000001 sec
-rate       9.51 million edges/sec (incl time for L=tril(A))
-rate      14.29 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000031 sec (nthreads: 4 speedup 1.20926)
-tricount time:         0.000031 sec (saxpy method)
-tri+prep time:         0.000047 sec (incl time to compute L)
-compute C time:        0.000031 sec
-reduce (C) time:       0.000001 sec
-rate       9.41 million edges/sec (incl time for L=tril(A))
-rate      14.06 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000036 sec (nthreads: 8 speedup 1.02152)
-tricount time:         0.000037 sec (saxpy method)
+C<L>=L*L time (saxpy):         0.000043 sec
+tricount time:         0.000044 sec (saxpy method)
 tri+prep time:         0.000052 sec (incl time to compute L)
-compute C time:        0.000036 sec
+compute C time:        0.000043 sec
 reduce (C) time:       0.000001 sec
-rate       8.40 million edges/sec (incl time for L=tril(A))
-rate      11.91 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000047 sec (nthreads: 16 speedup 0.781655)
-tricount time:         0.000048 sec (saxpy method)
-tri+prep time:         0.000063 sec (incl time to compute L)
-compute C time:        0.000047 sec
+rate     8.50 million edges/sec (incl time for L=tril(A))
+rate     9.99 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000030 sec (nthreads: 2 speedup 1.45681)
+tricount time:         0.000030 sec (saxpy method)
+tri+prep time:         0.000038 sec (incl time to compute L)
+compute C time:        0.000030 sec
 reduce (C) time:       0.000001 sec
-rate       6.93 million edges/sec (incl time for L=tril(A))
-rate       9.16 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000072 sec (nthreads: 32 speedup 0.514651)
-tricount time:         0.000072 sec (saxpy method)
-tri+prep time:         0.000088 sec (incl time to compute L)
-compute C time:        0.000072 sec
+rate    11.58 million edges/sec (incl time for L=tril(A))
+rate    14.53 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000029 sec (nthreads: 4 speedup 1.46942)
+tricount time:         0.000030 sec (saxpy method)
+tri+prep time:         0.000038 sec (incl time to compute L)
+compute C time:        0.000029 sec
+reduce (C) time:       0.000000 sec
+rate    11.67 million edges/sec (incl time for L=tril(A))
+rate    14.67 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000035 sec (nthreads: 8 speedup 1.23519)
+tricount time:         0.000035 sec (saxpy method)
+tri+prep time:         0.000043 sec (incl time to compute L)
+compute C time:        0.000035 sec
 reduce (C) time:       0.000001 sec
-rate       4.98 million edges/sec (incl time for L=tril(A))
-rate       6.04 million edges/sec (just tricount itself)
+rate    10.15 million edges/sec (incl time for L=tril(A))
+rate    12.35 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 48 by 48, 352 entries, from stdin
 
-total time to read A matrix:       0.000803 sec
+total time to read A matrix:       0.000328 sec
 
 n 48 # edges 176
-U=triu(A) time:        0.000065 sec
-L=tril(A) time:        0.000026 sec
+U=triu(A) time:        0.000035 sec
+L=tril(A) time:        0.000009 sec
 
 ------------------------------------- dot product method:
 # triangles 160
-L*U' time (dot):         0.000049 sec
-tricount time:         0.000081 sec (dot product method)
-tri+prep time:         0.000173 sec (incl time to compute L and U)
-compute C time:        0.000049 sec
-reduce (C) time:       0.000032 sec
-rate       1.02 million edges/sec (incl time for U=triu(A))
-rate       2.17 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000027 sec (nthreads: 2 speedup 1.80287)
-tricount time:         0.000039 sec (dot product method)
-tri+prep time:         0.000131 sec (incl time to compute L and U)
-compute C time:        0.000027 sec
-reduce (C) time:       0.000012 sec
-rate       1.35 million edges/sec (incl time for U=triu(A))
-rate       4.50 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000022 sec (nthreads: 4 speedup 2.2347)
-tricount time:         0.000032 sec (dot product method)
-tri+prep time:         0.000124 sec (incl time to compute L and U)
-compute C time:        0.000022 sec
-reduce (C) time:       0.000010 sec
-rate       1.42 million edges/sec (incl time for U=triu(A))
-rate       5.48 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000020 sec (nthreads: 8 speedup 2.47809)
-tricount time:         0.000029 sec (dot product method)
-tri+prep time:         0.000121 sec (incl time to compute L and U)
-compute C time:        0.000020 sec
-reduce (C) time:       0.000010 sec
-rate       1.45 million edges/sec (incl time for U=triu(A))
-rate       5.98 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000020 sec (nthreads: 16 speedup 2.50407)
-tricount time:         0.000030 sec (dot product method)
-tri+prep time:         0.000122 sec (incl time to compute L and U)
-compute C time:        0.000020 sec
-reduce (C) time:       0.000010 sec
-rate       1.45 million edges/sec (incl time for U=triu(A))
-rate       5.89 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000018 sec (nthreads: 32 speedup 2.66398)
-tricount time:         0.000028 sec (dot product method)
-tri+prep time:         0.000120 sec (incl time to compute L and U)
-compute C time:        0.000018 sec
-reduce (C) time:       0.000010 sec
-rate       1.47 million edges/sec (incl time for U=triu(A))
-rate       6.27 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000030 sec
-tricount time:         0.000042 sec (dot product method)
-tri+prep time:         0.000134 sec (incl time to compute L and U)
-compute C time:        0.000030 sec
-reduce (C) time:       0.000013 sec
-rate       1.31 million edges/sec (incl time for U=triu(A))
-rate       4.16 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000025 sec (nthreads: 2 speedup 1.19216)
-tricount time:         0.000036 sec (dot product method)
-tri+prep time:         0.000128 sec (incl time to compute L and U)
-compute C time:        0.000025 sec
+L*U' time (dot):         0.000039 sec
+tricount time:         0.000050 sec (dot product method)
+tri+prep time:         0.000094 sec (incl time to compute L and U)
+compute C time:        0.000039 sec
 reduce (C) time:       0.000011 sec
-rate       1.38 million edges/sec (incl time for U=triu(A))
-rate       4.92 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000022 sec (nthreads: 4 speedup 1.32586)
-tricount time:         0.000032 sec (dot product method)
-tri+prep time:         0.000124 sec (incl time to compute L and U)
-compute C time:        0.000022 sec
-reduce (C) time:       0.000010 sec
-rate       1.42 million edges/sec (incl time for U=triu(A))
-rate       5.48 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000021 sec (nthreads: 8 speedup 1.38195)
-tricount time:         0.000031 sec (dot product method)
-tri+prep time:         0.000122 sec (incl time to compute L and U)
-compute C time:        0.000021 sec
-reduce (C) time:       0.000009 sec
-rate       1.44 million edges/sec (incl time for U=triu(A))
-rate       5.73 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000020 sec (nthreads: 16 speedup 1.49644)
-tricount time:         0.000029 sec (dot product method)
-tri+prep time:         0.000121 sec (incl time to compute L and U)
-compute C time:        0.000020 sec
-reduce (C) time:       0.000009 sec
-rate       1.46 million edges/sec (incl time for U=triu(A))
-rate       6.03 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000021 sec (nthreads: 32 speedup 1.37539)
-tricount time:         0.000045 sec (dot product method)
-tri+prep time:         0.000136 sec (incl time to compute L and U)
-compute C time:        0.000021 sec
-reduce (C) time:       0.000023 sec
-rate       1.29 million edges/sec (incl time for U=triu(A))
-rate       3.94 million edges/sec (just tricount itself)
+rate     1.87 million edges/sec (incl time for U=triu(A))
+rate     3.54 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000011 sec (nthreads: 2 speedup 3.60124)
+tricount time:         0.000015 sec (dot product method)
+tri+prep time:         0.000059 sec (incl time to compute L and U)
+compute C time:        0.000011 sec
+reduce (C) time:       0.000004 sec
+rate     2.99 million edges/sec (incl time for U=triu(A))
+rate    12.01 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000008 sec (nthreads: 4 speedup 4.82118)
+tricount time:         0.000011 sec (dot product method)
+tri+prep time:         0.000055 sec (incl time to compute L and U)
+compute C time:        0.000008 sec
+reduce (C) time:       0.000003 sec
+rate     3.20 million edges/sec (incl time for U=triu(A))
+rate    16.24 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000008 sec (nthreads: 8 speedup 5.09536)
+tricount time:         0.000010 sec (dot product method)
+tri+prep time:         0.000055 sec (incl time to compute L and U)
+compute C time:        0.000008 sec
+reduce (C) time:       0.000003 sec
+rate     3.22 million edges/sec (incl time for U=triu(A))
+rate    16.86 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000012 sec
+tricount time:         0.000016 sec (dot product method)
+tri+prep time:         0.000060 sec (incl time to compute L and U)
+compute C time:        0.000012 sec
+reduce (C) time:       0.000004 sec
+rate     2.93 million edges/sec (incl time for U=triu(A))
+rate    11.11 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000010 sec (nthreads: 2 speedup 1.2077)
+tricount time:         0.000013 sec (dot product method)
+tri+prep time:         0.000057 sec (incl time to compute L and U)
+compute C time:        0.000010 sec
+reduce (C) time:       0.000003 sec
+rate     3.07 million edges/sec (incl time for U=triu(A))
+rate    13.37 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000009 sec (nthreads: 4 speedup 1.38824)
+tricount time:         0.000012 sec (dot product method)
+tri+prep time:         0.000056 sec (incl time to compute L and U)
+compute C time:        0.000009 sec
+reduce (C) time:       0.000003 sec
+rate     3.15 million edges/sec (incl time for U=triu(A))
+rate    15.17 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000008 sec (nthreads: 8 speedup 1.55024)
+tricount time:         0.000010 sec (dot product method)
+tri+prep time:         0.000055 sec (incl time to compute L and U)
+compute C time:        0.000008 sec
+reduce (C) time:       0.000003 sec
+rate     3.22 million edges/sec (incl time for U=triu(A))
+rate    16.95 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000043 sec
-tricount time:         0.000045 sec (saxpy method)
-tri+prep time:         0.000072 sec (incl time to compute L)
-compute C time:        0.000043 sec
-reduce (C) time:       0.000002 sec
-rate       2.46 million edges/sec (incl time for L=tril(A))
-rate       3.90 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000022 sec (nthreads: 2 speedup 1.92194)
-tricount time:         0.000024 sec (saxpy method)
-tri+prep time:         0.000050 sec (incl time to compute L)
-compute C time:        0.000022 sec
-reduce (C) time:       0.000002 sec
-rate       3.49 million edges/sec (incl time for L=tril(A))
-rate       7.32 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000018 sec (nthreads: 4 speedup 2.41296)
-tricount time:         0.000019 sec (saxpy method)
-tri+prep time:         0.000046 sec (incl time to compute L)
-compute C time:        0.000018 sec
-reduce (C) time:       0.000002 sec
-rate       3.83 million edges/sec (incl time for L=tril(A))
-rate       9.04 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000015 sec (nthreads: 8 speedup 2.89441)
-tricount time:         0.000016 sec (saxpy method)
-tri+prep time:         0.000043 sec (incl time to compute L)
-compute C time:        0.000015 sec
-reduce (C) time:       0.000002 sec
-rate       4.11 million edges/sec (incl time for L=tril(A))
-rate      10.74 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000014 sec (nthreads: 16 speedup 3.10535)
-tricount time:         0.000015 sec (saxpy method)
+C<L>=L*L time (saxpy):         0.000032 sec
+tricount time:         0.000033 sec (saxpy method)
 tri+prep time:         0.000042 sec (incl time to compute L)
-compute C time:        0.000014 sec
-reduce (C) time:       0.000002 sec
-rate       4.22 million edges/sec (incl time for L=tril(A))
-rate      11.52 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000013 sec (nthreads: 32 speedup 3.20469)
+compute C time:        0.000032 sec
+reduce (C) time:       0.000001 sec
+rate     4.20 million edges/sec (incl time for L=tril(A))
+rate     5.31 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000014 sec (nthreads: 2 speedup 2.30991)
 tricount time:         0.000015 sec (saxpy method)
-tri+prep time:         0.000041 sec (incl time to compute L)
-compute C time:        0.000013 sec
-reduce (C) time:       0.000002 sec
-rate       4.27 million edges/sec (incl time for L=tril(A))
-rate      11.87 million edges/sec (just tricount itself)
+tri+prep time:         0.000023 sec (incl time to compute L)
+compute C time:        0.000014 sec
+reduce (C) time:       0.000001 sec
+rate     7.51 million edges/sec (incl time for L=tril(A))
+rate    12.02 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000011 sec (nthreads: 4 speedup 2.91911)
+tricount time:         0.000012 sec (saxpy method)
+tri+prep time:         0.000020 sec (incl time to compute L)
+compute C time:        0.000011 sec
+reduce (C) time:       0.000001 sec
+rate     8.60 million edges/sec (incl time for L=tril(A))
+rate    15.07 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000011 sec (nthreads: 8 speedup 2.88764)
+tricount time:         0.000012 sec (saxpy method)
+tri+prep time:         0.000021 sec (incl time to compute L)
+compute C time:        0.000011 sec
+reduce (C) time:       0.000001 sec
+rate     8.47 million edges/sec (incl time for L=tril(A))
+rate    14.68 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 4884 by 4884, 285494 entries, from stdin
 
-total time to read A matrix:       0.113711 sec
+total time to read A matrix:       0.070829 sec
 
 n 4884 # edges 142747
-U=triu(A) time:        0.006216 sec
-L=tril(A) time:        0.005391 sec
+U=triu(A) time:        0.000237 sec
+L=tril(A) time:        0.000214 sec
 
 ------------------------------------- dot product method:
 # triangles 1512964
-L*U' time (dot):         0.015270 sec
-tricount time:         0.016954 sec (dot product method)
-tri+prep time:         0.028562 sec (incl time to compute L and U)
-compute C time:        0.015270 sec
-reduce (C) time:       0.001685 sec
-rate       5.00 million edges/sec (incl time for U=triu(A))
-rate       8.42 million edges/sec (just tricount itself)
-L*U' time (dot):         0.009330 sec (nthreads: 2 speedup 1.63666)
-tricount time:         0.009965 sec (dot product method)
-tri+prep time:         0.021572 sec (incl time to compute L and U)
-compute C time:        0.009330 sec
-reduce (C) time:       0.000635 sec
-rate       6.62 million edges/sec (incl time for U=triu(A))
-rate      14.32 million edges/sec (just tricount itself)
-L*U' time (dot):         0.005277 sec (nthreads: 4 speedup 2.89374)
-tricount time:         0.005683 sec (dot product method)
-tri+prep time:         0.017290 sec (incl time to compute L and U)
-compute C time:        0.005277 sec
-reduce (C) time:       0.000406 sec
-rate       8.26 million edges/sec (incl time for U=triu(A))
-rate      25.12 million edges/sec (just tricount itself)
-L*U' time (dot):         0.003258 sec (nthreads: 8 speedup 4.68741)
-tricount time:         0.003645 sec (dot product method)
-tri+prep time:         0.015252 sec (incl time to compute L and U)
-compute C time:        0.003258 sec
-reduce (C) time:       0.000387 sec
-rate       9.36 million edges/sec (incl time for U=triu(A))
-rate      39.16 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001990 sec (nthreads: 16 speedup 7.67414)
-tricount time:         0.002542 sec (dot product method)
-tri+prep time:         0.014149 sec (incl time to compute L and U)
-compute C time:        0.001990 sec
-reduce (C) time:       0.000552 sec
-rate      10.09 million edges/sec (incl time for U=triu(A))
-rate      56.16 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002013 sec (nthreads: 32 speedup 7.58719)
-tricount time:         0.004790 sec (dot product method)
-tri+prep time:         0.016397 sec (incl time to compute L and U)
-compute C time:        0.002013 sec
-reduce (C) time:       0.002778 sec
-rate       8.71 million edges/sec (incl time for U=triu(A))
-rate      29.80 million edges/sec (just tricount itself)
-L*U' time (dot):         0.016205 sec
-tricount time:         0.017262 sec (dot product method)
-tri+prep time:         0.028869 sec (incl time to compute L and U)
-compute C time:        0.016205 sec
-reduce (C) time:       0.001056 sec
-rate       4.94 million edges/sec (incl time for U=triu(A))
-rate       8.27 million edges/sec (just tricount itself)
-L*U' time (dot):         0.008890 sec (nthreads: 2 speedup 1.82292)
-tricount time:         0.009502 sec (dot product method)
-tri+prep time:         0.021109 sec (incl time to compute L and U)
-compute C time:        0.008890 sec
-reduce (C) time:       0.000612 sec
-rate       6.76 million edges/sec (incl time for U=triu(A))
-rate      15.02 million edges/sec (just tricount itself)
-L*U' time (dot):         0.005417 sec (nthreads: 4 speedup 2.99155)
-tricount time:         0.005840 sec (dot product method)
-tri+prep time:         0.017448 sec (incl time to compute L and U)
-compute C time:        0.005417 sec
-reduce (C) time:       0.000423 sec
-rate       8.18 million edges/sec (incl time for U=triu(A))
-rate      24.44 million edges/sec (just tricount itself)
-L*U' time (dot):         0.003336 sec (nthreads: 8 speedup 4.858)
-tricount time:         0.003732 sec (dot product method)
-tri+prep time:         0.015339 sec (incl time to compute L and U)
-compute C time:        0.003336 sec
-reduce (C) time:       0.000396 sec
-rate       9.31 million edges/sec (incl time for U=triu(A))
-rate      38.25 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002173 sec (nthreads: 16 speedup 7.4573)
-tricount time:         0.002776 sec (dot product method)
-tri+prep time:         0.014383 sec (incl time to compute L and U)
-compute C time:        0.002173 sec
-reduce (C) time:       0.000603 sec
-rate       9.92 million edges/sec (incl time for U=triu(A))
-rate      51.43 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002292 sec (nthreads: 32 speedup 7.06977)
-tricount time:         0.004473 sec (dot product method)
-tri+prep time:         0.016081 sec (incl time to compute L and U)
-compute C time:        0.002292 sec
-reduce (C) time:       0.002181 sec
-rate       8.88 million edges/sec (incl time for U=triu(A))
-rate      31.91 million edges/sec (just tricount itself)
+L*U' time (dot):         0.016448 sec
+tricount time:         0.017444 sec (dot product method)
+tri+prep time:         0.017895 sec (incl time to compute L and U)
+compute C time:        0.016448 sec
+reduce (C) time:       0.000995 sec
+rate     7.98 million edges/sec (incl time for U=triu(A))
+rate     8.18 million edges/sec (just tricount itself)
+L*U' time (dot):         0.007960 sec (nthreads: 2 speedup 2.06624)
+tricount time:         0.009635 sec (dot product method)
+tri+prep time:         0.010086 sec (incl time to compute L and U)
+compute C time:        0.007960 sec
+reduce (C) time:       0.001674 sec
+rate    14.15 million edges/sec (incl time for U=triu(A))
+rate    14.82 million edges/sec (just tricount itself)
+L*U' time (dot):         0.004200 sec (nthreads: 4 speedup 3.91649)
+tricount time:         0.004743 sec (dot product method)
+tri+prep time:         0.005194 sec (incl time to compute L and U)
+compute C time:        0.004200 sec
+reduce (C) time:       0.000543 sec
+rate    27.48 million edges/sec (incl time for U=triu(A))
+rate    30.10 million edges/sec (just tricount itself)
+L*U' time (dot):         0.009520 sec (nthreads: 8 speedup 1.72783)
+tricount time:         0.010260 sec (dot product method)
+tri+prep time:         0.010711 sec (incl time to compute L and U)
+compute C time:        0.009520 sec
+reduce (C) time:       0.000741 sec
+rate    13.33 million edges/sec (incl time for U=triu(A))
+rate    13.91 million edges/sec (just tricount itself)
+L*U' time (dot):         0.015530 sec
+tricount time:         0.016475 sec (dot product method)
+tri+prep time:         0.016926 sec (incl time to compute L and U)
+compute C time:        0.015530 sec
+reduce (C) time:       0.000945 sec
+rate     8.43 million edges/sec (incl time for U=triu(A))
+rate     8.66 million edges/sec (just tricount itself)
+L*U' time (dot):         0.008395 sec (nthreads: 2 speedup 1.84985)
+tricount time:         0.008839 sec (dot product method)
+tri+prep time:         0.009290 sec (incl time to compute L and U)
+compute C time:        0.008395 sec
+reduce (C) time:       0.000444 sec
+rate    15.37 million edges/sec (incl time for U=triu(A))
+rate    16.15 million edges/sec (just tricount itself)
+L*U' time (dot):         0.004378 sec (nthreads: 4 speedup 3.54732)
+tricount time:         0.004727 sec (dot product method)
+tri+prep time:         0.005179 sec (incl time to compute L and U)
+compute C time:        0.004378 sec
+reduce (C) time:       0.000349 sec
+rate    27.56 million edges/sec (incl time for U=triu(A))
+rate    30.20 million edges/sec (just tricount itself)
+L*U' time (dot):         0.005409 sec (nthreads: 8 speedup 2.87118)
+tricount time:         0.005899 sec (dot product method)
+tri+prep time:         0.006350 sec (incl time to compute L and U)
+compute C time:        0.005409 sec
+reduce (C) time:       0.000490 sec
+rate    22.48 million edges/sec (incl time for U=triu(A))
+rate    24.20 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.006027 sec
-tricount time:         0.006634 sec (saxpy method)
-tri+prep time:         0.012025 sec (incl time to compute L)
-compute C time:        0.006027 sec
-reduce (C) time:       0.000608 sec
-rate      11.87 million edges/sec (incl time for L=tril(A))
-rate      21.52 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.004789 sec (nthreads: 2 speedup 1.25855)
-tricount time:         0.005109 sec (saxpy method)
-tri+prep time:         0.010500 sec (incl time to compute L)
-compute C time:        0.004789 sec
-reduce (C) time:       0.000320 sec
-rate      13.60 million edges/sec (incl time for L=tril(A))
-rate      27.94 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.002640 sec (nthreads: 4 speedup 2.28312)
-tricount time:         0.002811 sec (saxpy method)
-tri+prep time:         0.008202 sec (incl time to compute L)
-compute C time:        0.002640 sec
-reduce (C) time:       0.000172 sec
-rate      17.40 million edges/sec (incl time for L=tril(A))
-rate      50.78 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001769 sec (nthreads: 8 speedup 3.40634)
-tricount time:         0.001864 sec (saxpy method)
-tri+prep time:         0.007255 sec (incl time to compute L)
-compute C time:        0.001769 sec
-reduce (C) time:       0.000094 sec
-rate      19.68 million edges/sec (incl time for L=tril(A))
-rate      76.60 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001771 sec (nthreads: 16 speedup 3.40213)
-tricount time:         0.001840 sec (saxpy method)
-tri+prep time:         0.007231 sec (incl time to compute L)
-compute C time:        0.001771 sec
-reduce (C) time:       0.000069 sec
-rate      19.74 million edges/sec (incl time for L=tril(A))
-rate      77.56 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.005359 sec (nthreads: 32 speedup 1.12463)
-tricount time:         0.005507 sec (saxpy method)
-tri+prep time:         0.010898 sec (incl time to compute L)
-compute C time:        0.005359 sec
-reduce (C) time:       0.000148 sec
-rate      13.10 million edges/sec (incl time for L=tril(A))
-rate      25.92 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.011343 sec
+tricount time:         0.011732 sec (saxpy method)
+tri+prep time:         0.011946 sec (incl time to compute L)
+compute C time:        0.011343 sec
+reduce (C) time:       0.000390 sec
+rate    11.95 million edges/sec (incl time for L=tril(A))
+rate    12.17 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.006111 sec (nthreads: 2 speedup 1.85609)
+tricount time:         0.006302 sec (saxpy method)
+tri+prep time:         0.006516 sec (incl time to compute L)
+compute C time:        0.006111 sec
+reduce (C) time:       0.000191 sec
+rate    21.91 million edges/sec (incl time for L=tril(A))
+rate    22.65 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.003317 sec (nthreads: 4 speedup 3.4192)
+tricount time:         0.003506 sec (saxpy method)
+tri+prep time:         0.003720 sec (incl time to compute L)
+compute C time:        0.003317 sec
+reduce (C) time:       0.000189 sec
+rate    38.37 million edges/sec (incl time for L=tril(A))
+rate    40.71 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.004551 sec (nthreads: 8 speedup 2.49242)
+tricount time:         0.004741 sec (saxpy method)
+tri+prep time:         0.004955 sec (incl time to compute L)
+compute C time:        0.004551 sec
+reduce (C) time:       0.000190 sec
+rate    28.81 million edges/sec (incl time for L=tril(A))
+rate    30.11 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 183 by 183, 1402 entries, from stdin
 
-total time to read A matrix:       0.001384 sec
+total time to read A matrix:       0.000583 sec
 
 n 183 # edges 701
-U=triu(A) time:        0.000050 sec
-L=tril(A) time:        0.000021 sec
+U=triu(A) time:        0.000028 sec
+L=tril(A) time:        0.000010 sec
 
 ------------------------------------- dot product method:
 # triangles 863
-L*U' time (dot):         0.000082 sec
-tricount time:         0.000109 sec (dot product method)
-tri+prep time:         0.000179 sec (incl time to compute L and U)
-compute C time:        0.000082 sec
-reduce (C) time:       0.000027 sec
-rate       3.91 million edges/sec (incl time for U=triu(A))
-rate       6.45 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000064 sec (nthreads: 2 speedup 1.29174)
-tricount time:         0.000077 sec (dot product method)
-tri+prep time:         0.000147 sec (incl time to compute L and U)
-compute C time:        0.000064 sec
-reduce (C) time:       0.000013 sec
-rate       4.76 million edges/sec (incl time for U=triu(A))
-rate       9.14 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000058 sec (nthreads: 4 speedup 1.41933)
-tricount time:         0.000070 sec (dot product method)
-tri+prep time:         0.000141 sec (incl time to compute L and U)
-compute C time:        0.000058 sec
-reduce (C) time:       0.000012 sec
-rate       4.98 million edges/sec (incl time for U=triu(A))
-rate       9.99 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000056 sec (nthreads: 8 speedup 1.47253)
-tricount time:         0.000068 sec (dot product method)
-tri+prep time:         0.000138 sec (incl time to compute L and U)
-compute C time:        0.000056 sec
-reduce (C) time:       0.000012 sec
-rate       5.08 million edges/sec (incl time for U=triu(A))
-rate      10.38 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000054 sec (nthreads: 16 speedup 1.51814)
-tricount time:         0.000066 sec (dot product method)
-tri+prep time:         0.000136 sec (incl time to compute L and U)
-compute C time:        0.000054 sec
-reduce (C) time:       0.000012 sec
-rate       5.15 million edges/sec (incl time for U=triu(A))
-rate      10.69 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000053 sec (nthreads: 32 speedup 1.54011)
-tricount time:         0.000065 sec (dot product method)
-tri+prep time:         0.000135 sec (incl time to compute L and U)
-compute C time:        0.000053 sec
-reduce (C) time:       0.000011 sec
-rate       5.18 million edges/sec (incl time for U=triu(A))
-rate      10.85 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000069 sec
-tricount time:         0.000082 sec (dot product method)
-tri+prep time:         0.000153 sec (incl time to compute L and U)
-compute C time:        0.000069 sec
-reduce (C) time:       0.000013 sec
-rate       4.59 million edges/sec (incl time for U=triu(A))
-rate       8.53 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000063 sec (nthreads: 2 speedup 1.08843)
-tricount time:         0.000076 sec (dot product method)
-tri+prep time:         0.000146 sec (incl time to compute L and U)
-compute C time:        0.000063 sec
-reduce (C) time:       0.000012 sec
-rate       4.79 million edges/sec (incl time for U=triu(A))
-rate       9.27 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000060 sec (nthreads: 4 speedup 1.14219)
+L*U' time (dot):         0.000061 sec
 tricount time:         0.000073 sec (dot product method)
-tri+prep time:         0.000143 sec (incl time to compute L and U)
-compute C time:        0.000060 sec
-reduce (C) time:       0.000012 sec
-rate       4.90 million edges/sec (incl time for U=triu(A))
-rate       9.66 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000057 sec (nthreads: 8 speedup 1.20568)
-tricount time:         0.000069 sec (dot product method)
-tri+prep time:         0.000140 sec (incl time to compute L and U)
-compute C time:        0.000057 sec
-reduce (C) time:       0.000012 sec
-rate       5.01 million edges/sec (incl time for U=triu(A))
-rate      10.12 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000056 sec (nthreads: 16 speedup 1.23602)
-tricount time:         0.000068 sec (dot product method)
-tri+prep time:         0.000139 sec (incl time to compute L and U)
-compute C time:        0.000056 sec
+tri+prep time:         0.000111 sec (incl time to compute L and U)
+compute C time:        0.000061 sec
 reduce (C) time:       0.000012 sec
-rate       5.06 million edges/sec (incl time for U=triu(A))
-rate      10.31 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000057 sec (nthreads: 32 speedup 1.20982)
-tricount time:         0.000069 sec (dot product method)
-tri+prep time:         0.000140 sec (incl time to compute L and U)
-compute C time:        0.000057 sec
-reduce (C) time:       0.000013 sec
-rate       5.01 million edges/sec (incl time for U=triu(A))
-rate      10.10 million edges/sec (just tricount itself)
-
------------------------------------ saxpy method:
-C<L>=L*L time (saxpy):         0.000048 sec
-tricount time:         0.000050 sec (saxpy method)
-tri+prep time:         0.000071 sec (incl time to compute L)
+rate     6.32 million edges/sec (incl time for U=triu(A))
+rate     9.62 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000042 sec (nthreads: 2 speedup 1.46194)
+tricount time:         0.000050 sec (dot product method)
+tri+prep time:         0.000088 sec (incl time to compute L and U)
+compute C time:        0.000042 sec
+reduce (C) time:       0.000008 sec
+rate     7.99 million edges/sec (incl time for U=triu(A))
+rate    14.11 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000038 sec (nthreads: 4 speedup 1.60703)
+tricount time:         0.000045 sec (dot product method)
+tri+prep time:         0.000083 sec (incl time to compute L and U)
+compute C time:        0.000038 sec
+reduce (C) time:       0.000007 sec
+rate     8.44 million edges/sec (incl time for U=triu(A))
+rate    15.55 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000037 sec (nthreads: 8 speedup 1.66533)
+tricount time:         0.000043 sec (dot product method)
+tri+prep time:         0.000081 sec (incl time to compute L and U)
+compute C time:        0.000037 sec
+reduce (C) time:       0.000007 sec
+rate     8.60 million edges/sec (incl time for U=triu(A))
+rate    16.12 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000048 sec
+tricount time:         0.000055 sec (dot product method)
+tri+prep time:         0.000093 sec (incl time to compute L and U)
 compute C time:        0.000048 sec
-reduce (C) time:       0.000003 sec
-rate       9.84 million edges/sec (incl time for L=tril(A))
-rate      13.91 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000033 sec (nthreads: 2 speedup 1.43173)
-tricount time:         0.000036 sec (saxpy method)
-tri+prep time:         0.000057 sec (incl time to compute L)
-compute C time:        0.000033 sec
-reduce (C) time:       0.000003 sec
-rate      12.37 million edges/sec (incl time for L=tril(A))
-rate      19.58 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000029 sec (nthreads: 4 speedup 1.66278)
-tricount time:         0.000031 sec (saxpy method)
-tri+prep time:         0.000052 sec (incl time to compute L)
-compute C time:        0.000029 sec
-reduce (C) time:       0.000002 sec
-rate      13.49 million edges/sec (incl time for L=tril(A))
-rate      22.55 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000026 sec (nthreads: 8 speedup 1.81865)
-tricount time:         0.000029 sec (saxpy method)
-tri+prep time:         0.000049 sec (incl time to compute L)
-compute C time:        0.000026 sec
-reduce (C) time:       0.000002 sec
-rate      14.17 million edges/sec (incl time for L=tril(A))
-rate      24.52 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000024 sec (nthreads: 16 speedup 2.0008)
-tricount time:         0.000026 sec (saxpy method)
-tri+prep time:         0.000047 sec (incl time to compute L)
-compute C time:        0.000024 sec
-reduce (C) time:       0.000002 sec
-rate      14.90 million edges/sec (incl time for L=tril(A))
-rate      26.77 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000022 sec (nthreads: 32 speedup 2.1677)
-tricount time:         0.000024 sec (saxpy method)
-tri+prep time:         0.000045 sec (incl time to compute L)
-compute C time:        0.000022 sec
-reduce (C) time:       0.000002 sec
-rate      15.50 million edges/sec (incl time for L=tril(A))
-rate      28.80 million edges/sec (just tricount itself)
-
---------------------------------------------------------------
-matrix 63 by 63, 246 entries, from stdin
-
-total time to read A matrix:       0.000729 sec
-
-n 63 # edges 123
-U=triu(A) time:        0.000040 sec
-L=tril(A) time:        0.000017 sec
-
-------------------------------------- dot product method:
-# triangles 0
-L*U' time (dot):         0.000030 sec
-tricount time:         0.000054 sec (dot product method)
-tri+prep time:         0.000111 sec (incl time to compute L and U)
-compute C time:        0.000030 sec
-reduce (C) time:       0.000024 sec
-rate       1.10 million edges/sec (incl time for U=triu(A))
-rate       2.26 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000013 sec (nthreads: 2 speedup 2.41227)
-tricount time:         0.000023 sec (dot product method)
-tri+prep time:         0.000080 sec (incl time to compute L and U)
-compute C time:        0.000013 sec
-reduce (C) time:       0.000010 sec
-rate       1.54 million edges/sec (incl time for U=triu(A))
-rate       5.39 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 4 speedup 3.0259)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000076 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000009 sec
-rate       1.61 million edges/sec (incl time for U=triu(A))
-rate       6.33 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 3.26611)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.63 million edges/sec (incl time for U=triu(A))
-rate       6.63 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 16 speedup 3.23692)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000076 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000010 sec
-rate       1.62 million edges/sec (incl time for U=triu(A))
-rate       6.49 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 32 speedup 3.25594)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.63 million edges/sec (incl time for U=triu(A))
-rate       6.65 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec
-tricount time:         0.000020 sec (dot product method)
-tri+prep time:         0.000077 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000010 sec
-rate       1.59 million edges/sec (incl time for U=triu(A))
-rate       6.07 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 2 speedup 1.05855)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000076 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000009 sec
-rate       1.62 million edges/sec (incl time for U=triu(A))
-rate       6.46 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 4 speedup 1.07856)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000076 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000009 sec
-rate       1.63 million edges/sec (incl time for U=triu(A))
-rate       6.57 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 1.11023)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.64 million edges/sec (incl time for U=triu(A))
-rate       6.72 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 16 speedup 1.12813)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.64 million edges/sec (incl time for U=triu(A))
-rate       6.83 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000011 sec (nthreads: 32 speedup 0.911859)
-tricount time:         0.000021 sec (dot product method)
-tri+prep time:         0.000078 sec (incl time to compute L and U)
-compute C time:        0.000011 sec
-reduce (C) time:       0.000009 sec
-rate       1.59 million edges/sec (incl time for U=triu(A))
-rate       5.93 million edges/sec (just tricount itself)
+reduce (C) time:       0.000007 sec
+rate     7.52 million edges/sec (incl time for U=triu(A))
+rate    12.70 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000043 sec (nthreads: 2 speedup 1.11068)
+tricount time:         0.000050 sec (dot product method)
+tri+prep time:         0.000088 sec (incl time to compute L and U)
+compute C time:        0.000043 sec
+reduce (C) time:       0.000007 sec
+rate     7.96 million edges/sec (incl time for U=triu(A))
+rate    14.01 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000041 sec (nthreads: 4 speedup 1.17128)
+tricount time:         0.000048 sec (dot product method)
+tri+prep time:         0.000086 sec (incl time to compute L and U)
+compute C time:        0.000041 sec
+reduce (C) time:       0.000007 sec
+rate     8.20 million edges/sec (incl time for U=triu(A))
+rate    14.75 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000038 sec (nthreads: 8 speedup 1.25408)
+tricount time:         0.000044 sec (dot product method)
+tri+prep time:         0.000082 sec (incl time to compute L and U)
+compute C time:        0.000038 sec
+reduce (C) time:       0.000006 sec
+rate     8.50 million edges/sec (incl time for U=triu(A))
+rate    15.77 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000038 sec
-tricount time:         0.000039 sec (saxpy method)
-tri+prep time:         0.000057 sec (incl time to compute L)
-compute C time:        0.000038 sec
-reduce (C) time:       0.000001 sec
-rate       2.17 million edges/sec (incl time for L=tril(A))
-rate       3.13 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000029 sec (nthreads: 2 speedup 1.34714)
-tricount time:         0.000029 sec (saxpy method)
-tri+prep time:         0.000047 sec (incl time to compute L)
-compute C time:        0.000029 sec
-reduce (C) time:       0.000001 sec
-rate       2.64 million edges/sec (incl time for L=tril(A))
-rate       4.20 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000029 sec (nthreads: 4 speedup 1.30621)
-tricount time:         0.000030 sec (saxpy method)
-tri+prep time:         0.000047 sec (incl time to compute L)
-compute C time:        0.000029 sec
-reduce (C) time:       0.000001 sec
-rate       2.59 million edges/sec (incl time for L=tril(A))
-rate       4.08 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000037 sec (nthreads: 8 speedup 1.04853)
-tricount time:         0.000037 sec (saxpy method)
+C<L>=L*L time (saxpy):         0.000044 sec
+tricount time:         0.000046 sec (saxpy method)
 tri+prep time:         0.000055 sec (incl time to compute L)
-compute C time:        0.000037 sec
+compute C time:        0.000044 sec
+reduce (C) time:       0.000001 sec
+rate    12.67 million edges/sec (incl time for L=tril(A))
+rate    15.36 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000034 sec (nthreads: 2 speedup 1.30181)
+tricount time:         0.000035 sec (saxpy method)
+tri+prep time:         0.000045 sec (incl time to compute L)
+compute C time:        0.000034 sec
 reduce (C) time:       0.000001 sec
-rate       2.25 million edges/sec (incl time for L=tril(A))
-rate       3.29 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000051 sec (nthreads: 16 speedup 0.758835)
-tricount time:         0.000051 sec (saxpy method)
-tri+prep time:         0.000069 sec (incl time to compute L)
-compute C time:        0.000051 sec
+rate    15.60 million edges/sec (incl time for L=tril(A))
+rate    19.89 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000031 sec (nthreads: 4 speedup 1.4348)
+tricount time:         0.000032 sec (saxpy method)
+tri+prep time:         0.000042 sec (incl time to compute L)
+compute C time:        0.000031 sec
 reduce (C) time:       0.000001 sec
-rate       1.79 million edges/sec (incl time for L=tril(A))
-rate       2.39 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000081 sec (nthreads: 32 speedup 0.471659)
-tricount time:         0.000082 sec (saxpy method)
-tri+prep time:         0.000100 sec (incl time to compute L)
-compute C time:        0.000081 sec
+rate    16.79 million edges/sec (incl time for L=tril(A))
+rate    21.86 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000030 sec (nthreads: 8 speedup 1.46692)
+tricount time:         0.000031 sec (saxpy method)
+tri+prep time:         0.000041 sec (incl time to compute L)
+compute C time:        0.000030 sec
 reduce (C) time:       0.000001 sec
-rate       1.23 million edges/sec (incl time for L=tril(A))
-rate       1.49 million edges/sec (just tricount itself)
+rate    17.03 million edges/sec (incl time for L=tril(A))
+rate    22.28 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 63 by 63, 246 entries, from stdin
 
-total time to read A matrix:       0.000668 sec
+total time to read A matrix:       0.000168 sec
 
 n 63 # edges 123
-U=triu(A) time:        0.000041 sec
-L=tril(A) time:        0.000016 sec
+U=triu(A) time:        0.000016 sec
+L=tril(A) time:        0.000004 sec
 
 ------------------------------------- dot product method:
 # triangles 0
-L*U' time (dot):         0.000021 sec
-tricount time:         0.000043 sec (dot product method)
-tri+prep time:         0.000100 sec (incl time to compute L and U)
-compute C time:        0.000021 sec
-reduce (C) time:       0.000022 sec
-rate       1.23 million edges/sec (incl time for U=triu(A))
-rate       2.85 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000011 sec (nthreads: 2 speedup 1.96243)
-tricount time:         0.000020 sec (dot product method)
-tri+prep time:         0.000077 sec (incl time to compute L and U)
-compute C time:        0.000011 sec
-reduce (C) time:       0.000009 sec
-rate       1.59 million edges/sec (incl time for U=triu(A))
-rate       6.08 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 4 speedup 2.39897)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000008 sec
-rate       1.65 million edges/sec (incl time for U=triu(A))
-rate       7.09 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 8 speedup 2.66572)
-tricount time:         0.000016 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000008 sec
-rate       1.68 million edges/sec (incl time for U=triu(A))
-rate       7.65 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 16 speedup 2.58776)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000074 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000008 sec
-rate       1.66 million edges/sec (incl time for U=triu(A))
-rate       7.35 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 32 speedup 2.60765)
-tricount time:         0.000016 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000008 sec
-rate       1.68 million edges/sec (incl time for U=triu(A))
-rate       7.58 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.63 million edges/sec (incl time for U=triu(A))
-rate       6.80 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 2 speedup 1.10401)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000074 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000008 sec
-rate       1.67 million edges/sec (incl time for U=triu(A))
-rate       7.45 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 4 speedup 1.14452)
-tricount time:         0.000016 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000008 sec
-rate       1.68 million edges/sec (incl time for U=triu(A))
-rate       7.76 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 8 speedup 1.17376)
-tricount time:         0.000016 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000008 sec
-rate       1.69 million edges/sec (incl time for U=triu(A))
-rate       7.84 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 16 speedup 1.15621)
-tricount time:         0.000016 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000008 sec
-rate       1.69 million edges/sec (incl time for U=triu(A))
-rate       7.89 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 32 speedup 0.942341)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000008 sec
-rate       1.63 million edges/sec (incl time for U=triu(A))
-rate       6.77 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000016 sec
+tricount time:         0.000022 sec (dot product method)
+tri+prep time:         0.000042 sec (incl time to compute L and U)
+compute C time:        0.000016 sec
+reduce (C) time:       0.000006 sec
+rate     2.90 million edges/sec (incl time for U=triu(A))
+rate     5.52 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 3.74828)
+tricount time:         0.000008 sec (dot product method)
+tri+prep time:         0.000028 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000003 sec
+rate     4.43 million edges/sec (incl time for U=triu(A))
+rate    16.19 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 4.66372)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000027 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000003 sec
+rate     4.56 million edges/sec (incl time for U=triu(A))
+rate    18.14 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 8 speedup 4.82743)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000026 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000003 sec
+rate     4.70 million edges/sec (incl time for U=triu(A))
+rate    20.55 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000027 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000003 sec
+rate     4.53 million edges/sec (incl time for U=triu(A))
+rate    17.55 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 2 speedup 1.17227)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000026 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000003 sec
+rate     4.64 million edges/sec (incl time for U=triu(A))
+rate    19.46 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 4 speedup 1.1764)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000026 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000002 sec
+rate     4.73 million edges/sec (incl time for U=triu(A))
+rate    21.10 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 8 speedup 1.22307)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000026 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000002 sec
+rate     4.76 million edges/sec (incl time for U=triu(A))
+rate    21.74 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000035 sec
-tricount time:         0.000036 sec (saxpy method)
-tri+prep time:         0.000052 sec (incl time to compute L)
-compute C time:        0.000035 sec
-reduce (C) time:       0.000001 sec
-rate       2.38 million edges/sec (incl time for L=tril(A))
-rate       3.46 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000026 sec (nthreads: 2 speedup 1.36151)
-tricount time:         0.000026 sec (saxpy method)
-tri+prep time:         0.000043 sec (incl time to compute L)
-compute C time:        0.000026 sec
-reduce (C) time:       0.000001 sec
-rate       2.89 million edges/sec (incl time for L=tril(A))
-rate       4.68 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000027 sec (nthreads: 4 speedup 1.28473)
-tricount time:         0.000028 sec (saxpy method)
-tri+prep time:         0.000044 sec (incl time to compute L)
-compute C time:        0.000027 sec
-reduce (C) time:       0.000001 sec
-rate       2.79 million edges/sec (incl time for L=tril(A))
-rate       4.43 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000035 sec (nthreads: 8 speedup 1.00668)
-tricount time:         0.000035 sec (saxpy method)
-tri+prep time:         0.000052 sec (incl time to compute L)
-compute C time:        0.000035 sec
-reduce (C) time:       0.000001 sec
-rate       2.39 million edges/sec (incl time for L=tril(A))
-rate       3.49 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000048 sec (nthreads: 16 speedup 0.732287)
-tricount time:         0.000048 sec (saxpy method)
-tri+prep time:         0.000065 sec (incl time to compute L)
-compute C time:        0.000048 sec
-reduce (C) time:       0.000001 sec
-rate       1.90 million edges/sec (incl time for L=tril(A))
-rate       2.55 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000077 sec (nthreads: 32 speedup 0.454815)
-tricount time:         0.000077 sec (saxpy method)
-tri+prep time:         0.000094 sec (incl time to compute L)
-compute C time:        0.000077 sec
-reduce (C) time:       0.000001 sec
-rate       1.31 million edges/sec (incl time for L=tril(A))
-rate       1.59 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000020 sec
+tricount time:         0.000020 sec (saxpy method)
+tri+prep time:         0.000025 sec (incl time to compute L)
+compute C time:        0.000020 sec
+reduce (C) time:       0.000000 sec
+rate     4.97 million edges/sec (incl time for L=tril(A))
+rate     6.07 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000012 sec (nthreads: 2 speedup 1.68975)
+tricount time:         0.000012 sec (saxpy method)
+tri+prep time:         0.000017 sec (incl time to compute L)
+compute C time:        0.000012 sec
+reduce (C) time:       0.000000 sec
+rate     7.44 million edges/sec (incl time for L=tril(A))
+rate    10.22 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000013 sec (nthreads: 4 speedup 1.59131)
+tricount time:         0.000013 sec (saxpy method)
+tri+prep time:         0.000017 sec (incl time to compute L)
+compute C time:        0.000013 sec
+reduce (C) time:       0.000000 sec
+rate     7.13 million edges/sec (incl time for L=tril(A))
+rate     9.63 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000017 sec (nthreads: 8 speedup 1.18573)
+tricount time:         0.000017 sec (saxpy method)
+tri+prep time:         0.000022 sec (incl time to compute L)
+compute C time:        0.000017 sec
+reduce (C) time:       0.000000 sec
+rate     5.69 million edges/sec (incl time for L=tril(A))
+rate     7.19 million edges/sec (just tricount itself)
+
+--------------------------------------------------------------
+matrix 63 by 63, 246 entries, from stdin
+
+total time to read A matrix:       0.000170 sec
+
+n 63 # edges 123
+U=triu(A) time:        0.000021 sec
+L=tril(A) time:        0.000005 sec
+
+------------------------------------- dot product method:
+# triangles 0
+L*U' time (dot):         0.000016 sec
+tricount time:         0.000022 sec (dot product method)
+tri+prep time:         0.000048 sec (incl time to compute L and U)
+compute C time:        0.000016 sec
+reduce (C) time:       0.000006 sec
+rate     2.59 million edges/sec (incl time for U=triu(A))
+rate     5.63 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000007 sec (nthreads: 2 speedup 2.38811)
+tricount time:         0.000012 sec (dot product method)
+tri+prep time:         0.000038 sec (incl time to compute L and U)
+compute C time:        0.000007 sec
+reduce (C) time:       0.000005 sec
+rate     3.27 million edges/sec (incl time for U=triu(A))
+rate    10.33 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000007 sec (nthreads: 4 speedup 2.43022)
+tricount time:         0.000012 sec (dot product method)
+tri+prep time:         0.000037 sec (incl time to compute L and U)
+compute C time:        0.000007 sec
+reduce (C) time:       0.000005 sec
+rate     3.28 million edges/sec (incl time for U=triu(A))
+rate    10.43 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000006 sec (nthreads: 8 speedup 2.53585)
+tricount time:         0.000011 sec (dot product method)
+tri+prep time:         0.000037 sec (incl time to compute L and U)
+compute C time:        0.000006 sec
+reduce (C) time:       0.000005 sec
+rate     3.36 million edges/sec (incl time for U=triu(A))
+rate    11.28 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000007 sec
+tricount time:         0.000012 sec (dot product method)
+tri+prep time:         0.000037 sec (incl time to compute L and U)
+compute C time:        0.000007 sec
+reduce (C) time:       0.000005 sec
+rate     3.28 million edges/sec (incl time for U=triu(A))
+rate    10.40 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000006 sec (nthreads: 2 speedup 1.13474)
+tricount time:         0.000011 sec (dot product method)
+tri+prep time:         0.000036 sec (incl time to compute L and U)
+compute C time:        0.000006 sec
+reduce (C) time:       0.000005 sec
+rate     3.38 million edges/sec (incl time for U=triu(A))
+rate    11.43 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000006 sec (nthreads: 4 speedup 1.07643)
+tricount time:         0.000011 sec (dot product method)
+tri+prep time:         0.000037 sec (incl time to compute L and U)
+compute C time:        0.000006 sec
+reduce (C) time:       0.000005 sec
+rate     3.34 million edges/sec (incl time for U=triu(A))
+rate    11.04 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000006 sec (nthreads: 8 speedup 1.1264)
+tricount time:         0.000011 sec (dot product method)
+tri+prep time:         0.000036 sec (incl time to compute L and U)
+compute C time:        0.000006 sec
+reduce (C) time:       0.000005 sec
+rate     3.38 million edges/sec (incl time for U=triu(A))
+rate    11.45 million edges/sec (just tricount itself)
+
+----------------------------------- saxpy method:
+C<L>=L*L time (saxpy):         0.000023 sec
+tricount time:         0.000023 sec (saxpy method)
+tri+prep time:         0.000028 sec (incl time to compute L)
+compute C time:        0.000023 sec
+reduce (C) time:       0.000000 sec
+rate     4.39 million edges/sec (incl time for L=tril(A))
+rate     5.29 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000018 sec (nthreads: 2 speedup 1.27137)
+tricount time:         0.000018 sec (saxpy method)
+tri+prep time:         0.000023 sec (incl time to compute L)
+compute C time:        0.000018 sec
+reduce (C) time:       0.000000 sec
+rate     5.33 million edges/sec (incl time for L=tril(A))
+rate     6.73 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000018 sec (nthreads: 4 speedup 1.29131)
+tricount time:         0.000018 sec (saxpy method)
+tri+prep time:         0.000023 sec (incl time to compute L)
+compute C time:        0.000018 sec
+reduce (C) time:       0.000000 sec
+rate     5.39 million edges/sec (incl time for L=tril(A))
+rate     6.83 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000020 sec (nthreads: 8 speedup 1.1211)
+tricount time:         0.000021 sec (saxpy method)
+tri+prep time:         0.000026 sec (incl time to compute L)
+compute C time:        0.000020 sec
+reduce (C) time:       0.000000 sec
+rate     4.82 million edges/sec (incl time for L=tril(A))
+rate     5.93 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 78 by 78, 204 entries, from stdin
 
-total time to read A matrix:       0.000705 sec
+total time to read A matrix:       0.000157 sec
 
 n 78 # edges 102
-U=triu(A) time:        0.000039 sec
-L=tril(A) time:        0.000017 sec
+U=triu(A) time:        0.000015 sec
+L=tril(A) time:        0.000005 sec
 
 ------------------------------------- dot product method:
 # triangles 0
-L*U' time (dot):         0.000029 sec
-tricount time:         0.000052 sec (dot product method)
-tri+prep time:         0.000108 sec (incl time to compute L and U)
-compute C time:        0.000029 sec
-reduce (C) time:       0.000023 sec
-rate       0.95 million edges/sec (incl time for U=triu(A))
-rate       1.96 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000012 sec (nthreads: 2 speedup 2.52629)
-tricount time:         0.000021 sec (dot product method)
-tri+prep time:         0.000077 sec (incl time to compute L and U)
-compute C time:        0.000012 sec
-reduce (C) time:       0.000010 sec
-rate       1.33 million edges/sec (incl time for U=triu(A))
-rate       4.75 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 4 speedup 3.17835)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000074 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.38 million edges/sec (incl time for U=triu(A))
-rate       5.53 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 3.3126)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.40 million edges/sec (incl time for U=triu(A))
-rate       5.83 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 16 speedup 3.40546)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.40 million edges/sec (incl time for U=triu(A))
-rate       5.94 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 32 speedup 3.17867)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.39 million edges/sec (incl time for U=triu(A))
-rate       5.77 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000074 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000009 sec
-rate       1.37 million edges/sec (incl time for U=triu(A))
-rate       5.43 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 2 speedup 1.07514)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.40 million edges/sec (incl time for U=triu(A))
-rate       5.79 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000008 sec (nthreads: 4 speedup 1.13956)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000008 sec
-reduce (C) time:       0.000009 sec
-rate       1.41 million edges/sec (incl time for U=triu(A))
-rate       5.96 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 1.0621)
-tricount time:         0.000018 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.39 million edges/sec (incl time for U=triu(A))
-rate       5.70 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000009 sec (nthreads: 16 speedup 1.12574)
-tricount time:         0.000017 sec (dot product method)
-tri+prep time:         0.000073 sec (incl time to compute L and U)
-compute C time:        0.000009 sec
-reduce (C) time:       0.000009 sec
-rate       1.40 million edges/sec (incl time for U=triu(A))
-rate       5.93 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000010 sec (nthreads: 32 speedup 0.924325)
-tricount time:         0.000019 sec (dot product method)
-tri+prep time:         0.000075 sec (incl time to compute L and U)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000009 sec
-rate       1.36 million edges/sec (incl time for U=triu(A))
-rate       5.29 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000021 sec
+tricount time:         0.000026 sec (dot product method)
+tri+prep time:         0.000047 sec (incl time to compute L and U)
+compute C time:        0.000021 sec
+reduce (C) time:       0.000006 sec
+rate     2.19 million edges/sec (incl time for U=triu(A))
+rate     3.88 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 2 speedup 4.80607)
+tricount time:         0.000008 sec (dot product method)
+tri+prep time:         0.000028 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000003 sec
+rate     3.65 million edges/sec (incl time for U=triu(A))
+rate    13.29 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec (nthreads: 4 speedup 5.43748)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000027 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000003 sec
+rate     3.79 million edges/sec (incl time for U=triu(A))
+rate    15.37 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 8 speedup 6.34682)
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000027 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000004 sec
+rate     3.74 million edges/sec (incl time for U=triu(A))
+rate    14.49 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000004 sec
+tricount time:         0.000007 sec (dot product method)
+tri+prep time:         0.000028 sec (incl time to compute L and U)
+compute C time:        0.000004 sec
+reduce (C) time:       0.000003 sec
+rate     3.70 million edges/sec (incl time for U=triu(A))
+rate    13.98 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 2 speedup 1.15271)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000027 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000003 sec
+rate     3.83 million edges/sec (incl time for U=triu(A))
+rate    16.06 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 4 speedup 1.14478)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000026 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000003 sec
+rate     3.87 million edges/sec (incl time for U=triu(A))
+rate    16.79 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000003 sec (nthreads: 8 speedup 1.13797)
+tricount time:         0.000006 sec (dot product method)
+tri+prep time:         0.000026 sec (incl time to compute L and U)
+compute C time:        0.000003 sec
+reduce (C) time:       0.000003 sec
+rate     3.87 million edges/sec (incl time for U=triu(A))
+rate    16.68 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000036 sec
-tricount time:         0.000037 sec (saxpy method)
-tri+prep time:         0.000053 sec (incl time to compute L)
-compute C time:        0.000036 sec
-reduce (C) time:       0.000001 sec
-rate       1.91 million edges/sec (incl time for L=tril(A))
-rate       2.78 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000027 sec (nthreads: 2 speedup 1.33639)
-tricount time:         0.000028 sec (saxpy method)
-tri+prep time:         0.000044 sec (incl time to compute L)
-compute C time:        0.000027 sec
-reduce (C) time:       0.000001 sec
-rate       2.30 million edges/sec (incl time for L=tril(A))
-rate       3.71 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000028 sec (nthreads: 4 speedup 1.28468)
-tricount time:         0.000029 sec (saxpy method)
-tri+prep time:         0.000045 sec (incl time to compute L)
-compute C time:        0.000028 sec
-reduce (C) time:       0.000001 sec
-rate       2.25 million edges/sec (incl time for L=tril(A))
-rate       3.58 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000035 sec (nthreads: 8 speedup 1.02284)
-tricount time:         0.000036 sec (saxpy method)
-tri+prep time:         0.000052 sec (incl time to compute L)
-compute C time:        0.000035 sec
-reduce (C) time:       0.000001 sec
-rate       1.94 million edges/sec (incl time for L=tril(A))
-rate       2.86 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000049 sec (nthreads: 16 speedup 0.733373)
-tricount time:         0.000050 sec (saxpy method)
-tri+prep time:         0.000066 sec (incl time to compute L)
-compute C time:        0.000049 sec
-reduce (C) time:       0.000001 sec
-rate       1.54 million edges/sec (incl time for L=tril(A))
-rate       2.06 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000081 sec (nthreads: 32 speedup 0.445319)
-tricount time:         0.000081 sec (saxpy method)
-tri+prep time:         0.000098 sec (incl time to compute L)
-compute C time:        0.000081 sec
-reduce (C) time:       0.000001 sec
-rate       1.04 million edges/sec (incl time for L=tril(A))
-rate       1.25 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000019 sec
+tricount time:         0.000019 sec (saxpy method)
+tri+prep time:         0.000024 sec (incl time to compute L)
+compute C time:        0.000019 sec
+reduce (C) time:       0.000000 sec
+rate     4.28 million edges/sec (incl time for L=tril(A))
+rate     5.35 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000012 sec (nthreads: 2 speedup 1.51135)
+tricount time:         0.000013 sec (saxpy method)
+tri+prep time:         0.000017 sec (incl time to compute L)
+compute C time:        0.000012 sec
+reduce (C) time:       0.000000 sec
+rate     5.86 million edges/sec (incl time for L=tril(A))
+rate     8.08 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000013 sec (nthreads: 4 speedup 1.43971)
+tricount time:         0.000013 sec (saxpy method)
+tri+prep time:         0.000018 sec (incl time to compute L)
+compute C time:        0.000013 sec
+reduce (C) time:       0.000000 sec
+rate     5.66 million edges/sec (incl time for L=tril(A))
+rate     7.70 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000025 sec (nthreads: 8 speedup 0.737911)
+tricount time:         0.000026 sec (saxpy method)
+tri+prep time:         0.000030 sec (incl time to compute L)
+compute C time:        0.000025 sec
+reduce (C) time:       0.000000 sec
+rate     3.35 million edges/sec (incl time for L=tril(A))
+rate     3.97 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 982 by 982, 99840 entries, from stdin
 
-total time to read A matrix:       0.039242 sec
+total time to read A matrix:       0.026821 sec
 
 n 982 # edges 49920
-U=triu(A) time:        0.000322 sec
-L=tril(A) time:        0.000322 sec
+U=triu(A) time:        0.000158 sec
+L=tril(A) time:        0.000129 sec
 
 ------------------------------------- dot product method:
 # triangles 0
-L*U' time (dot):         0.000875 sec
-tricount time:         0.001033 sec (dot product method)
-tri+prep time:         0.001677 sec (incl time to compute L and U)
-compute C time:        0.000875 sec
-reduce (C) time:       0.000158 sec
-rate      29.77 million edges/sec (incl time for U=triu(A))
-rate      48.33 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000484 sec (nthreads: 2 speedup 1.80794)
-tricount time:         0.000564 sec (dot product method)
-tri+prep time:         0.001208 sec (incl time to compute L and U)
-compute C time:        0.000484 sec
-reduce (C) time:       0.000080 sec
-rate      41.32 million edges/sec (incl time for U=triu(A))
-rate      88.45 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000300 sec (nthreads: 4 speedup 2.92012)
-tricount time:         0.000350 sec (dot product method)
-tri+prep time:         0.000994 sec (incl time to compute L and U)
-compute C time:        0.000300 sec
-reduce (C) time:       0.000051 sec
-rate      50.20 million edges/sec (incl time for U=triu(A))
-rate     142.44 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000295 sec (nthreads: 8 speedup 2.96421)
-tricount time:         0.000340 sec (dot product method)
-tri+prep time:         0.000984 sec (incl time to compute L and U)
-compute C time:        0.000295 sec
-reduce (C) time:       0.000045 sec
-rate      50.72 million edges/sec (incl time for U=triu(A))
-rate     146.70 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000486 sec (nthreads: 16 speedup 1.79995)
-tricount time:         0.000556 sec (dot product method)
-tri+prep time:         0.001200 sec (incl time to compute L and U)
-compute C time:        0.000486 sec
-reduce (C) time:       0.000069 sec
-rate      41.61 million edges/sec (incl time for U=triu(A))
-rate      89.83 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001858 sec (nthreads: 32 speedup 0.471047)
-tricount time:         0.001945 sec (dot product method)
-tri+prep time:         0.002589 sec (incl time to compute L and U)
-compute C time:        0.001858 sec
-reduce (C) time:       0.000086 sec
-rate      19.28 million edges/sec (incl time for U=triu(A))
-rate      25.67 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000549 sec
-tricount time:         0.000653 sec (dot product method)
-tri+prep time:         0.001297 sec (incl time to compute L and U)
-compute C time:        0.000549 sec
-reduce (C) time:       0.000104 sec
-rate      38.49 million edges/sec (incl time for U=triu(A))
-rate      76.44 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000369 sec (nthreads: 2 speedup 1.4892)
-tricount time:         0.000443 sec (dot product method)
-tri+prep time:         0.001087 sec (incl time to compute L and U)
-compute C time:        0.000369 sec
-reduce (C) time:       0.000074 sec
-rate      45.91 million edges/sec (incl time for U=triu(A))
-rate     112.59 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000257 sec (nthreads: 4 speedup 2.1371)
-tricount time:         0.000308 sec (dot product method)
-tri+prep time:         0.000952 sec (incl time to compute L and U)
-compute C time:        0.000257 sec
-reduce (C) time:       0.000051 sec
-rate      52.45 million edges/sec (incl time for U=triu(A))
-rate     162.16 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000303 sec (nthreads: 8 speedup 1.81454)
-tricount time:         0.000348 sec (dot product method)
-tri+prep time:         0.000991 sec (incl time to compute L and U)
-compute C time:        0.000303 sec
-reduce (C) time:       0.000045 sec
-rate      50.35 million edges/sec (incl time for U=triu(A))
-rate     143.64 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000429 sec (nthreads: 16 speedup 1.28043)
-tricount time:         0.000494 sec (dot product method)
-tri+prep time:         0.001138 sec (incl time to compute L and U)
-compute C time:        0.000429 sec
-reduce (C) time:       0.000065 sec
-rate      43.87 million edges/sec (incl time for U=triu(A))
-rate     101.02 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002481 sec (nthreads: 32 speedup 0.221444)
-tricount time:         0.002556 sec (dot product method)
-tri+prep time:         0.003200 sec (incl time to compute L and U)
-compute C time:        0.002481 sec
-reduce (C) time:       0.000075 sec
-rate      15.60 million edges/sec (incl time for U=triu(A))
-rate      19.53 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000394 sec
+tricount time:         0.000486 sec (dot product method)
+tri+prep time:         0.000774 sec (incl time to compute L and U)
+compute C time:        0.000394 sec
+reduce (C) time:       0.000092 sec
+rate    64.51 million edges/sec (incl time for U=triu(A))
+rate   102.66 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000377 sec (nthreads: 2 speedup 1.04644)
+tricount time:         0.000466 sec (dot product method)
+tri+prep time:         0.000754 sec (incl time to compute L and U)
+compute C time:        0.000377 sec
+reduce (C) time:       0.000089 sec
+rate    66.22 million edges/sec (incl time for U=triu(A))
+rate   107.08 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000313 sec (nthreads: 4 speedup 1.26114)
+tricount time:         0.000402 sec (dot product method)
+tri+prep time:         0.000689 sec (incl time to compute L and U)
+compute C time:        0.000313 sec
+reduce (C) time:       0.000089 sec
+rate    72.41 million edges/sec (incl time for U=triu(A))
+rate   124.26 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000290 sec (nthreads: 8 speedup 1.35934)
+tricount time:         0.000417 sec (dot product method)
+tri+prep time:         0.000705 sec (incl time to compute L and U)
+compute C time:        0.000290 sec
+reduce (C) time:       0.000127 sec
+rate    70.85 million edges/sec (incl time for U=triu(A))
+rate   119.74 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000431 sec
+tricount time:         0.000554 sec (dot product method)
+tri+prep time:         0.000842 sec (incl time to compute L and U)
+compute C time:        0.000431 sec
+reduce (C) time:       0.000123 sec
+rate    59.32 million edges/sec (incl time for U=triu(A))
+rate    90.13 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000338 sec (nthreads: 2 speedup 1.2737)
+tricount time:         0.000428 sec (dot product method)
+tri+prep time:         0.000715 sec (incl time to compute L and U)
+compute C time:        0.000338 sec
+reduce (C) time:       0.000089 sec
+rate    69.78 million edges/sec (incl time for U=triu(A))
+rate   116.70 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000297 sec (nthreads: 4 speedup 1.45141)
+tricount time:         0.000385 sec (dot product method)
+tri+prep time:         0.000672 sec (incl time to compute L and U)
+compute C time:        0.000297 sec
+reduce (C) time:       0.000088 sec
+rate    74.26 million edges/sec (incl time for U=triu(A))
+rate   129.79 million edges/sec (just tricount itself)
+L*U' time (dot):         0.005728 sec (nthreads: 8 speedup 0.0752694)
+tricount time:         0.005857 sec (dot product method)
+tri+prep time:         0.006144 sec (incl time to compute L and U)
+compute C time:        0.005728 sec
+reduce (C) time:       0.000129 sec
+rate     8.12 million edges/sec (incl time for U=triu(A))
+rate     8.52 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000197 sec
-tricount time:         0.000200 sec (saxpy method)
-tri+prep time:         0.000522 sec (incl time to compute L)
-compute C time:        0.000197 sec
-reduce (C) time:       0.000003 sec
-rate      95.66 million edges/sec (incl time for L=tril(A))
-rate     249.62 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000194 sec (nthreads: 2 speedup 1.01487)
-tricount time:         0.000195 sec (saxpy method)
-tri+prep time:         0.000517 sec (incl time to compute L)
-compute C time:        0.000194 sec
+C<L>=L*L time (saxpy):         0.000281 sec
+tricount time:         0.000282 sec (saxpy method)
+tri+prep time:         0.000411 sec (incl time to compute L)
+compute C time:        0.000281 sec
 reduce (C) time:       0.000001 sec
-rate      96.50 million edges/sec (incl time for L=tril(A))
-rate     255.47 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000202 sec (nthreads: 4 speedup 0.976348)
-tricount time:         0.000203 sec (saxpy method)
-tri+prep time:         0.000525 sec (incl time to compute L)
-compute C time:        0.000202 sec
+rate   121.51 million edges/sec (incl time for L=tril(A))
+rate   177.33 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000272 sec (nthreads: 2 speedup 1.03328)
+tricount time:         0.000272 sec (saxpy method)
+tri+prep time:         0.000402 sec (incl time to compute L)
+compute C time:        0.000272 sec
 reduce (C) time:       0.000001 sec
-rate      95.12 million edges/sec (incl time for L=tril(A))
-rate     246.01 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000235 sec (nthreads: 8 speedup 0.837672)
-tricount time:         0.000236 sec (saxpy method)
-tri+prep time:         0.000558 sec (incl time to compute L)
-compute C time:        0.000235 sec
+rate   124.28 million edges/sec (incl time for L=tril(A))
+rate   183.31 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000196 sec (nthreads: 4 speedup 1.43261)
+tricount time:         0.000197 sec (saxpy method)
+tri+prep time:         0.000326 sec (incl time to compute L)
+compute C time:        0.000196 sec
 reduce (C) time:       0.000001 sec
-rate      89.46 million edges/sec (incl time for L=tril(A))
-rate     211.40 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000316 sec (nthreads: 16 speedup 0.624721)
-tricount time:         0.000317 sec (saxpy method)
-tri+prep time:         0.000638 sec (incl time to compute L)
-compute C time:        0.000316 sec
+rate   153.13 million edges/sec (incl time for L=tril(A))
+rate   253.85 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000191 sec (nthreads: 8 speedup 1.4685)
+tricount time:         0.000192 sec (saxpy method)
+tri+prep time:         0.000321 sec (incl time to compute L)
+compute C time:        0.000191 sec
 reduce (C) time:       0.000001 sec
-rate      78.19 million edges/sec (incl time for L=tril(A))
-rate     157.68 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000269 sec (nthreads: 32 speedup 0.7328)
-tricount time:         0.000270 sec (saxpy method)
-tri+prep time:         0.000592 sec (incl time to compute L)
-compute C time:        0.000269 sec
-reduce (C) time:       0.000001 sec
-rate      84.37 million edges/sec (incl time for L=tril(A))
-rate     185.05 million edges/sec (just tricount itself)
+rate   155.53 million edges/sec (incl time for L=tril(A))
+rate   260.51 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
 matrix 67 by 67, 574 entries, from stdin
 
-total time to read A matrix:       0.000555 sec
+total time to read A matrix:       0.000263 sec
 
 n 67 # edges 287
-U=triu(A) time:        0.000041 sec
-L=tril(A) time:        0.000019 sec
+U=triu(A) time:        0.000029 sec
+L=tril(A) time:        0.000007 sec
 
 ------------------------------------- dot product method:
 # triangles 120
-L*U' time (dot):         0.000039 sec
-tricount time:         0.000060 sec (dot product method)
-tri+prep time:         0.000120 sec (incl time to compute L and U)
-compute C time:        0.000039 sec
-reduce (C) time:       0.000021 sec
-rate       2.39 million edges/sec (incl time for U=triu(A))
-rate       4.80 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000023 sec (nthreads: 2 speedup 1.69008)
-tricount time:         0.000032 sec (dot product method)
-tri+prep time:         0.000093 sec (incl time to compute L and U)
-compute C time:        0.000023 sec
-reduce (C) time:       0.000009 sec
-rate       3.10 million edges/sec (incl time for U=triu(A))
-rate       8.89 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000019 sec (nthreads: 4 speedup 2.03667)
-tricount time:         0.000026 sec (dot product method)
-tri+prep time:         0.000087 sec (incl time to compute L and U)
-compute C time:        0.000019 sec
-reduce (C) time:       0.000007 sec
-rate       3.31 million edges/sec (incl time for U=triu(A))
-rate      10.94 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000018 sec (nthreads: 8 speedup 2.17405)
-tricount time:         0.000025 sec (dot product method)
-tri+prep time:         0.000085 sec (incl time to compute L and U)
-compute C time:        0.000018 sec
-reduce (C) time:       0.000007 sec
-rate       3.38 million edges/sec (incl time for U=triu(A))
-rate      11.63 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000018 sec (nthreads: 16 speedup 2.206)
-tricount time:         0.000025 sec (dot product method)
-tri+prep time:         0.000085 sec (incl time to compute L and U)
-compute C time:        0.000018 sec
-reduce (C) time:       0.000007 sec
-rate       3.38 million edges/sec (incl time for U=triu(A))
-rate      11.66 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000016 sec (nthreads: 32 speedup 2.3881)
-tricount time:         0.000023 sec (dot product method)
+L*U' time (dot):         0.000037 sec
+tricount time:         0.000047 sec (dot product method)
 tri+prep time:         0.000083 sec (incl time to compute L and U)
-compute C time:        0.000016 sec
-reduce (C) time:       0.000006 sec
-rate       3.46 million edges/sec (incl time for U=triu(A))
-rate      12.70 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000024 sec
-tricount time:         0.000032 sec (dot product method)
-tri+prep time:         0.000093 sec (incl time to compute L and U)
-compute C time:        0.000024 sec
+compute C time:        0.000037 sec
 reduce (C) time:       0.000009 sec
-rate       3.09 million edges/sec (incl time for U=triu(A))
-rate       8.84 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000021 sec (nthreads: 2 speedup 1.16181)
-tricount time:         0.000028 sec (dot product method)
-tri+prep time:         0.000088 sec (incl time to compute L and U)
-compute C time:        0.000021 sec
-reduce (C) time:       0.000007 sec
-rate       3.25 million edges/sec (incl time for U=triu(A))
-rate      10.31 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000020 sec (nthreads: 4 speedup 1.20184)
-tricount time:         0.000027 sec (dot product method)
-tri+prep time:         0.000087 sec (incl time to compute L and U)
-compute C time:        0.000020 sec
-reduce (C) time:       0.000007 sec
-rate       3.29 million edges/sec (incl time for U=triu(A))
-rate      10.70 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000017 sec (nthreads: 8 speedup 1.38132)
-tricount time:         0.000024 sec (dot product method)
-tri+prep time:         0.000084 sec (incl time to compute L and U)
-compute C time:        0.000017 sec
-reduce (C) time:       0.000006 sec
-rate       3.41 million edges/sec (incl time for U=triu(A))
-rate      12.08 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000017 sec (nthreads: 16 speedup 1.4011)
-tricount time:         0.000023 sec (dot product method)
-tri+prep time:         0.000084 sec (incl time to compute L and U)
-compute C time:        0.000017 sec
-reduce (C) time:       0.000006 sec
-rate       3.43 million edges/sec (incl time for U=triu(A))
-rate      12.31 million edges/sec (just tricount itself)
-L*U' time (dot):         0.000018 sec (nthreads: 32 speedup 1.30773)
-tricount time:         0.000025 sec (dot product method)
-tri+prep time:         0.000085 sec (incl time to compute L and U)
-compute C time:        0.000018 sec
-reduce (C) time:       0.000007 sec
-rate       3.36 million edges/sec (incl time for U=triu(A))
-rate      11.51 million edges/sec (just tricount itself)
+rate     3.46 million edges/sec (incl time for U=triu(A))
+rate     6.16 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000012 sec (nthreads: 2 speedup 3.08711)
+tricount time:         0.000016 sec (dot product method)
+tri+prep time:         0.000053 sec (incl time to compute L and U)
+compute C time:        0.000012 sec
+reduce (C) time:       0.000004 sec
+rate     5.46 million edges/sec (incl time for U=triu(A))
+rate    17.74 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000010 sec (nthreads: 4 speedup 3.72583)
+tricount time:         0.000013 sec (dot product method)
+tri+prep time:         0.000050 sec (incl time to compute L and U)
+compute C time:        0.000010 sec
+reduce (C) time:       0.000003 sec
+rate     5.78 million edges/sec (incl time for U=triu(A))
+rate    21.63 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000009 sec (nthreads: 8 speedup 4.15757)
+tricount time:         0.000013 sec (dot product method)
+tri+prep time:         0.000049 sec (incl time to compute L and U)
+compute C time:        0.000009 sec
+reduce (C) time:       0.000003 sec
+rate     5.87 million edges/sec (incl time for U=triu(A))
+rate    22.94 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000014 sec
+tricount time:         0.000018 sec (dot product method)
+tri+prep time:         0.000054 sec (incl time to compute L and U)
+compute C time:        0.000014 sec
+reduce (C) time:       0.000004 sec
+rate     5.30 million edges/sec (incl time for U=triu(A))
+rate    16.09 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000012 sec (nthreads: 2 speedup 1.16494)
+tricount time:         0.000016 sec (dot product method)
+tri+prep time:         0.000052 sec (incl time to compute L and U)
+compute C time:        0.000012 sec
+reduce (C) time:       0.000004 sec
+rate     5.53 million edges/sec (incl time for U=triu(A))
+rate    18.47 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000010 sec (nthreads: 4 speedup 1.32671)
+tricount time:         0.000014 sec (dot product method)
+tri+prep time:         0.000050 sec (incl time to compute L and U)
+compute C time:        0.000010 sec
+reduce (C) time:       0.000003 sec
+rate     5.73 million edges/sec (incl time for U=triu(A))
+rate    20.90 million edges/sec (just tricount itself)
+L*U' time (dot):         0.000010 sec (nthreads: 8 speedup 1.42599)
+tricount time:         0.000012 sec (dot product method)
+tri+prep time:         0.000049 sec (incl time to compute L and U)
+compute C time:        0.000010 sec
+reduce (C) time:       0.000003 sec
+rate     5.89 million edges/sec (incl time for U=triu(A))
+rate    23.17 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.000029 sec
-tricount time:         0.000031 sec (saxpy method)
-tri+prep time:         0.000050 sec (incl time to compute L)
-compute C time:        0.000029 sec
-reduce (C) time:       0.000001 sec
-rate       5.74 million edges/sec (incl time for L=tril(A))
-rate       9.34 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000018 sec (nthreads: 2 speedup 1.64245)
-tricount time:         0.000019 sec (saxpy method)
-tri+prep time:         0.000038 sec (incl time to compute L)
-compute C time:        0.000018 sec
+C<L>=L*L time (saxpy):         0.000033 sec
+tricount time:         0.000033 sec (saxpy method)
+tri+prep time:         0.000040 sec (incl time to compute L)
+compute C time:        0.000033 sec
 reduce (C) time:       0.000001 sec
-rate       7.50 million edges/sec (incl time for L=tril(A))
-rate      15.13 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000014 sec (nthreads: 4 speedup 2.14474)
+rate     7.13 million edges/sec (incl time for L=tril(A))
+rate     8.64 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000015 sec (nthreads: 2 speedup 2.19723)
 tricount time:         0.000015 sec (saxpy method)
-tri+prep time:         0.000034 sec (incl time to compute L)
-compute C time:        0.000014 sec
+tri+prep time:         0.000022 sec (incl time to compute L)
+compute C time:        0.000015 sec
 reduce (C) time:       0.000001 sec
-rate       8.44 million edges/sec (incl time for L=tril(A))
-rate      19.46 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000012 sec (nthreads: 8 speedup 2.54277)
+rate    12.81 million edges/sec (incl time for L=tril(A))
+rate    18.67 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000012 sec (nthreads: 4 speedup 2.71136)
 tricount time:         0.000013 sec (saxpy method)
-tri+prep time:         0.000032 sec (incl time to compute L)
+tri+prep time:         0.000020 sec (incl time to compute L)
+compute C time:        0.000012 sec
+reduce (C) time:       0.000000 sec
+rate    14.69 million edges/sec (incl time for L=tril(A))
+rate    22.95 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.000012 sec (nthreads: 8 speedup 2.68857)
+tricount time:         0.000013 sec (saxpy method)
+tri+prep time:         0.000020 sec (incl time to compute L)
 compute C time:        0.000012 sec
 reduce (C) time:       0.000001 sec
-rate       9.01 million edges/sec (incl time for L=tril(A))
-rate      22.82 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000010 sec (nthreads: 16 speedup 2.83248)
-tricount time:         0.000011 sec (saxpy method)
-tri+prep time:         0.000031 sec (incl time to compute L)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000001 sec
-rate       9.38 million edges/sec (incl time for L=tril(A))
-rate      25.35 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.000010 sec (nthreads: 32 speedup 2.95663)
-tricount time:         0.000011 sec (saxpy method)
-tri+prep time:         0.000030 sec (incl time to compute L)
-compute C time:        0.000010 sec
-reduce (C) time:       0.000001 sec
-rate       9.52 million edges/sec (incl time for L=tril(A))
-rate      26.40 million edges/sec (just tricount itself)
+rate    14.56 million edges/sec (incl time for L=tril(A))
+rate    22.62 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
-Wathen: nx 200 ny 200 n 120801 nz 1762400 method 0, time: 0.103 sec
+Wathen: nx 200 ny 200 n 120801 nz 1762400 method 0, time: 0.133 sec
 
-total time to read A matrix:       0.103766 sec
+total time to read A matrix:       0.135067 sec
 
 n 120801 # edges 881200
-U=triu(A) time:        0.003212 sec
-L=tril(A) time:        0.000764 sec
+U=triu(A) time:        0.002560 sec
+L=tril(A) time:        0.002522 sec
 
 ------------------------------------- dot product method:
 # triangles 2160400
-L*U' time (dot):         0.029728 sec
-tricount time:         0.036760 sec (dot product method)
-tri+prep time:         0.040736 sec (incl time to compute L and U)
-compute C time:        0.029728 sec
-reduce (C) time:       0.007033 sec
-rate      21.63 million edges/sec (incl time for U=triu(A))
-rate      23.97 million edges/sec (just tricount itself)
-L*U' time (dot):         0.016974 sec (nthreads: 2 speedup 1.75138)
-tricount time:         0.020563 sec (dot product method)
-tri+prep time:         0.024539 sec (incl time to compute L and U)
-compute C time:        0.016974 sec
-reduce (C) time:       0.003590 sec
-rate      35.91 million edges/sec (incl time for U=triu(A))
-rate      42.85 million edges/sec (just tricount itself)
-L*U' time (dot):         0.009622 sec (nthreads: 4 speedup 3.08943)
-tricount time:         0.011826 sec (dot product method)
-tri+prep time:         0.015802 sec (incl time to compute L and U)
-compute C time:        0.009622 sec
-reduce (C) time:       0.002204 sec
-rate      55.76 million edges/sec (incl time for U=triu(A))
-rate      74.51 million edges/sec (just tricount itself)
-L*U' time (dot):         0.005170 sec (nthreads: 8 speedup 5.74975)
-tricount time:         0.006656 sec (dot product method)
-tri+prep time:         0.010632 sec (incl time to compute L and U)
-compute C time:        0.005170 sec
-reduce (C) time:       0.001486 sec
-rate      82.88 million edges/sec (incl time for U=triu(A))
-rate     132.39 million edges/sec (just tricount itself)
-L*U' time (dot):         0.003274 sec (nthreads: 16 speedup 9.08072)
-tricount time:         0.004067 sec (dot product method)
-tri+prep time:         0.008043 sec (incl time to compute L and U)
-compute C time:        0.003274 sec
-reduce (C) time:       0.000793 sec
-rate     109.56 million edges/sec (incl time for U=triu(A))
-rate     216.67 million edges/sec (just tricount itself)
-L*U' time (dot):         0.003559 sec (nthreads: 32 speedup 8.35206)
-tricount time:         0.004233 sec (dot product method)
-tri+prep time:         0.008209 sec (incl time to compute L and U)
-compute C time:        0.003559 sec
-reduce (C) time:       0.000674 sec
-rate     107.35 million edges/sec (incl time for U=triu(A))
-rate     208.17 million edges/sec (just tricount itself)
-L*U' time (dot):         0.029669 sec
-tricount time:         0.036863 sec (dot product method)
-tri+prep time:         0.040839 sec (incl time to compute L and U)
-compute C time:        0.029669 sec
-reduce (C) time:       0.007194 sec
-rate      21.58 million edges/sec (incl time for U=triu(A))
-rate      23.90 million edges/sec (just tricount itself)
-L*U' time (dot):         0.016548 sec (nthreads: 2 speedup 1.79293)
-tricount time:         0.020389 sec (dot product method)
-tri+prep time:         0.024364 sec (incl time to compute L and U)
-compute C time:        0.016548 sec
-reduce (C) time:       0.003841 sec
-rate      36.17 million edges/sec (incl time for U=triu(A))
-rate      43.22 million edges/sec (just tricount itself)
-L*U' time (dot):         0.008599 sec (nthreads: 4 speedup 3.45043)
-tricount time:         0.010527 sec (dot product method)
-tri+prep time:         0.014503 sec (incl time to compute L and U)
-compute C time:        0.008599 sec
-reduce (C) time:       0.001928 sec
-rate      60.76 million edges/sec (incl time for U=triu(A))
-rate      83.71 million edges/sec (just tricount itself)
-L*U' time (dot):         0.005491 sec (nthreads: 8 speedup 5.40308)
-tricount time:         0.006799 sec (dot product method)
-tri+prep time:         0.010775 sec (incl time to compute L and U)
-compute C time:        0.005491 sec
-reduce (C) time:       0.001308 sec
-rate      81.78 million edges/sec (incl time for U=triu(A))
-rate     129.60 million edges/sec (just tricount itself)
-L*U' time (dot):         0.003333 sec (nthreads: 16 speedup 8.90269)
-tricount time:         0.004135 sec (dot product method)
-tri+prep time:         0.008111 sec (incl time to compute L and U)
-compute C time:        0.003333 sec
-reduce (C) time:       0.000802 sec
-rate     108.64 million edges/sec (incl time for U=triu(A))
-rate     213.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.003521 sec (nthreads: 32 speedup 8.42572)
-tricount time:         0.004265 sec (dot product method)
-tri+prep time:         0.008241 sec (incl time to compute L and U)
-compute C time:        0.003521 sec
-reduce (C) time:       0.000743 sec
-rate     106.93 million edges/sec (incl time for U=triu(A))
-rate     206.63 million edges/sec (just tricount itself)
+L*U' time (dot):         0.034713 sec
+tricount time:         0.038972 sec (dot product method)
+tri+prep time:         0.044054 sec (incl time to compute L and U)
+compute C time:        0.034713 sec
+reduce (C) time:       0.004259 sec
+rate    20.00 million edges/sec (incl time for U=triu(A))
+rate    22.61 million edges/sec (just tricount itself)
+L*U' time (dot):         0.022245 sec (nthreads: 2 speedup 1.56046)
+tricount time:         0.026859 sec (dot product method)
+tri+prep time:         0.031941 sec (incl time to compute L and U)
+compute C time:        0.022245 sec
+reduce (C) time:       0.004614 sec
+rate    27.59 million edges/sec (incl time for U=triu(A))
+rate    32.81 million edges/sec (just tricount itself)
+L*U' time (dot):         0.006826 sec (nthreads: 4 speedup 5.08539)
+tricount time:         0.008731 sec (dot product method)
+tri+prep time:         0.013813 sec (incl time to compute L and U)
+compute C time:        0.006826 sec
+reduce (C) time:       0.001905 sec
+rate    63.79 million edges/sec (incl time for U=triu(A))
+rate   100.93 million edges/sec (just tricount itself)
+L*U' time (dot):         0.008368 sec (nthreads: 8 speedup 4.14848)
+tricount time:         0.010726 sec (dot product method)
+tri+prep time:         0.015808 sec (incl time to compute L and U)
+compute C time:        0.008368 sec
+reduce (C) time:       0.002358 sec
+rate    55.74 million edges/sec (incl time for U=triu(A))
+rate    82.16 million edges/sec (just tricount itself)
+L*U' time (dot):         0.026884 sec
+tricount time:         0.031204 sec (dot product method)
+tri+prep time:         0.036286 sec (incl time to compute L and U)
+compute C time:        0.026884 sec
+reduce (C) time:       0.004320 sec
+rate    24.28 million edges/sec (incl time for U=triu(A))
+rate    28.24 million edges/sec (just tricount itself)
+L*U' time (dot):         0.013448 sec (nthreads: 2 speedup 1.99905)
+tricount time:         0.016759 sec (dot product method)
+tri+prep time:         0.021841 sec (incl time to compute L and U)
+compute C time:        0.013448 sec
+reduce (C) time:       0.003310 sec
+rate    40.35 million edges/sec (incl time for U=triu(A))
+rate    52.58 million edges/sec (just tricount itself)
+L*U' time (dot):         0.007162 sec (nthreads: 4 speedup 3.75373)
+tricount time:         0.009101 sec (dot product method)
+tri+prep time:         0.014183 sec (incl time to compute L and U)
+compute C time:        0.007162 sec
+reduce (C) time:       0.001939 sec
+rate    62.13 million edges/sec (incl time for U=triu(A))
+rate    96.83 million edges/sec (just tricount itself)
+L*U' time (dot):         0.008760 sec (nthreads: 8 speedup 3.069)
+tricount time:         0.011839 sec (dot product method)
+tri+prep time:         0.016921 sec (incl time to compute L and U)
+compute C time:        0.008760 sec
+reduce (C) time:       0.003079 sec
+rate    52.08 million edges/sec (incl time for U=triu(A))
+rate    74.43 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.017455 sec
-tricount time:         0.021370 sec (saxpy method)
-tri+prep time:         0.022134 sec (incl time to compute L)
-compute C time:        0.017455 sec
-reduce (C) time:       0.003915 sec
-rate      39.81 million edges/sec (incl time for L=tril(A))
-rate      41.23 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.017756 sec (nthreads: 2 speedup 0.983049)
-tricount time:         0.019520 sec (saxpy method)
-tri+prep time:         0.020284 sec (incl time to compute L)
-compute C time:        0.017756 sec
-reduce (C) time:       0.001764 sec
-rate      43.44 million edges/sec (incl time for L=tril(A))
-rate      45.14 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.010580 sec (nthreads: 4 speedup 1.6498)
-tricount time:         0.011752 sec (saxpy method)
-tri+prep time:         0.012516 sec (incl time to compute L)
-compute C time:        0.010580 sec
-reduce (C) time:       0.001172 sec
-rate      70.41 million edges/sec (incl time for L=tril(A))
-rate      74.98 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.006899 sec (nthreads: 8 speedup 2.52988)
-tricount time:         0.007487 sec (saxpy method)
-tri+prep time:         0.008251 sec (incl time to compute L)
-compute C time:        0.006899 sec
-reduce (C) time:       0.000588 sec
-rate     106.80 million edges/sec (incl time for L=tril(A))
-rate     117.69 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.005221 sec (nthreads: 16 speedup 3.34333)
-tricount time:         0.005529 sec (saxpy method)
-tri+prep time:         0.006293 sec (incl time to compute L)
-compute C time:        0.005221 sec
-reduce (C) time:       0.000308 sec
-rate     140.03 million edges/sec (incl time for L=tril(A))
-rate     159.37 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.006903 sec (nthreads: 32 speedup 2.52844)
-tricount time:         0.007183 sec (saxpy method)
-tri+prep time:         0.007947 sec (incl time to compute L)
-compute C time:        0.006903 sec
-reduce (C) time:       0.000279 sec
-rate     110.89 million edges/sec (incl time for L=tril(A))
-rate     122.68 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.023525 sec
+tricount time:         0.025656 sec (saxpy method)
+tri+prep time:         0.028178 sec (incl time to compute L)
+compute C time:        0.023525 sec
+reduce (C) time:       0.002131 sec
+rate    31.27 million edges/sec (incl time for L=tril(A))
+rate    34.35 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.011029 sec (nthreads: 2 speedup 2.13306)
+tricount time:         0.011980 sec (saxpy method)
+tri+prep time:         0.014502 sec (incl time to compute L)
+compute C time:        0.011029 sec
+reduce (C) time:       0.000951 sec
+rate    60.76 million edges/sec (incl time for L=tril(A))
+rate    73.56 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.005803 sec (nthreads: 4 speedup 4.05405)
+tricount time:         0.006265 sec (saxpy method)
+tri+prep time:         0.008787 sec (incl time to compute L)
+compute C time:        0.005803 sec
+reduce (C) time:       0.000462 sec
+rate   100.28 million edges/sec (incl time for L=tril(A))
+rate   140.65 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.006089 sec (nthreads: 8 speedup 3.86381)
+tricount time:         0.006851 sec (saxpy method)
+tri+prep time:         0.009374 sec (incl time to compute L)
+compute C time:        0.006089 sec
+reduce (C) time:       0.000763 sec
+rate    94.01 million edges/sec (incl time for L=tril(A))
+rate   128.62 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
-random 10000 by 10000, nz: 199768, method 0 time 0.041 sec
+random 10000 by 10000, nz: 199768, method 0 time 0.023 sec
 
-total time to read A matrix:       0.046197 sec
+total time to read A matrix:       0.023445 sec
 
 n 10000 # edges 99884
-U=triu(A) time:        0.003044 sec
-L=tril(A) time:        0.005432 sec
+U=triu(A) time:        0.000208 sec
+L=tril(A) time:        0.000138 sec
 
 ------------------------------------- dot product method:
 # triangles 1357
-L*U' time (dot):         0.013000 sec
-tricount time:         0.013435 sec (dot product method)
-tri+prep time:         0.021912 sec (incl time to compute L and U)
-compute C time:        0.013000 sec
-reduce (C) time:       0.000435 sec
-rate       4.56 million edges/sec (incl time for U=triu(A))
-rate       7.43 million edges/sec (just tricount itself)
-L*U' time (dot):         0.007464 sec (nthreads: 2 speedup 1.7418)
-tricount time:         0.007735 sec (dot product method)
-tri+prep time:         0.016212 sec (incl time to compute L and U)
-compute C time:        0.007464 sec
-reduce (C) time:       0.000272 sec
-rate       6.16 million edges/sec (incl time for U=triu(A))
-rate      12.91 million edges/sec (just tricount itself)
-L*U' time (dot):         0.004295 sec (nthreads: 4 speedup 3.02698)
-tricount time:         0.004475 sec (dot product method)
-tri+prep time:         0.012952 sec (incl time to compute L and U)
-compute C time:        0.004295 sec
-reduce (C) time:       0.000180 sec
-rate       7.71 million edges/sec (incl time for U=triu(A))
-rate      22.32 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002747 sec (nthreads: 8 speedup 4.73244)
-tricount time:         0.002873 sec (dot product method)
-tri+prep time:         0.011350 sec (incl time to compute L and U)
-compute C time:        0.002747 sec
-reduce (C) time:       0.000126 sec
-rate       8.80 million edges/sec (incl time for U=triu(A))
-rate      34.76 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001649 sec (nthreads: 16 speedup 7.88358)
-tricount time:         0.001983 sec (dot product method)
-tri+prep time:         0.010460 sec (incl time to compute L and U)
-compute C time:        0.001649 sec
-reduce (C) time:       0.000334 sec
-rate       9.55 million edges/sec (incl time for U=triu(A))
-rate      50.36 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001582 sec (nthreads: 32 speedup 8.21994)
-tricount time:         0.002666 sec (dot product method)
-tri+prep time:         0.011143 sec (incl time to compute L and U)
-compute C time:        0.001582 sec
-reduce (C) time:       0.001085 sec
-rate       8.96 million edges/sec (incl time for U=triu(A))
-rate      37.46 million edges/sec (just tricount itself)
-L*U' time (dot):         0.012460 sec
-tricount time:         0.012877 sec (dot product method)
-tri+prep time:         0.021354 sec (incl time to compute L and U)
-compute C time:        0.012460 sec
-reduce (C) time:       0.000417 sec
-rate       4.68 million edges/sec (incl time for U=triu(A))
-rate       7.76 million edges/sec (just tricount itself)
-L*U' time (dot):         0.006798 sec (nthreads: 2 speedup 1.83296)
-tricount time:         0.007067 sec (dot product method)
-tri+prep time:         0.015544 sec (incl time to compute L and U)
-compute C time:        0.006798 sec
-reduce (C) time:       0.000269 sec
-rate       6.43 million edges/sec (incl time for U=triu(A))
-rate      14.13 million edges/sec (just tricount itself)
-L*U' time (dot):         0.004116 sec (nthreads: 4 speedup 3.02739)
-tricount time:         0.004309 sec (dot product method)
-tri+prep time:         0.012786 sec (incl time to compute L and U)
-compute C time:        0.004116 sec
-reduce (C) time:       0.000193 sec
-rate       7.81 million edges/sec (incl time for U=triu(A))
-rate      23.18 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002640 sec (nthreads: 8 speedup 4.71911)
-tricount time:         0.002785 sec (dot product method)
-tri+prep time:         0.011262 sec (incl time to compute L and U)
-compute C time:        0.002640 sec
-reduce (C) time:       0.000145 sec
-rate       8.87 million edges/sec (incl time for U=triu(A))
-rate      35.86 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001642 sec (nthreads: 16 speedup 7.58824)
-tricount time:         0.001970 sec (dot product method)
-tri+prep time:         0.010446 sec (incl time to compute L and U)
-compute C time:        0.001642 sec
-reduce (C) time:       0.000328 sec
-rate       9.56 million edges/sec (incl time for U=triu(A))
-rate      50.71 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001565 sec (nthreads: 32 speedup 7.95926)
-tricount time:         0.002637 sec (dot product method)
-tri+prep time:         0.011114 sec (incl time to compute L and U)
-compute C time:        0.001565 sec
-reduce (C) time:       0.001072 sec
-rate       8.99 million edges/sec (incl time for U=triu(A))
-rate      37.87 million edges/sec (just tricount itself)
+L*U' time (dot):         0.010097 sec
+tricount time:         0.010654 sec (dot product method)
+tri+prep time:         0.011000 sec (incl time to compute L and U)
+compute C time:        0.010097 sec
+reduce (C) time:       0.000557 sec
+rate     9.08 million edges/sec (incl time for U=triu(A))
+rate     9.38 million edges/sec (just tricount itself)
+L*U' time (dot):         0.005951 sec (nthreads: 2 speedup 1.69652)
+tricount time:         0.006160 sec (dot product method)
+tri+prep time:         0.006506 sec (incl time to compute L and U)
+compute C time:        0.005951 sec
+reduce (C) time:       0.000209 sec
+rate    15.35 million edges/sec (incl time for U=triu(A))
+rate    16.22 million edges/sec (just tricount itself)
+L*U' time (dot):         0.003291 sec (nthreads: 4 speedup 3.06807)
+tricount time:         0.003470 sec (dot product method)
+tri+prep time:         0.003816 sec (incl time to compute L and U)
+compute C time:        0.003291 sec
+reduce (C) time:       0.000179 sec
+rate    26.17 million edges/sec (incl time for U=triu(A))
+rate    28.78 million edges/sec (just tricount itself)
+L*U' time (dot):         0.005730 sec (nthreads: 8 speedup 1.76216)
+tricount time:         0.005924 sec (dot product method)
+tri+prep time:         0.006270 sec (incl time to compute L and U)
+compute C time:        0.005730 sec
+reduce (C) time:       0.000195 sec
+rate    15.93 million edges/sec (incl time for U=triu(A))
+rate    16.86 million edges/sec (just tricount itself)
+L*U' time (dot):         0.009789 sec
+tricount time:         0.010226 sec (dot product method)
+tri+prep time:         0.010572 sec (incl time to compute L and U)
+compute C time:        0.009789 sec
+reduce (C) time:       0.000436 sec
+rate     9.45 million edges/sec (incl time for U=triu(A))
+rate     9.77 million edges/sec (just tricount itself)
+L*U' time (dot):         0.005670 sec (nthreads: 2 speedup 1.72665)
+tricount time:         0.005965 sec (dot product method)
+tri+prep time:         0.006311 sec (incl time to compute L and U)
+compute C time:        0.005670 sec
+reduce (C) time:       0.000295 sec
+rate    15.83 million edges/sec (incl time for U=triu(A))
+rate    16.74 million edges/sec (just tricount itself)
+L*U' time (dot):         0.003481 sec (nthreads: 4 speedup 2.81227)
+tricount time:         0.003752 sec (dot product method)
+tri+prep time:         0.004098 sec (incl time to compute L and U)
+compute C time:        0.003481 sec
+reduce (C) time:       0.000271 sec
+rate    24.37 million edges/sec (incl time for U=triu(A))
+rate    26.62 million edges/sec (just tricount itself)
+L*U' time (dot):         0.003737 sec (nthreads: 8 speedup 2.61945)
+tricount time:         0.003946 sec (dot product method)
+tri+prep time:         0.004292 sec (incl time to compute L and U)
+compute C time:        0.003737 sec
+reduce (C) time:       0.000209 sec
+rate    23.27 million edges/sec (incl time for U=triu(A))
+rate    25.31 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.004013 sec
-tricount time:         0.004021 sec (saxpy method)
-tri+prep time:         0.009453 sec (incl time to compute L)
-compute C time:        0.004013 sec
-reduce (C) time:       0.000008 sec
-rate      10.57 million edges/sec (incl time for L=tril(A))
-rate      24.84 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.002586 sec (nthreads: 2 speedup 1.55183)
-tricount time:         0.002593 sec (saxpy method)
-tri+prep time:         0.008026 sec (incl time to compute L)
-compute C time:        0.002586 sec
-reduce (C) time:       0.000007 sec
-rate      12.45 million edges/sec (incl time for L=tril(A))
-rate      38.52 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001477 sec (nthreads: 4 speedup 2.71623)
-tricount time:         0.001485 sec (saxpy method)
-tri+prep time:         0.006917 sec (incl time to compute L)
-compute C time:        0.001477 sec
-reduce (C) time:       0.000008 sec
-rate      14.44 million edges/sec (incl time for L=tril(A))
-rate      67.26 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001178 sec (nthreads: 8 speedup 3.40719)
-tricount time:         0.001185 sec (saxpy method)
-tri+prep time:         0.006618 sec (incl time to compute L)
-compute C time:        0.001178 sec
-reduce (C) time:       0.000008 sec
-rate      15.09 million edges/sec (incl time for L=tril(A))
-rate      84.26 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001535 sec (nthreads: 16 speedup 2.6149)
-tricount time:         0.001542 sec (saxpy method)
-tri+prep time:         0.006974 sec (incl time to compute L)
-compute C time:        0.001535 sec
-reduce (C) time:       0.000008 sec
-rate      14.32 million edges/sec (incl time for L=tril(A))
-rate      64.77 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.002617 sec (nthreads: 32 speedup 1.53325)
-tricount time:         0.002626 sec (saxpy method)
-tri+prep time:         0.008059 sec (incl time to compute L)
-compute C time:        0.002617 sec
-reduce (C) time:       0.000009 sec
-rate      12.39 million edges/sec (incl time for L=tril(A))
-rate      38.03 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.003094 sec
+tricount time:         0.003098 sec (saxpy method)
+tri+prep time:         0.003237 sec (incl time to compute L)
+compute C time:        0.003094 sec
+reduce (C) time:       0.000004 sec
+rate    30.86 million edges/sec (incl time for L=tril(A))
+rate    32.24 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.001952 sec (nthreads: 2 speedup 1.58528)
+tricount time:         0.001956 sec (saxpy method)
+tri+prep time:         0.002094 sec (incl time to compute L)
+compute C time:        0.001952 sec
+reduce (C) time:       0.000004 sec
+rate    47.70 million edges/sec (incl time for L=tril(A))
+rate    51.07 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.001451 sec (nthreads: 4 speedup 2.13166)
+tricount time:         0.001456 sec (saxpy method)
+tri+prep time:         0.001594 sec (incl time to compute L)
+compute C time:        0.001451 sec
+reduce (C) time:       0.000004 sec
+rate    62.65 million edges/sec (incl time for L=tril(A))
+rate    68.60 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.001367 sec (nthreads: 8 speedup 2.26374)
+tricount time:         0.001373 sec (saxpy method)
+tri+prep time:         0.001511 sec (incl time to compute L)
+compute C time:        0.001367 sec
+reduce (C) time:       0.000006 sec
+rate    66.09 million edges/sec (incl time for L=tril(A))
+rate    72.75 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
-random 10000 by 10000, nz: 199768, method 1 time 0.031 sec
+random 10000 by 10000, nz: 199768, method 1 time 0.020 sec
 
-total time to read A matrix:       0.035698 sec
+total time to read A matrix:       0.020932 sec
 
 n 10000 # edges 99884
-U=triu(A) time:        0.003238 sec
-L=tril(A) time:        0.002742 sec
+U=triu(A) time:        0.000217 sec
+L=tril(A) time:        0.000126 sec
 
 ------------------------------------- dot product method:
 # triangles 1357
-L*U' time (dot):         0.017783 sec
-tricount time:         0.018320 sec (dot product method)
-tri+prep time:         0.024300 sec (incl time to compute L and U)
-compute C time:        0.017783 sec
-reduce (C) time:       0.000538 sec
-rate       4.11 million edges/sec (incl time for U=triu(A))
-rate       5.45 million edges/sec (just tricount itself)
-L*U' time (dot):         0.008543 sec (nthreads: 2 speedup 2.08156)
-tricount time:         0.008848 sec (dot product method)
-tri+prep time:         0.014827 sec (incl time to compute L and U)
-compute C time:        0.008543 sec
-reduce (C) time:       0.000305 sec
-rate       6.74 million edges/sec (incl time for U=triu(A))
-rate      11.29 million edges/sec (just tricount itself)
-L*U' time (dot):         0.005523 sec (nthreads: 4 speedup 3.22006)
-tricount time:         0.005739 sec (dot product method)
-tri+prep time:         0.011719 sec (incl time to compute L and U)
-compute C time:        0.005523 sec
-reduce (C) time:       0.000216 sec
-rate       8.52 million edges/sec (incl time for U=triu(A))
-rate      17.40 million edges/sec (just tricount itself)
-L*U' time (dot):         0.003153 sec (nthreads: 8 speedup 5.63973)
-tricount time:         0.003300 sec (dot product method)
-tri+prep time:         0.009279 sec (incl time to compute L and U)
-compute C time:        0.003153 sec
-reduce (C) time:       0.000146 sec
-rate      10.76 million edges/sec (incl time for U=triu(A))
-rate      30.27 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002032 sec (nthreads: 16 speedup 8.7503)
-tricount time:         0.002410 sec (dot product method)
-tri+prep time:         0.008390 sec (incl time to compute L and U)
-compute C time:        0.002032 sec
-reduce (C) time:       0.000378 sec
-rate      11.91 million edges/sec (incl time for U=triu(A))
-rate      41.44 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001820 sec (nthreads: 32 speedup 9.77109)
-tricount time:         0.003598 sec (dot product method)
-tri+prep time:         0.009578 sec (incl time to compute L and U)
-compute C time:        0.001820 sec
-reduce (C) time:       0.001778 sec
-rate      10.43 million edges/sec (incl time for U=triu(A))
-rate      27.76 million edges/sec (just tricount itself)
-L*U' time (dot):         0.015857 sec
-tricount time:         0.016382 sec (dot product method)
-tri+prep time:         0.022361 sec (incl time to compute L and U)
-compute C time:        0.015857 sec
-reduce (C) time:       0.000525 sec
-rate       4.47 million edges/sec (incl time for U=triu(A))
-rate       6.10 million edges/sec (just tricount itself)
-L*U' time (dot):         0.008181 sec (nthreads: 2 speedup 1.93825)
-tricount time:         0.008490 sec (dot product method)
-tri+prep time:         0.014470 sec (incl time to compute L and U)
-compute C time:        0.008181 sec
-reduce (C) time:       0.000309 sec
-rate       6.90 million edges/sec (incl time for U=triu(A))
-rate      11.76 million edges/sec (just tricount itself)
-L*U' time (dot):         0.004574 sec (nthreads: 4 speedup 3.46686)
-tricount time:         0.004794 sec (dot product method)
-tri+prep time:         0.010774 sec (incl time to compute L and U)
-compute C time:        0.004574 sec
-reduce (C) time:       0.000220 sec
-rate       9.27 million edges/sec (incl time for U=triu(A))
-rate      20.84 million edges/sec (just tricount itself)
-L*U' time (dot):         0.002862 sec (nthreads: 8 speedup 5.54063)
-tricount time:         0.003035 sec (dot product method)
-tri+prep time:         0.009014 sec (incl time to compute L and U)
-compute C time:        0.002862 sec
-reduce (C) time:       0.000173 sec
-rate      11.08 million edges/sec (incl time for U=triu(A))
-rate      32.91 million edges/sec (just tricount itself)
-L*U' time (dot):         0.001829 sec (nthreads: 16 speedup 8.67077)
-tricount time:         0.002204 sec (dot product method)
-tri+prep time:         0.008184 sec (incl time to compute L and U)
-compute C time:        0.001829 sec
+L*U' time (dot):         0.010906 sec
+tricount time:         0.011256 sec (dot product method)
+tri+prep time:         0.011598 sec (incl time to compute L and U)
+compute C time:        0.010906 sec
+reduce (C) time:       0.000350 sec
+rate     8.61 million edges/sec (incl time for U=triu(A))
+rate     8.87 million edges/sec (just tricount itself)
+L*U' time (dot):         0.006398 sec (nthreads: 2 speedup 1.70455)
+tricount time:         0.006774 sec (dot product method)
+tri+prep time:         0.007116 sec (incl time to compute L and U)
+compute C time:        0.006398 sec
 reduce (C) time:       0.000376 sec
-rate      12.20 million edges/sec (incl time for U=triu(A))
-rate      45.31 million edges/sec (just tricount itself)
-L*U' time (dot):         0.004400 sec (nthreads: 32 speedup 3.60393)
-tricount time:         0.005704 sec (dot product method)
-tri+prep time:         0.011683 sec (incl time to compute L and U)
-compute C time:        0.004400 sec
-reduce (C) time:       0.001304 sec
-rate       8.55 million edges/sec (incl time for U=triu(A))
-rate      17.51 million edges/sec (just tricount itself)
+rate    14.04 million edges/sec (incl time for U=triu(A))
+rate    14.75 million edges/sec (just tricount itself)
+L*U' time (dot):         0.004006 sec (nthreads: 4 speedup 2.72215)
+tricount time:         0.004308 sec (dot product method)
+tri+prep time:         0.004651 sec (incl time to compute L and U)
+compute C time:        0.004006 sec
+reduce (C) time:       0.000302 sec
+rate    21.48 million edges/sec (incl time for U=triu(A))
+rate    23.19 million edges/sec (just tricount itself)
+L*U' time (dot):         0.006057 sec (nthreads: 8 speedup 1.80051)
+tricount time:         0.006477 sec (dot product method)
+tri+prep time:         0.006820 sec (incl time to compute L and U)
+compute C time:        0.006057 sec
+reduce (C) time:       0.000420 sec
+rate    14.65 million edges/sec (incl time for U=triu(A))
+rate    15.42 million edges/sec (just tricount itself)
+L*U' time (dot):         0.011083 sec
+tricount time:         0.011476 sec (dot product method)
+tri+prep time:         0.011819 sec (incl time to compute L and U)
+compute C time:        0.011083 sec
+reduce (C) time:       0.000393 sec
+rate     8.45 million edges/sec (incl time for U=triu(A))
+rate     8.70 million edges/sec (just tricount itself)
+L*U' time (dot):         0.006107 sec (nthreads: 2 speedup 1.81493)
+tricount time:         0.006325 sec (dot product method)
+tri+prep time:         0.006668 sec (incl time to compute L and U)
+compute C time:        0.006107 sec
+reduce (C) time:       0.000219 sec
+rate    14.98 million edges/sec (incl time for U=triu(A))
+rate    15.79 million edges/sec (just tricount itself)
+L*U' time (dot):         0.003795 sec (nthreads: 4 speedup 2.92064)
+tricount time:         0.004084 sec (dot product method)
+tri+prep time:         0.004426 sec (incl time to compute L and U)
+compute C time:        0.003795 sec
+reduce (C) time:       0.000289 sec
+rate    22.57 million edges/sec (incl time for U=triu(A))
+rate    24.46 million edges/sec (just tricount itself)
+L*U' time (dot):         0.005885 sec (nthreads: 8 speedup 1.88311)
+tricount time:         0.006190 sec (dot product method)
+tri+prep time:         0.006532 sec (incl time to compute L and U)
+compute C time:        0.005885 sec
+reduce (C) time:       0.000304 sec
+rate    15.29 million edges/sec (incl time for U=triu(A))
+rate    16.14 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         0.004890 sec
-tricount time:         0.004900 sec (saxpy method)
-tri+prep time:         0.007641 sec (incl time to compute L)
-compute C time:        0.004890 sec
-reduce (C) time:       0.000009 sec
-rate      13.07 million edges/sec (incl time for L=tril(A))
-rate      20.39 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.003485 sec (nthreads: 2 speedup 1.40337)
-tricount time:         0.003494 sec (saxpy method)
-tri+prep time:         0.006236 sec (incl time to compute L)
-compute C time:        0.003485 sec
-reduce (C) time:       0.000009 sec
-rate      16.02 million edges/sec (incl time for L=tril(A))
-rate      28.59 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001820 sec (nthreads: 4 speedup 2.6866)
-tricount time:         0.001829 sec (saxpy method)
-tri+prep time:         0.004571 sec (incl time to compute L)
-compute C time:        0.001820 sec
-reduce (C) time:       0.000009 sec
-rate      21.85 million edges/sec (incl time for L=tril(A))
-rate      54.61 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001254 sec (nthreads: 8 speedup 3.89887)
-tricount time:         0.001264 sec (saxpy method)
-tri+prep time:         0.004005 sec (incl time to compute L)
-compute C time:        0.001254 sec
-reduce (C) time:       0.000009 sec
-rate      24.94 million edges/sec (incl time for L=tril(A))
-rate      79.05 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.001540 sec (nthreads: 16 speedup 3.17638)
-tricount time:         0.001549 sec (saxpy method)
-tri+prep time:         0.004291 sec (incl time to compute L)
-compute C time:        0.001540 sec
-reduce (C) time:       0.000009 sec
-rate      23.28 million edges/sec (incl time for L=tril(A))
-rate      64.49 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.002987 sec (nthreads: 32 speedup 1.63721)
-tricount time:         0.002998 sec (saxpy method)
-tri+prep time:         0.005740 sec (incl time to compute L)
-compute C time:        0.002987 sec
-reduce (C) time:       0.000011 sec
-rate      17.40 million edges/sec (incl time for L=tril(A))
-rate      33.31 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.003407 sec
+tricount time:         0.003411 sec (saxpy method)
+tri+prep time:         0.003537 sec (incl time to compute L)
+compute C time:        0.003407 sec
+reduce (C) time:       0.000005 sec
+rate    28.24 million edges/sec (incl time for L=tril(A))
+rate    29.28 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.002249 sec (nthreads: 2 speedup 1.51486)
+tricount time:         0.002254 sec (saxpy method)
+tri+prep time:         0.002380 sec (incl time to compute L)
+compute C time:        0.002249 sec
+reduce (C) time:       0.000005 sec
+rate    41.98 million edges/sec (incl time for L=tril(A))
+rate    44.32 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.001689 sec (nthreads: 4 speedup 2.01672)
+tricount time:         0.001697 sec (saxpy method)
+tri+prep time:         0.001822 sec (incl time to compute L)
+compute C time:        0.001689 sec
+reduce (C) time:       0.000007 sec
+rate    54.81 million edges/sec (incl time for L=tril(A))
+rate    58.87 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         0.001550 sec (nthreads: 8 speedup 2.19831)
+tricount time:         0.001558 sec (saxpy method)
+tri+prep time:         0.001684 sec (incl time to compute L)
+compute C time:        0.001550 sec
+reduce (C) time:       0.000008 sec
+rate    59.32 million edges/sec (incl time for L=tril(A))
+rate    64.11 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
-random 100000 by 100000, nz: 19980330, method 0 time 2.206 sec
+random 100000 by 100000, nz: 19980330, method 0 time 2.288 sec
 
-total time to read A matrix:       2.223417 sec
+total time to read A matrix:       2.312987 sec
 
 n 100000 # edges 9990165
-U=triu(A) time:        0.009389 sec
-L=tril(A) time:        0.008017 sec
+U=triu(A) time:        0.018055 sec
+L=tril(A) time:        0.019905 sec
 
 ------------------------------------- dot product method:
 # triangles 1330131
-L*U' time (dot):        10.154736 sec
-tricount time:        10.211061 sec (dot product method)
-tri+prep time:        10.228468 sec (incl time to compute L and U)
-compute C time:       10.154736 sec
-reduce (C) time:       0.056325 sec
-rate       0.98 million edges/sec (incl time for U=triu(A))
-rate       0.98 million edges/sec (just tricount itself)
-L*U' time (dot):         5.144116 sec (nthreads: 2 speedup 1.97405)
-tricount time:         5.178243 sec (dot product method)
-tri+prep time:         5.195649 sec (incl time to compute L and U)
-compute C time:        5.144116 sec
-reduce (C) time:       0.034128 sec
-rate       1.92 million edges/sec (incl time for U=triu(A))
-rate       1.93 million edges/sec (just tricount itself)
-L*U' time (dot):         2.581320 sec (nthreads: 4 speedup 3.93393)
-tricount time:         2.602296 sec (dot product method)
-tri+prep time:         2.619702 sec (incl time to compute L and U)
-compute C time:        2.581320 sec
-reduce (C) time:       0.020976 sec
-rate       3.81 million edges/sec (incl time for U=triu(A))
-rate       3.84 million edges/sec (just tricount itself)
-L*U' time (dot):         1.304316 sec (nthreads: 8 speedup 7.78549)
-tricount time:         1.319417 sec (dot product method)
-tri+prep time:         1.336823 sec (incl time to compute L and U)
-compute C time:        1.304316 sec
-reduce (C) time:       0.015101 sec
-rate       7.47 million edges/sec (incl time for U=triu(A))
-rate       7.57 million edges/sec (just tricount itself)
-L*U' time (dot):         0.670855 sec (nthreads: 16 speedup 15.137)
-tricount time:         0.683685 sec (dot product method)
-tri+prep time:         0.701091 sec (incl time to compute L and U)
-compute C time:        0.670855 sec
-reduce (C) time:       0.012830 sec
-rate      14.25 million edges/sec (incl time for U=triu(A))
-rate      14.61 million edges/sec (just tricount itself)
-L*U' time (dot):         0.400067 sec (nthreads: 32 speedup 25.3826)
-tricount time:         0.411393 sec (dot product method)
-tri+prep time:         0.428799 sec (incl time to compute L and U)
-compute C time:        0.400067 sec
-reduce (C) time:       0.011326 sec
-rate      23.30 million edges/sec (incl time for U=triu(A))
-rate      24.28 million edges/sec (just tricount itself)
-L*U' time (dot):        10.318314 sec
-tricount time:        10.367729 sec (dot product method)
-tri+prep time:        10.385135 sec (incl time to compute L and U)
-compute C time:       10.318314 sec
-reduce (C) time:       0.049414 sec
-rate       0.96 million edges/sec (incl time for U=triu(A))
-rate       0.96 million edges/sec (just tricount itself)
-L*U' time (dot):         5.231802 sec (nthreads: 2 speedup 1.97223)
-tricount time:         5.262158 sec (dot product method)
-tri+prep time:         5.279564 sec (incl time to compute L and U)
-compute C time:        5.231802 sec
-reduce (C) time:       0.030357 sec
-rate       1.89 million edges/sec (incl time for U=triu(A))
-rate       1.90 million edges/sec (just tricount itself)
-L*U' time (dot):         2.647840 sec (nthreads: 4 speedup 3.89688)
-tricount time:         2.667760 sec (dot product method)
-tri+prep time:         2.685166 sec (incl time to compute L and U)
-compute C time:        2.647840 sec
-reduce (C) time:       0.019920 sec
-rate       3.72 million edges/sec (incl time for U=triu(A))
-rate       3.74 million edges/sec (just tricount itself)
-L*U' time (dot):         1.334589 sec (nthreads: 8 speedup 7.73146)
-tricount time:         1.349294 sec (dot product method)
-tri+prep time:         1.366700 sec (incl time to compute L and U)
-compute C time:        1.334589 sec
-reduce (C) time:       0.014705 sec
-rate       7.31 million edges/sec (incl time for U=triu(A))
-rate       7.40 million edges/sec (just tricount itself)
-L*U' time (dot):         0.682957 sec (nthreads: 16 speedup 15.1083)
-tricount time:         0.694423 sec (dot product method)
-tri+prep time:         0.711829 sec (incl time to compute L and U)
-compute C time:        0.682957 sec
-reduce (C) time:       0.011466 sec
-rate      14.03 million edges/sec (incl time for U=triu(A))
-rate      14.39 million edges/sec (just tricount itself)
-L*U' time (dot):         0.406612 sec (nthreads: 32 speedup 25.3763)
-tricount time:         0.418449 sec (dot product method)
-tri+prep time:         0.435855 sec (incl time to compute L and U)
-compute C time:        0.406612 sec
-reduce (C) time:       0.011837 sec
-rate      22.92 million edges/sec (incl time for U=triu(A))
-rate      23.87 million edges/sec (just tricount itself)
+L*U' time (dot):        10.406264 sec
+tricount time:        10.444652 sec (dot product method)
+tri+prep time:        10.482612 sec (incl time to compute L and U)
+compute C time:       10.406264 sec
+reduce (C) time:       0.038388 sec
+rate     0.95 million edges/sec (incl time for U=triu(A))
+rate     0.96 million edges/sec (just tricount itself)
+L*U' time (dot):         5.500298 sec (nthreads: 2 speedup 1.89195)
+tricount time:         5.522717 sec (dot product method)
+tri+prep time:         5.560678 sec (incl time to compute L and U)
+compute C time:        5.500298 sec
+reduce (C) time:       0.022419 sec
+rate     1.80 million edges/sec (incl time for U=triu(A))
+rate     1.81 million edges/sec (just tricount itself)
+L*U' time (dot):         3.877952 sec (nthreads: 4 speedup 2.68344)
+tricount time:         3.897149 sec (dot product method)
+tri+prep time:         3.935109 sec (incl time to compute L and U)
+compute C time:        3.877952 sec
+reduce (C) time:       0.019197 sec
+rate     2.54 million edges/sec (incl time for U=triu(A))
+rate     2.56 million edges/sec (just tricount itself)
+L*U' time (dot):         2.696096 sec (nthreads: 8 speedup 3.85975)
+tricount time:         2.716718 sec (dot product method)
+tri+prep time:         2.754678 sec (incl time to compute L and U)
+compute C time:        2.696096 sec
+reduce (C) time:       0.020622 sec
+rate     3.63 million edges/sec (incl time for U=triu(A))
+rate     3.68 million edges/sec (just tricount itself)
+L*U' time (dot):        11.380700 sec
+tricount time:        11.415574 sec (dot product method)
+tri+prep time:        11.453534 sec (incl time to compute L and U)
+compute C time:       11.380700 sec
+reduce (C) time:       0.034874 sec
+rate     0.87 million edges/sec (incl time for U=triu(A))
+rate     0.88 million edges/sec (just tricount itself)
+L*U' time (dot):         6.448143 sec (nthreads: 2 speedup 1.76496)
+tricount time:         6.472799 sec (dot product method)
+tri+prep time:         6.510759 sec (incl time to compute L and U)
+compute C time:        6.448143 sec
+reduce (C) time:       0.024655 sec
+rate     1.53 million edges/sec (incl time for U=triu(A))
+rate     1.54 million edges/sec (just tricount itself)
+L*U' time (dot):         3.976479 sec (nthreads: 4 speedup 2.862)
+tricount time:         3.995512 sec (dot product method)
+tri+prep time:         4.033473 sec (incl time to compute L and U)
+compute C time:        3.976479 sec
+reduce (C) time:       0.019033 sec
+rate     2.48 million edges/sec (incl time for U=triu(A))
+rate     2.50 million edges/sec (just tricount itself)
+L*U' time (dot):         2.843621 sec (nthreads: 8 speedup 4.00219)
+tricount time:         2.863759 sec (dot product method)
+tri+prep time:         2.901720 sec (incl time to compute L and U)
+compute C time:        2.843621 sec
+reduce (C) time:       0.020138 sec
+rate     3.44 million edges/sec (incl time for U=triu(A))
+rate     3.49 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         2.275291 sec
-tricount time:         2.281548 sec (saxpy method)
-tri+prep time:         2.289564 sec (incl time to compute L)
-compute C time:        2.275291 sec
-reduce (C) time:       0.006257 sec
-rate       4.36 million edges/sec (incl time for L=tril(A))
-rate       4.38 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         1.329376 sec (nthreads: 2 speedup 1.71155)
-tricount time:         1.332291 sec (saxpy method)
-tri+prep time:         1.340307 sec (incl time to compute L)
-compute C time:        1.329376 sec
-reduce (C) time:       0.002914 sec
-rate       7.45 million edges/sec (incl time for L=tril(A))
-rate       7.50 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.733922 sec (nthreads: 4 speedup 3.10018)
-tricount time:         0.735290 sec (saxpy method)
-tri+prep time:         0.743307 sec (incl time to compute L)
-compute C time:        0.733922 sec
-reduce (C) time:       0.001368 sec
-rate      13.44 million edges/sec (incl time for L=tril(A))
-rate      13.59 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.379981 sec (nthreads: 8 speedup 5.98791)
-tricount time:         0.380614 sec (saxpy method)
-tri+prep time:         0.388631 sec (incl time to compute L)
-compute C time:        0.379981 sec
-reduce (C) time:       0.000633 sec
-rate      25.71 million edges/sec (incl time for L=tril(A))
-rate      26.25 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.220059 sec (nthreads: 16 speedup 10.3395)
-tricount time:         0.220389 sec (saxpy method)
-tri+prep time:         0.228406 sec (incl time to compute L)
-compute C time:        0.220059 sec
-reduce (C) time:       0.000330 sec
-rate      43.74 million edges/sec (incl time for L=tril(A))
-rate      45.33 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.177751 sec (nthreads: 32 speedup 12.8004)
-tricount time:         0.178491 sec (saxpy method)
-tri+prep time:         0.186507 sec (incl time to compute L)
-compute C time:        0.177751 sec
-reduce (C) time:       0.000739 sec
-rate      53.56 million edges/sec (incl time for L=tril(A))
-rate      55.97 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         5.509919 sec
+tricount time:         5.514612 sec (saxpy method)
+tri+prep time:         5.534517 sec (incl time to compute L)
+compute C time:        5.509919 sec
+reduce (C) time:       0.004693 sec
+rate     1.81 million edges/sec (incl time for L=tril(A))
+rate     1.81 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         3.173495 sec (nthreads: 2 speedup 1.73623)
+tricount time:         3.176608 sec (saxpy method)
+tri+prep time:         3.196513 sec (incl time to compute L)
+compute C time:        3.173495 sec
+reduce (C) time:       0.003114 sec
+rate     3.13 million edges/sec (incl time for L=tril(A))
+rate     3.14 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         1.828458 sec (nthreads: 4 speedup 3.01342)
+tricount time:         1.830920 sec (saxpy method)
+tri+prep time:         1.850825 sec (incl time to compute L)
+compute C time:        1.828458 sec
+reduce (C) time:       0.002462 sec
+rate     5.40 million edges/sec (incl time for L=tril(A))
+rate     5.46 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         2.238881 sec (nthreads: 8 speedup 2.46101)
+tricount time:         2.239755 sec (saxpy method)
+tri+prep time:         2.259660 sec (incl time to compute L)
+compute C time:        2.238881 sec
+reduce (C) time:       0.000873 sec
+rate     4.42 million edges/sec (incl time for L=tril(A))
+rate     4.46 million edges/sec (just tricount itself)
 
 --------------------------------------------------------------
-random 100000 by 100000, nz: 19980330, method 1 time 1.358 sec
+random 100000 by 100000, nz: 19980330, method 1 time 1.729 sec
 
-total time to read A matrix:       1.373378 sec
+total time to read A matrix:       1.752646 sec
 
 n 100000 # edges 9990165
-U=triu(A) time:        0.011773 sec
-L=tril(A) time:        0.007480 sec
+U=triu(A) time:        0.017521 sec
+L=tril(A) time:        0.019028 sec
 
 ------------------------------------- dot product method:
 # triangles 1330131
-L*U' time (dot):        10.097238 sec
-tricount time:        10.153764 sec (dot product method)
-tri+prep time:        10.173017 sec (incl time to compute L and U)
-compute C time:       10.097238 sec
-reduce (C) time:       0.056526 sec
-rate       0.98 million edges/sec (incl time for U=triu(A))
-rate       0.98 million edges/sec (just tricount itself)
-L*U' time (dot):         5.141591 sec (nthreads: 2 speedup 1.96384)
-tricount time:         5.175693 sec (dot product method)
-tri+prep time:         5.194947 sec (incl time to compute L and U)
-compute C time:        5.141591 sec
-reduce (C) time:       0.034102 sec
-rate       1.92 million edges/sec (incl time for U=triu(A))
-rate       1.93 million edges/sec (just tricount itself)
-L*U' time (dot):         2.572865 sec (nthreads: 4 speedup 3.92451)
-tricount time:         2.593160 sec (dot product method)
-tri+prep time:         2.612413 sec (incl time to compute L and U)
-compute C time:        2.572865 sec
-reduce (C) time:       0.020295 sec
-rate       3.82 million edges/sec (incl time for U=triu(A))
-rate       3.85 million edges/sec (just tricount itself)
-L*U' time (dot):         1.310306 sec (nthreads: 8 speedup 7.70601)
-tricount time:         1.325576 sec (dot product method)
-tri+prep time:         1.344830 sec (incl time to compute L and U)
-compute C time:        1.310306 sec
-reduce (C) time:       0.015270 sec
-rate       7.43 million edges/sec (incl time for U=triu(A))
-rate       7.54 million edges/sec (just tricount itself)
-L*U' time (dot):         0.664748 sec (nthreads: 16 speedup 15.1896)
-tricount time:         0.676293 sec (dot product method)
-tri+prep time:         0.695546 sec (incl time to compute L and U)
-compute C time:        0.664748 sec
-reduce (C) time:       0.011545 sec
-rate      14.36 million edges/sec (incl time for U=triu(A))
-rate      14.77 million edges/sec (just tricount itself)
-L*U' time (dot):         0.397795 sec (nthreads: 32 speedup 25.383)
-tricount time:         0.409458 sec (dot product method)
-tri+prep time:         0.428711 sec (incl time to compute L and U)
-compute C time:        0.397795 sec
-reduce (C) time:       0.011663 sec
-rate      23.30 million edges/sec (incl time for U=triu(A))
-rate      24.40 million edges/sec (just tricount itself)
-L*U' time (dot):        10.430272 sec
-tricount time:        10.480139 sec (dot product method)
-tri+prep time:        10.499392 sec (incl time to compute L and U)
-compute C time:       10.430272 sec
-reduce (C) time:       0.049867 sec
-rate       0.95 million edges/sec (incl time for U=triu(A))
-rate       0.95 million edges/sec (just tricount itself)
-L*U' time (dot):         5.280755 sec (nthreads: 2 speedup 1.97515)
-tricount time:         5.311437 sec (dot product method)
-tri+prep time:         5.330690 sec (incl time to compute L and U)
-compute C time:        5.280755 sec
-reduce (C) time:       0.030682 sec
-rate       1.87 million edges/sec (incl time for U=triu(A))
-rate       1.88 million edges/sec (just tricount itself)
-L*U' time (dot):         2.650433 sec (nthreads: 4 speedup 3.93531)
-tricount time:         2.671086 sec (dot product method)
-tri+prep time:         2.690339 sec (incl time to compute L and U)
-compute C time:        2.650433 sec
-reduce (C) time:       0.020653 sec
-rate       3.71 million edges/sec (incl time for U=triu(A))
-rate       3.74 million edges/sec (just tricount itself)
-L*U' time (dot):         1.337364 sec (nthreads: 8 speedup 7.79913)
-tricount time:         1.352712 sec (dot product method)
-tri+prep time:         1.371966 sec (incl time to compute L and U)
-compute C time:        1.337364 sec
-reduce (C) time:       0.015348 sec
-rate       7.28 million edges/sec (incl time for U=triu(A))
-rate       7.39 million edges/sec (just tricount itself)
-L*U' time (dot):         0.684550 sec (nthreads: 16 speedup 15.2367)
-tricount time:         0.697919 sec (dot product method)
-tri+prep time:         0.717172 sec (incl time to compute L and U)
-compute C time:        0.684550 sec
-reduce (C) time:       0.013368 sec
-rate      13.93 million edges/sec (incl time for U=triu(A))
-rate      14.31 million edges/sec (just tricount itself)
-L*U' time (dot):         0.409997 sec (nthreads: 32 speedup 25.4399)
-tricount time:         0.422338 sec (dot product method)
-tri+prep time:         0.441591 sec (incl time to compute L and U)
-compute C time:        0.409997 sec
-reduce (C) time:       0.012341 sec
-rate      22.62 million edges/sec (incl time for U=triu(A))
-rate      23.65 million edges/sec (just tricount itself)
+L*U' time (dot):        10.763829 sec
+tricount time:        10.801652 sec (dot product method)
+tri+prep time:        10.838201 sec (incl time to compute L and U)
+compute C time:       10.763829 sec
+reduce (C) time:       0.037823 sec
+rate     0.92 million edges/sec (incl time for U=triu(A))
+rate     0.92 million edges/sec (just tricount itself)
+L*U' time (dot):         5.594314 sec (nthreads: 2 speedup 1.92407)
+tricount time:         5.620549 sec (dot product method)
+tri+prep time:         5.657098 sec (incl time to compute L and U)
+compute C time:        5.594314 sec
+reduce (C) time:       0.026235 sec
+rate     1.77 million edges/sec (incl time for U=triu(A))
+rate     1.78 million edges/sec (just tricount itself)
+L*U' time (dot):         3.932143 sec (nthreads: 4 speedup 2.7374)
+tricount time:         3.951585 sec (dot product method)
+tri+prep time:         3.988135 sec (incl time to compute L and U)
+compute C time:        3.932143 sec
+reduce (C) time:       0.019443 sec
+rate     2.50 million edges/sec (incl time for U=triu(A))
+rate     2.53 million edges/sec (just tricount itself)
+L*U' time (dot):         2.689850 sec (nthreads: 8 speedup 4.00165)
+tricount time:         2.710086 sec (dot product method)
+tri+prep time:         2.746635 sec (incl time to compute L and U)
+compute C time:        2.689850 sec
+reduce (C) time:       0.020236 sec
+rate     3.64 million edges/sec (incl time for U=triu(A))
+rate     3.69 million edges/sec (just tricount itself)
+L*U' time (dot):        10.959365 sec
+tricount time:        10.994169 sec (dot product method)
+tri+prep time:        11.030718 sec (incl time to compute L and U)
+compute C time:       10.959365 sec
+reduce (C) time:       0.034805 sec
+rate     0.91 million edges/sec (incl time for U=triu(A))
+rate     0.91 million edges/sec (just tricount itself)
+L*U' time (dot):         6.370761 sec (nthreads: 2 speedup 1.72026)
+tricount time:         6.394619 sec (dot product method)
+tri+prep time:         6.431168 sec (incl time to compute L and U)
+compute C time:        6.370761 sec
+reduce (C) time:       0.023858 sec
+rate     1.55 million edges/sec (incl time for U=triu(A))
+rate     1.56 million edges/sec (just tricount itself)
+L*U' time (dot):         3.950553 sec (nthreads: 4 speedup 2.77413)
+tricount time:         3.970562 sec (dot product method)
+tri+prep time:         4.007111 sec (incl time to compute L and U)
+compute C time:        3.950553 sec
+reduce (C) time:       0.020008 sec
+rate     2.49 million edges/sec (incl time for U=triu(A))
+rate     2.52 million edges/sec (just tricount itself)
+L*U' time (dot):         2.815654 sec (nthreads: 8 speedup 3.8923)
+tricount time:         2.836292 sec (dot product method)
+tri+prep time:         2.872841 sec (incl time to compute L and U)
+compute C time:        2.815654 sec
+reduce (C) time:       0.020638 sec
+rate     3.48 million edges/sec (incl time for U=triu(A))
+rate     3.52 million edges/sec (just tricount itself)
 
 ----------------------------------- saxpy method:
-C<L>=L*L time (saxpy):         2.373299 sec
-tricount time:         2.379602 sec (saxpy method)
-tri+prep time:         2.387082 sec (incl time to compute L)
-compute C time:        2.373299 sec
-reduce (C) time:       0.006303 sec
-rate       4.19 million edges/sec (incl time for L=tril(A))
-rate       4.20 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         1.389014 sec (nthreads: 2 speedup 1.70862)
-tricount time:         1.391705 sec (saxpy method)
-tri+prep time:         1.399185 sec (incl time to compute L)
-compute C time:        1.389014 sec
-reduce (C) time:       0.002690 sec
-rate       7.14 million edges/sec (incl time for L=tril(A))
-rate       7.18 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.704975 sec (nthreads: 4 speedup 3.3665)
-tricount time:         0.706229 sec (saxpy method)
-tri+prep time:         0.713709 sec (incl time to compute L)
-compute C time:        0.704975 sec
-reduce (C) time:       0.001253 sec
-rate      14.00 million edges/sec (incl time for L=tril(A))
-rate      14.15 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.378052 sec (nthreads: 8 speedup 6.27771)
-tricount time:         0.378688 sec (saxpy method)
-tri+prep time:         0.386167 sec (incl time to compute L)
-compute C time:        0.378052 sec
-reduce (C) time:       0.000636 sec
-rate      25.87 million edges/sec (incl time for L=tril(A))
-rate      26.38 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.213988 sec (nthreads: 16 speedup 11.0908)
-tricount time:         0.214335 sec (saxpy method)
-tri+prep time:         0.221815 sec (incl time to compute L)
-compute C time:        0.213988 sec
-reduce (C) time:       0.000347 sec
-rate      45.04 million edges/sec (incl time for L=tril(A))
-rate      46.61 million edges/sec (just tricount itself)
-C<L>=L*L time (saxpy):         0.175610 sec (nthreads: 32 speedup 13.5146)
-tricount time:         0.176128 sec (saxpy method)
-tri+prep time:         0.183608 sec (incl time to compute L)
-compute C time:        0.175610 sec
-reduce (C) time:       0.000518 sec
-rate      54.41 million edges/sec (incl time for L=tril(A))
-rate      56.72 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         5.317642 sec
+tricount time:         5.322329 sec (saxpy method)
+tri+prep time:         5.341357 sec (incl time to compute L)
+compute C time:        5.317642 sec
+reduce (C) time:       0.004687 sec
+rate     1.87 million edges/sec (incl time for L=tril(A))
+rate     1.88 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         2.962654 sec (nthreads: 2 speedup 1.79489)
+tricount time:         2.965725 sec (saxpy method)
+tri+prep time:         2.984754 sec (incl time to compute L)
+compute C time:        2.962654 sec
+reduce (C) time:       0.003071 sec
+rate     3.35 million edges/sec (incl time for L=tril(A))
+rate     3.37 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         1.746472 sec (nthreads: 4 speedup 3.04479)
+tricount time:         1.748662 sec (saxpy method)
+tri+prep time:         1.767690 sec (incl time to compute L)
+compute C time:        1.746472 sec
+reduce (C) time:       0.002190 sec
+rate     5.65 million edges/sec (incl time for L=tril(A))
+rate     5.71 million edges/sec (just tricount itself)
+C<L>=L*L time (saxpy):         1.784942 sec (nthreads: 8 speedup 2.97917)
+tricount time:         1.785944 sec (saxpy method)
+tri+prep time:         1.804972 sec (incl time to compute L)
+compute C time:        1.784942 sec
+reduce (C) time:       0.001002 sec
+rate     5.53 million edges/sec (incl time for L=tril(A))
+rate     5.59 million edges/sec (just tricount itself)
 
diff --git a/Demo/Output/wildtype_demo.out b/Demo/Output/wildtype_demo.out
index 0d7902cb53..3fe69b0943 100644
--- a/Demo/Output/wildtype_demo.out
+++ b/Demo/Output/wildtype_demo.out
@@ -1,5 +1,5 @@
 WildType size: 128
-    GraphBLAS type: WildType run-time user-defined: [wildtype] size: 128
+    GraphBLAS type: WildType user-defined: [wildtype] size: 128
 
 a wildtype scalar: scalar1 [this is from scalar1]
        0.0        1.0        2.0        3.0 
@@ -25,18 +25,18 @@ a wildtype scalar: scalar_identity for the monoid [identity]
        0.0        0.0        0.0        0.0 
        0.0        0.0        0.0        0.0 
 
-    GraphBLAS Semiring: InTheWild (user-defined at run-time)
-    GraphBLAS Monoid: semiring->add (user-defined at run-time)
-    GraphBLAS BinaryOp: monoid->op (run-time user-defined) z=wildtype_add(x,y)
-    GraphBLAS type: ztype run-time user-defined: [wildtype] size: 128
-    GraphBLAS type: xtype run-time user-defined: [wildtype] size: 128
-    GraphBLAS type: ytype run-time user-defined: [wildtype] size: 128
+    GraphBLAS Semiring: InTheWild (user-defined)
+    GraphBLAS Monoid: semiring->add (user-defined)
+    GraphBLAS BinaryOp: monoid->op (user-defined) z=wildtype_add(x,y)
+    GraphBLAS type: ztype user-defined: [wildtype] size: 128
+    GraphBLAS type: xtype user-defined: [wildtype] size: 128
+    GraphBLAS type: ytype user-defined: [wildtype] size: 128
     identity: [ [user-defined value] ] 
 
-    GraphBLAS BinaryOp: semiring->multiply (run-time user-defined) z=wildtype_mult(x,y)
-    GraphBLAS type: ztype run-time user-defined: [wildtype] size: 128
-    GraphBLAS type: xtype run-time user-defined: [wildtype] size: 128
-    GraphBLAS type: ytype run-time user-defined: [wildtype] size: 128
+    GraphBLAS BinaryOp: semiring->multiply (user-defined) z=wildtype_mult(x,y)
+    GraphBLAS type: ztype user-defined: [wildtype] size: 128
+    GraphBLAS type: xtype user-defined: [wildtype] size: 128
+    GraphBLAS type: ytype user-defined: [wildtype] size: 128
 
 multiplication C=A*B InTheWild semiring:
 
@@ -84,7 +84,7 @@ Printing the matrix with GxB_Matrix_fprint:
   input B, no entries
   pending tuples: 3 max pending: 256 zombies: 0
   pending tuples:
-    GraphBLAS type:  run-time user-defined: [wildtype] size: 128
+    GraphBLAS type:  user-defined: [wildtype] size: 128
     row: 7 col: 2 [user-defined value]
     row: 7 col: 5 [user-defined value]
     row: 4 col: 2 [user-defined value]
@@ -119,60 +119,60 @@ a wildtype scalar:  [here is B(7,5)]
 
 ============= that was the WildType matrix input B
 
-a wildtype scalar: x for multiply: [this is A(2,7)]
+a wildtype scalar: x for multiply: [this is A(2,4)]
        0.0        1.0        2.0        3.0 
      100.0      101.0      102.0      103.0 
-     200.0      201.0      202.0      203.0 
-     300.0      301.0      302.0      303.0 
+     200.0      201.0      202.0      909.0 
+     300.0      301.0      302.0       42.0 
 
-a wildtype scalar: y for multiply: [this is B(7,2)]
-       0.5        1.5        2.5        3.5 
+a wildtype scalar: y for multiply: [finally, B(4,2)]
+      -1.0        1.5        2.5       77.0 
       -0.5        0.5        1.5        2.5 
       -1.5       -0.5        0.5        1.5 
       -2.5       -1.5       -0.5        0.5 
 
 do the multiply:
-   [this was multiplied] = [this is A(2,7)] * [this is B(7,2)]
+   [this was multiplied] = [this is A(2,4)] * [finally, B(4,2)]
 
 a wildtype scalar: z = x*y: [this was multiplied]
      -11.0       -5.0        1.0        7.0 
-    -411.0       -5.0      401.0      807.0 
-    -811.0       -5.0      801.0     1607.0 
-   -1211.0       -5.0     1201.0     2407.0 
+    -561.0       -5.0      401.0     8157.0 
+   -2876.0    -1064.0      448.0    16660.0 
+   -1008.5      386.5     1331.5    24326.5 
 
-a wildtype scalar: x for multiply: [this is A(2,4)]
+a wildtype scalar: x for multiply: [this is A(2,7)]
        0.0        1.0        2.0        3.0 
      100.0      101.0      102.0      103.0 
-     200.0      201.0      202.0      909.0 
-     300.0      301.0      302.0       42.0 
+     200.0      201.0      202.0      203.0 
+     300.0      301.0      302.0      303.0 
 
-a wildtype scalar: y for multiply: [finally, B(4,2)]
-      -1.0        1.5        2.5       77.0 
+a wildtype scalar: y for multiply: [this is B(7,2)]
+       0.5        1.5        2.5        3.5 
       -0.5        0.5        1.5        2.5 
       -1.5       -0.5        0.5        1.5 
       -2.5       -1.5       -0.5        0.5 
 
 do the multiply:
-   [this was multiplied] = [this is A(2,4)] * [finally, B(4,2)]
+   [this was multiplied] = [this is A(2,7)] * [this is B(7,2)]
 
 a wildtype scalar: z = x*y: [this was multiplied]
      -11.0       -5.0        1.0        7.0 
-    -561.0       -5.0      401.0     8157.0 
-   -2876.0    -1064.0      448.0    16660.0 
-   -1008.5      386.5     1331.5    24326.5 
-
-a wildtype scalar: x for add: [this was multiplied]
-     -11.0       -5.0        1.0        7.0 
     -411.0       -5.0      401.0      807.0 
     -811.0       -5.0      801.0     1607.0 
    -1211.0       -5.0     1201.0     2407.0 
 
-a wildtype scalar: y for add: [this was multiplied]
+a wildtype scalar: x for add: [this was multiplied]
      -11.0       -5.0        1.0        7.0 
     -561.0       -5.0      401.0     8157.0 
    -2876.0    -1064.0      448.0    16660.0 
    -1008.5      386.5     1331.5    24326.5 
 
+a wildtype scalar: y for add: [this was multiplied]
+     -11.0       -5.0        1.0        7.0 
+    -411.0       -5.0      401.0      807.0 
+    -811.0       -5.0      801.0     1607.0 
+   -1211.0       -5.0     1201.0     2407.0 
+
 do the add:
     [this was added] = [this was added] + [this was multiplied]
 
diff --git a/Demo/Program/bfs_demo.c b/Demo/Program/bfs_demo.c
index 5de8142c1e..9290984789 100644
--- a/Demo/Program/bfs_demo.c
+++ b/Demo/Program/bfs_demo.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Program/bfs_demo.c: breadth first search using vxm with a mask
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -54,7 +54,9 @@ int main (int argc, char **argv)
     int64_t nlevel0 = -1 ;
     double tic [2], t ;
     OK (GrB_init (GrB_NONBLOCKING)) ;
-    fprintf (stderr, "bfs_demo:\n") ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    fprintf (stderr, "bfs_demo: nthreads %d\n", nthreads) ;
 
     //--------------------------------------------------------------------------
     // read a matrix from stdin
diff --git a/Demo/Program/complex_demo.c b/Demo/Program/complex_demo.c
index cefc6c893c..81ec3002d1 100644
--- a/Demo/Program/complex_demo.c
+++ b/Demo/Program/complex_demo.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Program/complex_demo.c: demo for user-defined complex type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -64,7 +64,9 @@ int main (int argc, char **argv)
 
     // initialize GraphBLAS and create the user-defined Complex type
     GrB_init (GrB_NONBLOCKING) ;
-    fprintf (stderr, "complex_demo:\n") ;
+    int nthreads ;
+    GxB_get (GxB_NTHREADS, &nthreads) ;
+    fprintf (stderr, "complex_demo: nthreads: %d\n", nthreads) ;
     Complex_init ( ) ;
 
     // generate random matrices A and B
diff --git a/Demo/Program/import_demo.c b/Demo/Program/import_demo.c
index 3ec92ef20d..8e5352b6f9 100644
--- a/Demo/Program/import_demo.c
+++ b/Demo/Program/import_demo.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Program/import_demo.c: test import/export
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -22,7 +22,9 @@ int main (int argc, char **argv)
     GrB_Matrix A = NULL ;
     GrB_Info info ;
     OK (GrB_init (GrB_NONBLOCKING)) ;
-    fprintf (stderr, "import_demo:\n") ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    fprintf (stderr, "import_demo: nthreads: %d\n", nthreads) ;
 
     //--------------------------------------------------------------------------
     // get a matrix
diff --git a/Demo/Program/kron_demo.c b/Demo/Program/kron_demo.c
index db7a885ed5..e39dc4d4dd 100644
--- a/Demo/Program/kron_demo.c
+++ b/Demo/Program/kron_demo.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Program/kron_demo.c: Kronkecker product
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -51,6 +51,9 @@ int main (int argc, char **argv)
     double tic [2], t ;
 
     OK (GrB_init (GrB_NONBLOCKING)) ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    fprintf (stderr, "kron demo: nthreads %d\n", nthreads) ;
 
     // printf ("argc %d\n", argc) ;
     if (argc != 4)
diff --git a/Demo/Program/mis_demo.c b/Demo/Program/mis_demo.c
index 9e3b3d787b..e9bacd0f0c 100644
--- a/Demo/Program/mis_demo.c
+++ b/Demo/Program/mis_demo.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Program/mis_demo.c: maximal independent set
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -177,7 +177,9 @@ int main (int argc, char **argv)
 
     double tic [2], t1, t2 ;
     OK (GrB_init (GrB_NONBLOCKING)) ;
-    fprintf (stderr, "\nmis_demo:\n") ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    fprintf (stderr, "\nmis_demo: nthreads: %d\n", nthreads) ;
 
     //--------------------------------------------------------------------------
     // get a symmetric matrix with no self edges
diff --git a/Demo/Program/openmp_demo.c b/Demo/Program/openmp_demo.c
index fc3b5297e8..9aa43d5435 100644
--- a/Demo/Program/openmp_demo.c
+++ b/Demo/Program/openmp_demo.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Program/openmp_demo: example of user multithreading
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // This demo uses OpenMP, and should work if GraphBLAS is compiled to
 // use either OpenMP or pthreads to synchronize multiple user threadds.
 // If OpenMP is not available, this program will work fine without it, in a
@@ -120,6 +125,9 @@ int main (int argc, char **argv)
 
     // start GraphBLAS
     OK (GrB_init (GrB_NONBLOCKING)) ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    fprintf (stderr, "openmp demo, nthreads %d\n", nthreads) ;
 
     // Determine which user-threading model is being used.
     GxB_Thread_Model thread_safety ;
diff --git a/Demo/Program/pagerank_demo.c b/Demo/Program/pagerank_demo.c
index bae0536236..6c26950ab9 100644
--- a/Demo/Program/pagerank_demo.c
+++ b/Demo/Program/pagerank_demo.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Program/pagerank_demo.c: PageRank via various semirings
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -51,8 +51,10 @@ int main (int argc, char **argv)
 
     double tic [2], t ;
     OK (GrB_init (GrB_NONBLOCKING)) ;
-    fprintf (stderr, "\npagerank_demo:\n") ;
-    printf  (        "\npagerank_demo:\n") ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    fprintf (stderr, "\npagerank_demo: nthreads: %d\n", nthreads) ;
+    printf  (        "\npagerank_demo: nthreads: %d\n", nthreads) ;
 
     //--------------------------------------------------------------------------
     // read a matrix from stdin
diff --git a/Demo/Program/pthread_demo.c b/Demo/Program/pthread_demo.c
index 43ae165124..0daf920dbb 100644
--- a/Demo/Program/pthread_demo.c
+++ b/Demo/Program/pthread_demo.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Program/pthread_demo: example of user multithreading
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // This demo requires pthreads, and should work if GraphBLAS is compiled to
 // use either OpenMP or pthreads to synchronize multiple user threadds.
 
@@ -133,6 +138,9 @@ int main (int argc, char **argv)
 
     // start GraphBLAS
     OK (GrB_init (GrB_NONBLOCKING)) ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    printf ("pthread demo, nthreads: %d\n", nthreads) ;
 
     // Determine which user-threading model is being used.
     GxB_Thread_Model thread_safety ;
diff --git a/Demo/Program/reduce_demo.c b/Demo/Program/reduce_demo.c
index 90941ced30..be5eeb7522 100644
--- a/Demo/Program/reduce_demo.c
+++ b/Demo/Program/reduce_demo.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Program/reduce_demo: reduce a matrix to a scalar
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 #include "GraphBLAS.h"
 
 // #define N 65536
@@ -16,7 +21,9 @@ int main (void)
 
     // start GraphBLAS
     GrB_init (GrB_NONBLOCKING) ;
-    printf ("demo: reduce a matrix to a scalar\n") ;
+    int nthreads ;
+    GxB_get (GxB_NTHREADS, &nthreads) ;
+    printf ("demo: reduce a matrix to a scalar, nthreads: %d\n", nthreads) ;
 
     int nthreads_max ;
     GxB_Global_Option_get (GxB_NTHREADS, &nthreads_max) ;
diff --git a/Demo/Program/simple_demo.c b/Demo/Program/simple_demo.c
index ef351c2a6b..74918f6575 100644
--- a/Demo/Program/simple_demo.c
+++ b/Demo/Program/simple_demo.c
@@ -2,7 +2,7 @@
 /* GraphBLAS/Demo/Program/simple_demo.c: tests simple_rand and simple_timer   */
 /* -------------------------------------------------------------------------- */
 
-/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved*/
+/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved*/
 /* http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.        */
 
 /* -------------------------------------------------------------------------- */
diff --git a/Demo/Program/tri_demo.c b/Demo/Program/tri_demo.c
index 5a05611f0a..782b7ee70c 100644
--- a/Demo/Program/tri_demo.c
+++ b/Demo/Program/tri_demo.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Program/tri_demo.c: count triangles
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -52,7 +52,9 @@ int main (int argc, char **argv)
     GrB_Info info ;
     double tic [2], r1, r2 ;
     OK (GrB_init (GrB_NONBLOCKING)) ;
-    fprintf (stderr, "tri_demo:\n") ;
+    int nthreads ;
+    OK (GxB_get (GxB_NTHREADS, &nthreads)) ;
+    fprintf (stderr, "tri_demo: nthreads %d\n", nthreads) ;
     printf ("--------------------------------------------------------------\n");
 
     //--------------------------------------------------------------------------
@@ -150,10 +152,10 @@ int main (int argc, char **argv)
 
         r1 = 1e-6*nedges / (t_dot [0] + t_dot [1] + t_U + t_L) ;
         r2 = 1e-6*nedges / (t_dot [0] + t_dot [1]) ;
-        printf ("rate %10.2f million edges/sec (incl time for U=triu(A))\n",r1);
-        printf ("rate %10.2f million edges/sec (just tricount itself)\n", r2);
+        printf ("rate %8.2f million edges/sec (incl time for U=triu(A))\n",r1);
+        printf ("rate %8.2f million edges/sec (just tricount itself)\n", r2);
         fprintf (stderr, "GrB: C<L>=L*U' (dot)   "
-                "rate %10.2f (with prep), %10.2f (tri)", r1, r2) ;
+                "rate %8.2f (w/ prep), %8.2f (tri)", r1, r2) ;
         if (nthreads > 1) fprintf (stderr, " speedup: %6.2f", t1/ t_dot [0]) ;
         fprintf (stderr, "\n") ;
     }
@@ -199,10 +201,10 @@ int main (int argc, char **argv)
 
         r1 = 1e-6*nedges / (t_dot [0] + t_dot [1] + t_U + t_L) ;
         r2 = 1e-6*nedges / (t_dot [0] + t_dot [1]) ;
-        printf ("rate %10.2f million edges/sec (incl time for U=triu(A))\n",r1);
-        printf ("rate %10.2f million edges/sec (just tricount itself)\n", r2);
+        printf ("rate %8.2f million edges/sec (incl time for U=triu(A))\n",r1);
+        printf ("rate %8.2f million edges/sec (just tricount itself)\n", r2);
         fprintf (stderr, "GrB: C<U>=U*L' (dot)   "
-                "rate %10.2f (with prep), %10.2f (tri)", r1, r2) ;
+                "rate %8.2f (w/ prep), %8.2f (tri)", r1, r2) ;
         if (nthreads > 1) fprintf (stderr, " speedup: %6.2f", t1/ t_dot [0]) ;
         fprintf (stderr, "\n") ;
     }
@@ -255,10 +257,10 @@ int main (int argc, char **argv)
 
         r1 = 1e-6*((double)nedges) / (t_mark [0] + t_mark [1] + t_L) ;
         r2 = 1e-6*((double)nedges) / (t_mark [0] + t_mark [1]) ;
-        printf ("rate %10.2f million edges/sec (incl time for L=tril(A))\n",r1);
-        printf ("rate %10.2f million edges/sec (just tricount itself)\n", r2);
+        printf ("rate %8.2f million edges/sec (incl time for L=tril(A))\n",r1);
+        printf ("rate %8.2f million edges/sec (just tricount itself)\n", r2);
         fprintf (stderr, "GrB: C<L>=L*L (saxpy)  "
-                "rate %10.2f (with prep), %10.2f (tri)", r1, r2) ;
+                "rate %8.2f (w/ prep), %8.2f (tri)", r1, r2) ;
         if (nthreads > 1) fprintf (stderr, " speedup: %6.2f", t1/ t_mark [0]) ;
         fprintf (stderr, "\n") ;
     }
diff --git a/Demo/Program/wildtype_demo.c b/Demo/Program/wildtype_demo.c
index cd5babb8e8..ac6f07a0d7 100644
--- a/Demo/Program/wildtype_demo.c
+++ b/Demo/Program/wildtype_demo.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Program/wildtype_demo: an arbitrary user-defined type
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Each "scalar" entry of this type consists of a 4x4 matrix and a string of
 // length 64.
 
@@ -148,6 +153,9 @@ int main (void)
 
     // start GraphBLAS
     GrB_init (GrB_NONBLOCKING) ;
+    int nthreads ;
+    GxB_get (GxB_NTHREADS, &nthreads) ;
+    fprintf (stderr, "wildtype demo: nthreads %d\n", nthreads) ;
 
     /* alternative method via #defines:
     fprintf (stderr, LINE2 "SuiteSparse:GraphBLAS Version %d.%d.%d, %s\n" LINE2
diff --git a/Demo/README.txt b/Demo/README.txt
index 63c7074e41..a752f5f9ef 100644
--- a/Demo/README.txt
+++ b/Demo/README.txt
@@ -1,4 +1,4 @@
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 This is the GraphBLAS/Demo folder.  It contains a set of simple demo programs
diff --git a/Demo/Source/bfs5m.c b/Demo/Source/bfs5m.c
index 8867661d94..d399a43dee 100644
--- a/Demo/Source/bfs5m.c
+++ b/Demo/Source/bfs5m.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/Source/bfs5m.c: breadth first search (vxm and assign/reduce)
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 // This method has been updated as of Version 2.2 of SuiteSparse:GraphBLAS.
 // It now assumes the matrix is held by row (GxB_BY_ROW) and uses GrB_vxm
 // instead of GrB_mxv.  It now more closely matches the BFS example in the
@@ -60,7 +68,7 @@ GrB_Info bfs5m              // BFS of a graph (using vector assign & reduce)
     GrB_Monoid_new_BOOL (&Lor, GrB_LOR, (bool) false) ;
     GrB_Semiring_new (&Boolean, Lor, GrB_LAND) ;
     GrB_Descriptor_new (&desc) ;
-    GrB_Descriptor_set (desc, GrB_MASK, GrB_SCMP) ;     // invert the mask
+    GrB_Descriptor_set (desc, GrB_MASK, GrB_COMP) ;     // invert the mask
     GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE) ;  // clear q first
 
     //--------------------------------------------------------------------------
diff --git a/Demo/Source/bfs5m_check.c b/Demo/Source/bfs5m_check.c
index dbf766e6fe..95bb98b6eb 100644
--- a/Demo/Source/bfs5m_check.c
+++ b/Demo/Source/bfs5m_check.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/Source/bfs5m_check.c: BFS with vxm and assign/reduce
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 // This method has been updated as of Version 2.2 of SuiteSparse:GraphBLAS.
 // It now assumes the matrix is held by row (GxB_BY_ROW) and uses GrB_vxm
 // instead of GrB_mxv.  It now more closely matches the BFS example in the
@@ -73,7 +81,7 @@ GrB_Info bfs5m_check        // BFS of a graph (using vector assign & reduce)
 
     // descriptor: invert the mask for vxm, and clear output before assignment
     OK (GrB_Descriptor_new (&desc)) ;
-    OK (GxB_Desc_set (desc, GrB_MASK, GrB_SCMP)) ;
+    OK (GxB_Desc_set (desc, GrB_MASK, GrB_COMP)) ;
     OK (GxB_Desc_set (desc, GrB_OUTP, GrB_REPLACE)) ;
 
     //--------------------------------------------------------------------------
diff --git a/Demo/Source/bfs6.c b/Demo/Source/bfs6.c
index eba70abd9d..5c2e133836 100644
--- a/Demo/Source/bfs6.c
+++ b/Demo/Source/bfs6.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/Source/bfs6.c: breadth first search (vxm and apply)
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 // This method has been updated as of Version 2.2 of SuiteSparse:GraphBLAS.
 // It now assumes the matrix is held by row (GxB_BY_ROW) and uses GrB_vxm
 // instead of GrB_mxv.  It now more closely matches the BFS example in the
@@ -70,7 +78,7 @@ GrB_Info bfs6               // BFS of a graph (using unary operator)
     GrB_Semiring_new (&Boolean, Lor, GrB_LAND) ;
 
     GrB_Descriptor_new (&desc) ;
-    GrB_Descriptor_set (desc, GrB_MASK, GrB_SCMP) ;     // invert the mask
+    GrB_Descriptor_set (desc, GrB_MASK, GrB_COMP) ;     // invert the mask
     GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE) ;  // clear q first
 
     // create a unary operator
diff --git a/Demo/Source/bfs6_check.c b/Demo/Source/bfs6_check.c
index dc872b19c9..60bf356a88 100644
--- a/Demo/Source/bfs6_check.c
+++ b/Demo/Source/bfs6_check.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/Source/bfs6_check.c: breadth first search using vxm
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 // This method has been updated as of Version 2.2 of SuiteSparse:GraphBLAS.
 // It now assumes the matrix is held by row (GxB_BY_ROW) and uses GrB_vxm
 // instead of GrB_mxv.  It now more closely matches the BFS example in the
@@ -76,7 +84,7 @@ GrB_Info bfs6_check         // BFS of a graph (using unary operator)
 
     // descriptor: invert the mask for vxm, and clear output before assignment
     OK (GrB_Descriptor_new (&desc)) ;
-    OK (GxB_Desc_set (desc, GrB_MASK, GrB_SCMP)) ;
+    OK (GxB_Desc_set (desc, GrB_MASK, GrB_COMP)) ;
     OK (GxB_Desc_set (desc, GrB_OUTP, GrB_REPLACE)) ;
 
     // create a unary operator
diff --git a/Demo/Source/bfs_level.c b/Demo/Source/bfs_level.c
index 328f8621ce..fbc16ae8d6 100644
--- a/Demo/Source/bfs_level.c
+++ b/Demo/Source/bfs_level.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/bfs_level.c:  unary operator for bfs6.c
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 #include "demos.h"
 
 //------------------------------------------------------------------------------
diff --git a/Demo/Source/dpagerank.c b/Demo/Source/dpagerank.c
index 55b69e2ea1..a1b9be2082 100644
--- a/Demo/Source/dpagerank.c
+++ b/Demo/Source/dpagerank.c
@@ -2,6 +2,11 @@
 // SuiteSparse/GraphBLAS/Demo/Source/dpagerank: pagerank using a real semiring
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // A is a square unsymmetric binary matrix of size n-by-n, where A(i,j) is the
 // edge (i,j).  Self-edges are OK.  A can be of any built-in type.
 
diff --git a/Demo/Source/dpagerank2.c b/Demo/Source/dpagerank2.c
index 94faf90d35..eea9268491 100644
--- a/Demo/Source/dpagerank2.c
+++ b/Demo/Source/dpagerank2.c
@@ -2,6 +2,11 @@
 // SuiteSparse/GraphBLAS/Demo/Source/dpagerank2: pagerank using a real semiring
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // PageRank via EXTREME GraphBLAS-ing!
 
 // A is a square unsymmetric binary matrix of size n-by-n, where A(i,j) is the
@@ -14,9 +19,7 @@
 
 // This version operates on the original matrix A, without changing it.  The
 // entire computation is done via a set of user-defined objects:  a type,
-// several operators, a monoid, and a semiring.  If PAGERANK_PREEFINED is
-// defined at compile time, then the GraphBLAS PageRank_* objects are assumed
-// to be available as global objects.
+// several operators, a monoid, and a semiring.
 
 // Acknowledgements:  this method was written with input from Richard Veras,
 // Franz Franchetti, and Scott McMillan, Carnegie Mellon University.
@@ -61,8 +64,6 @@
 // scalar types and operators
 //------------------------------------------------------------------------------
 
-#ifndef PAGERANK_PREDEFINED
-
 // each node has a rank value, and a constant which is 1/outdegree
 typedef struct
 {
@@ -196,8 +197,6 @@ void pagerank_diff
     z->rank = delta * delta ;
 }
 
-#endif
-
 //------------------------------------------------------------------------------
 // comparison function for qsort
 //------------------------------------------------------------------------------
@@ -248,11 +247,6 @@ GrB_Info dpagerank2         // GrB_SUCCESS or error condition
     // create the new type, operators, monoid, and semiring
     //--------------------------------------------------------------------------
 
-    #ifndef PAGERANK_PREDEFINED
-
-    // PageRank_* objects are not defined at compile time (my_pagerank.m4 is
-    // not in the User/ directory).  Define them here at run-time:
-
     GrB_Type PageRank_type = NULL ;
     GrB_UnaryOp PageRank_div = NULL, PageRank_get = NULL, PageRank_init = NULL ;
     GrB_BinaryOp PageRank_accum = NULL, PageRank_add = NULL,
@@ -294,11 +288,6 @@ GrB_Info dpagerank2         // GrB_SUCCESS or error condition
     OK (GrB_BinaryOp_new (&PageRank_diff, pagerank_diff,
         PageRank_type, PageRank_type, PageRank_type)) ;
 
-    printf ("dpagerank2: pagerank objects defined at run-time\n") ;
-    #else
-    printf ("dpagerank2: pagerank objects defined at compile-time\n") ;
-    #endif
-
     //--------------------------------------------------------------------------
     // initializations
     //--------------------------------------------------------------------------
diff --git a/Demo/Source/drowscale.c b/Demo/Source/drowscale.c
index dc92663f93..763854066a 100644
--- a/Demo/Source/drowscale.c
+++ b/Demo/Source/drowscale.c
@@ -2,6 +2,11 @@
 // drowscale: scale the rows of an adjacency matrix by out-degree
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // on input, A is a square unsymmetric binary matrix of size n-by-n, of any
 // built-in type.  On output, C is a rowscaled version of A, of type
 // GrB_FP64, with C = D*A + I.  The diagonal matrix D has diagonal entries
diff --git a/Demo/Source/get_matrix.c b/Demo/Source/get_matrix.c
index 96573e4342..3a984c8899 100644
--- a/Demo/Source/get_matrix.c
+++ b/Demo/Source/get_matrix.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Source/get_matrix.c: get matrix from file, or create random
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -117,7 +117,7 @@ GrB_Info get_matrix         // get a matrix from stdin, or create random one
                 // diagonal is not touched by C<~Mask>=A.
                 OK (GrB_Descriptor_new (&desc)) ;
                 OK (GrB_Descriptor_set (desc, GrB_INP0, GrB_TRAN)) ;
-                OK (GrB_Descriptor_set (desc, GrB_MASK, GrB_SCMP)) ;
+                OK (GrB_Descriptor_set (desc, GrB_MASK, GrB_COMP)) ;
                 OK (GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE)) ;
                 OK (GrB_transpose (A, Mask, NULL, A, desc)) ;
                 GrB_Matrix_free (&Mask) ;
diff --git a/Demo/Source/import_test.c b/Demo/Source/import_test.c
index 8aff041bfa..88f07c0966 100644
--- a/Demo/Source/import_test.c
+++ b/Demo/Source/import_test.c
@@ -2,7 +2,7 @@
 // SuiteSparse/GraphBLAS/Demo/Source/import_test: test import/export
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Demo/Source/ipagerank.c b/Demo/Source/ipagerank.c
index c8bf9e91b7..83c4710f20 100644
--- a/Demo/Source/ipagerank.c
+++ b/Demo/Source/ipagerank.c
@@ -1,10 +1,10 @@
 //------------------------------------------------------------------------------
 // SuiteSparse/GraphBLAS/Demo/Source/ipagerank: pagerank using uint64 semiring
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+//------------------------------------------------------------------------------ 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
+//------------------------------------------------------------------------------ 
 // A is a square unsymmetric binary matrix of size n-by-n, where A(i,j) is the
 // edge (i,j).  Self-edges are OK.  A can be of any built-in type.
 
diff --git a/Demo/Source/irowscale.c b/Demo/Source/irowscale.c
index ed81c8f71d..c1cc968ac7 100644
--- a/Demo/Source/irowscale.c
+++ b/Demo/Source/irowscale.c
@@ -2,6 +2,11 @@
 // irowscale: scale the rows of an adjacency matrix by out-degree
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // on input, A is a square unsymmetric binary matrix of size n-by-n, of any
 // built-in type.  On output, C is a rowscaled version of A, of type
 // GrB_UINT64, with C = D*A + I.  The diagonal matrix D has diagonal entries
diff --git a/Demo/Source/isequal.c b/Demo/Source/isequal.c
index 8a6e0d6080..bd40f04d06 100644
--- a/Demo/Source/isequal.c
+++ b/Demo/Source/isequal.c
@@ -2,9 +2,11 @@
 // isequal: check two matrices for exact equality
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
+//------------------------------------------------------------------------------
+
 // isequal: check if two matrices are identically equal (same size,type,
 // pattern, size, and values).  Checking for the same type requires a function
 // that is an extension in SuiteSparse:GraphBLAS.  For the standard API, there
diff --git a/Demo/Source/mis.c b/Demo/Source/mis.c
index 228abef96f..d7c5007fcd 100644
--- a/Demo/Source/mis.c
+++ b/Demo/Source/mis.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/Source/mis.c: maximal independent set
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 // This method has been updated as of Version 2.2 of SuiteSparse:GraphBLAS.  It
 // now uses GrB_vxm instead of GrB_mxv.
 
@@ -87,7 +95,7 @@ GrB_Info mis                    // compute a maximal independent set
 
     // descriptor: C_replace + structural complement of mask
     GrB_Descriptor_new (&sr_desc) ;
-    GrB_Descriptor_set (sr_desc, GrB_MASK, GrB_SCMP) ;
+    GrB_Descriptor_set (sr_desc, GrB_MASK, GrB_COMP) ;
     GrB_Descriptor_set (sr_desc, GrB_OUTP, GrB_REPLACE) ;
 
     // create the mis_score binary operator
diff --git a/Demo/Source/mis_check.c b/Demo/Source/mis_check.c
index 0527142111..977c540651 100644
--- a/Demo/Source/mis_check.c
+++ b/Demo/Source/mis_check.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/Source/mis_check.c: maximal independent set, w/error checking
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 // This method has been updated as of Version 2.2 of SuiteSparse:GraphBLAS.  It
 // now uses GrB_vxm instead of GrB_mxv.
 
@@ -131,7 +139,7 @@ GrB_Info mis_check              // compute a maximal independent set
 
     // descriptor: C_replace + structural complement of mask
     OK (GrB_Descriptor_new (&sr_desc)) ;
-    OK (GrB_Descriptor_set (sr_desc, GrB_MASK, GrB_SCMP)) ;
+    OK (GrB_Descriptor_set (sr_desc, GrB_MASK, GrB_COMP)) ;
     OK (GrB_Descriptor_set (sr_desc, GrB_OUTP, GrB_REPLACE)) ;
 
     // create the mis_score binary operator
diff --git a/Demo/Source/mis_score.c b/Demo/Source/mis_score.c
index 7d056d0eb3..2d56ba423c 100644
--- a/Demo/Source/mis_score.c
+++ b/Demo/Source/mis_score.c
@@ -2,10 +2,18 @@
 // GraphBLAS/Demo/Source/mis_score.c: set random score
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Modified from the GraphBLAS C API Specification, by Aydin Buluc, Timothy
 // Mattson, Scott McMillan, Jose' Moreira, Carl Yang.  Based on "GraphBLAS
 // Mathematics" by Jeremy Kepner.
 
+// No copyright claim is made for this particular file; the above copyright
+// applies to all of SuiteSparse:GraphBLAS, not this file.
+
 #include "demos.h"
 
 //------------------------------------------------------------------------------
diff --git a/Demo/Source/prand.c b/Demo/Source/prand.c
index 164c594163..4f556f2a13 100644
--- a/Demo/Source/prand.c
+++ b/Demo/Source/prand.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Source/prand: parallel random number generator
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // A simple thread-safe parallel pseudo-random nuumber generator.
 
 #include "prand.h"
diff --git a/Demo/Source/random_matrix.c b/Demo/Source/random_matrix.c
index 1784ab4865..5a8ba4ea70 100644
--- a/Demo/Source/random_matrix.c
+++ b/Demo/Source/random_matrix.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Source/Source/random_matrix.c: create a random matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Demo/Source/read_matrix.c b/Demo/Source/read_matrix.c
index 5ad0fc3854..6f29ff1b54 100644
--- a/Demo/Source/read_matrix.c
+++ b/Demo/Source/read_matrix.c
@@ -2,7 +2,7 @@
 // GraphBLAS/Demo/Source/read_matrix.c: read a matrix from stdin
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Demo/Source/simple_rand.c b/Demo/Source/simple_rand.c
index 5b27a77991..47a9f2c6e7 100644
--- a/Demo/Source/simple_rand.c
+++ b/Demo/Source/simple_rand.c
@@ -2,7 +2,7 @@
 /* GraphBLAS/Demo/Source/simple_rand.c: a very simple random number generator */
 /* -------------------------------------------------------------------------- */
 
-/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved*/
+/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved*/
 /* http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.        */
 
 /* -------------------------------------------------------------------------- */
diff --git a/Demo/Source/simple_timer.c b/Demo/Source/simple_timer.c
index 4c234a6ea1..e84a05497e 100644
--- a/Demo/Source/simple_timer.c
+++ b/Demo/Source/simple_timer.c
@@ -2,7 +2,7 @@
 /* GraphBLAS/Demo/Source/simple_timer.c: a timer for performance measurements */
 /* -------------------------------------------------------------------------- */
 
-/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved*/
+/* SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved*/
 /* http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.        */
 
 /* -------------------------------------------------------------------------- */
diff --git a/Demo/Source/tricount.c b/Demo/Source/tricount.c
index b8c35259b3..8278048e1f 100644
--- a/Demo/Source/tricount.c
+++ b/Demo/Source/tricount.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Source/tricount.c: count the number of triangles in a graph
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 // Given a symmetric graph A with no-self edges, tricount counts the exact
 // number of triangles in the graph.
 
diff --git a/Demo/Source/usercomplex.c b/Demo/Source/usercomplex.c
index da7a1bf574..18be1ad349 100644
--- a/Demo/Source/usercomplex.c
+++ b/Demo/Source/usercomplex.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Source/usercomplex.c:  complex numbers as a user-defined type
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 #include "usercomplex.h"
 
 #if defined __INTEL_COMPILER
@@ -23,11 +28,12 @@
 #define BOOL(X) (X != ZERO)
 
 //------------------------------------------------------------------------------
-// 8 binary functions, z=f(x,y), where CxC -> C
+// binary functions, z=f(x,y), where CxC -> C
 //------------------------------------------------------------------------------
 
 void complex_first  (C Z, const C X, const C Y) { Z = X ; }
 void complex_second (C Z, const C X, const C Y) { Z = Y ; }
+void complex_pair   (C Z, const C X, const C Y) { Z = ONE ; }
 void complex_plus   (C Z, const C X, const C Y) { Z = X + Y ; }
 void complex_minus  (C Z, const C X, const C Y) { Z = X - Y ; }
 void complex_rminus (C Z, const C X, const C Y) { Z = Y - X ; }
@@ -94,7 +100,7 @@ void complex_max (C Z, const C X, const C Y)
 GrB_BinaryOp Complex_first = NULL, Complex_second = NULL, Complex_min = NULL,
              Complex_max   = NULL, Complex_plus   = NULL, Complex_minus = NULL,
              Complex_times = NULL, Complex_div    = NULL, Complex_rminus = NULL,
-             Complex_rdiv  = NULL ;
+             Complex_rdiv  = NULL, Complex_pair   = NULL ;
 
 //------------------------------------------------------------------------------
 // 6 binary functions, z=f(x,y), where CxC -> C ; (1,0) = true, (0,0) = false
@@ -202,9 +208,7 @@ GrB_UnaryOp Complex_complex_real = NULL, Complex_complex_imag = NULL ;
 // Complex type, scalars, monoids, and semiring
 //------------------------------------------------------------------------------
 
-#ifndef MY_COMPLEX
 GrB_Type Complex = NULL ;
-#endif
 GrB_Monoid   Complex_plus_monoid = NULL, Complex_times_monoid = NULL ;
 GrB_Semiring Complex_plus_times = NULL ;
 C Complex_1  = ONE ;
@@ -231,9 +235,7 @@ GrB_Info Complex_init ( )
     // create the Complex type
     //--------------------------------------------------------------------------
 
-    #ifndef MY_COMPLEX
     OK (GrB_Type_new (&Complex, sizeof (C))) ;    
-    #endif
 
     #undef C
     #undef D
@@ -246,6 +248,7 @@ GrB_Info Complex_init ( )
 
     OK (GrB_BinaryOp_new (&Complex_first  , complex_first  , C, C, C)) ;
     OK (GrB_BinaryOp_new (&Complex_second , complex_second , C, C, C)) ;
+    OK (GrB_BinaryOp_new (&Complex_pair   , complex_pair   , C, C, C)) ;
     OK (GrB_BinaryOp_new (&Complex_min    , complex_min    , C, C, C)) ;
     OK (GrB_BinaryOp_new (&Complex_max    , complex_max    , C, C, C)) ;
     OK (GrB_BinaryOp_new (&Complex_plus   , complex_plus   , C, C, C)) ;
@@ -364,6 +367,7 @@ GrB_Info Complex_finalize ( )
 
     GrB_BinaryOp_free (&Complex_first ) ;
     GrB_BinaryOp_free (&Complex_second) ;
+    GrB_BinaryOp_free (&Complex_pair  ) ;
     GrB_BinaryOp_free (&Complex_min   ) ;
     GrB_BinaryOp_free (&Complex_max   ) ;
     GrB_BinaryOp_free (&Complex_plus  ) ;
diff --git a/Demo/Source/wathen.c b/Demo/Source/wathen.c
index fd76502b09..7f9f1593f3 100644
--- a/Demo/Source/wathen.c
+++ b/Demo/Source/wathen.c
@@ -2,6 +2,11 @@
 // GraphBLAS/Demo/Source/wathen.c: a finite-element matrix on a regular mesh
 //------------------------------------------------------------------------------
 
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
 #include "demos.h"
 
 // Create a finite-element matrix on an nx-by-ny 2D mesh, as computed by
diff --git a/Demo/vdemo b/Demo/vdemo
index 65f812a239..33f1e945a3 100755
--- a/Demo/vdemo
+++ b/Demo/vdemo
@@ -1,6 +1,7 @@
 #!/bin/sh
 
-V="valgrind --error-limit=no --tool=memcheck -v --log-file=valgrind_%p.log --leak-check=full --show-leak-kinds=all"
+# V="valgrind --error-limit=no --tool=memcheck -q --log-file=valgrind_%p.log --leak-check=full --show-leak-kinds=all"
+V="valgrind --error-limit=no --tool=memcheck -q --log-file=valgrind_%p.log"
 
 $V ../build/wildtype_demo                         > wildtype_demo.out
 $V ../build/simple_demo                           > simple_demo.out
diff --git a/Doc/ChangeLog b/Doc/ChangeLog
index 9cd8e996f2..90145f5fa5 100644
--- a/Doc/ChangeLog
+++ b/Doc/ChangeLog
@@ -1,3 +1,25 @@
+Version 3.2.0, Feb 20, 2020
+
+    * GxB_*_define for user-defined compile-time objects: removed.  Not
+        compatible with the faster kernels for mxm and dense matrices/vectors.
+        Use the GrB_*_new functions in the GraphBLAS C API Specification
+        instead.
+    * faster saxpy-based matrix multiply (about 5x to 10x for mxv and vxm):
+        removed Sauna workspace.  Heap method removed.  Hash method added.
+    * better performance for dense matrices and vectors
+    * faster typecast of the mask matrix M: GB_mcast replaces cast_M
+    * added GB_BURBLE: for development diagnostics
+    * changed default chunk size: from 4K to 64K
+    * added the PAIR binary operator: f(x,y)=1
+    * added the ANY binary operator: f(x,y)=x, or f(x,y)=y ; arbitrary choice
+    * added structural mask: from v1.3 C API Specification
+    * added GrB_DESC_*: predefined descriptor, from v1.3 C API Specification
+    * many atomics added: for the faster matrix multiply.  These changes have
+        not been ported to Microsoft Visual Studio, which only supports a
+        subset of "#pragma omp atomic" statements.  This will be resolved in a
+        future version; in the meantime, use v3.1.2 with MS Visual Studio
+        instead of v3.2.0.
+
 Version 3.1.2, Dec 16, 2019
 
     * (15) bug fix in parallel matrix-matrix multiply: could occur when # of
diff --git a/Doc/GraphBLAS_API_C_v13.pdf b/Doc/GraphBLAS_API_C_v13.pdf
new file mode 100644
index 0000000000..0a230734ac
Binary files /dev/null and b/Doc/GraphBLAS_API_C_v13.pdf differ
diff --git a/Doc/GraphBLAS_UserGuide.bib b/Doc/GraphBLAS_UserGuide.bib
index f144947a34..0ec16f27e6 100644
--- a/Doc/GraphBLAS_UserGuide.bib
+++ b/Doc/GraphBLAS_UserGuide.bib
@@ -379,3 +379,27 @@ @inproceedings{Yang:2018:IPE
 	code =	 {https://github.com/owensgroup/push-pull}
 }
 
+@inproceedings{10.1145/3229710.3229720,
+author = {Nagasaka, Yusuke and Matsuoka, Satoshi and Azad, Ariful and Bulu\c{c}, Ayd{\i}n},
+title = {High-Performance Sparse Matrix-Matrix Products on Intel KNL and Multicore Architectures},
+year = {2018},
+isbn = {9781450365239},
+publisher = {Association for Computing Machinery},
+address = {New York, NY, USA},
+url = {https://doi.org/10.1145/3229710.3229720},
+doi = {10.1145/3229710.3229720},
+booktitle = {Proceedings of the 47th International Conference on Parallel Processing Companion},
+articleno = {Article 34},
+numpages = {10},
+keywords = {Intel KNL, SpGEMM, Sparse matrix},
+location = {Eugene, OR, USA},
+series = {ICPP '18}
+}
+
+
+
+% [2] Yusuke Nagasaka, Satoshi Matsuoka, Ariful Azad, and Aydın Buluç. 2018.
+% High-Performance Sparse Matrix-Matrix Products on Intel KNL and Multicore
+% Architectures. In Proc. 47th Intl. Conf. on Parallel Processing (ICPP '18).
+% Association for Computing Machinery, New York, NY, USA, Article 34, 1–10.
+% DOI:https://doi.org/10.1145/3229710.3229720
diff --git a/Doc/GraphBLAS_UserGuide.pdf b/Doc/GraphBLAS_UserGuide.pdf
index 2ad69e6cb4..55d6125b68 100644
Binary files a/Doc/GraphBLAS_UserGuide.pdf and b/Doc/GraphBLAS_UserGuide.pdf differ
diff --git a/Doc/GraphBLAS_UserGuide.tex b/Doc/GraphBLAS_UserGuide.tex
index 267ce94f62..e871bebd29 100644
--- a/Doc/GraphBLAS_UserGuide.tex
+++ b/Doc/GraphBLAS_UserGuide.tex
@@ -105,6 +105,16 @@ \subsection{Release Notes:}
 
 \begin{itemize}
 
+\item Version 3.2.0 (Feb 20, 2020).  Faster \verb'GrB_mxm', \verb'GrB_mxv', and
+    \verb'GrB_vxm', and faster operations on dense matrices/vectors.  Removed
+    compile-time user objects (\verb'GxB_*_define'), since these were not
+    compatible with the faster matrix operations.  Added the \verb'ANY' and
+    \verb'PAIR' operators.  Added the predefined descriptors, \verb'GrB_DESC_*'.
+    Added the structural mask option.  Changed default chunk size to 65,536.
+    Note that v3.2.0 is not compatible with the MS Visual Studio compiler; use
+    v3.1.2 instead.
+    MATLAB interface modified:  \verb'GrB.init' is now optional.
+
 \item Version 3.1.2 (Dec, 2019).  Changes to allow SuiteSparse:GraphBLAS
     to be compiled with the Microsoft Visual Studio compiler.  This compiler
     does not support the \verb'_Generic' keyword, so the polymorphic functions
@@ -157,7 +167,8 @@ \subsection{Release Notes:}
 Version 2.2 (Nov 2018)
 adds user-defined objects at compile-time, via user \verb'*.m4' files placed in
 \verb'GraphBLAS/User', which use the \verb'GxB_*_define' macros described in
-Section~\ref{precompile}.  The default matrix format is now \verb'GxB_BY_ROW'.
+Section~\ref{precompile} (NOTE: feature removed in v3.2).
+The default matrix format is now \verb'GxB_BY_ROW'.
 % If you want the default format to be by column (the default in Version 2.1 and
 % earlier), just compile with \verb'-DBYCOL', or add \newline
 % \verb'GxB_set (GxB_FORMAT, GxB_BY_COL) ;'
@@ -344,9 +355,9 @@ \subsection{Overview of GraphBLAS methods and operations} %=====================
 and multiply operators, as long these few rules are followed.
 
 Just considering built-in types and operators, GraphBLAS can perform
-\verb'C=A*B' in 1040 unique semirings.  With typecasting, any of these 1040
+\verb'C=A*B' in 1355 unique semirings.  With typecasting, any of these 1355
 semirings can be applied to matrices \verb'C', \verb'A', and \verb'B' of any of
-the 11 types, in any combination.  This gives $1040 \times 11^3 = 1,384,240$
+the 11 types, in any combination.  This gives $1355 \times 11^3 = 1,803,505$
 possible kinds of sparse matrix multiplication supported by GraphBLAS, and this
 is counting just built-in types and operators.  By contrast, MATLAB provides
 just two semirings for its sparse matrix multiplication \verb'C=A*B':
@@ -512,7 +523,7 @@ \subsection{The accumulator and the mask} %=====================================
     \> \> $z_{ij} = t_{ij}$
     \end{tabbing} }
 The Accumulator Phase is followed by the Mask/Replace Phase, ${\bf C \langle M \rangle = Z}$
-as controlled by the \verb'GrB_REPLACE' and \verb'GrB_SCMP' descriptor options:
+as controlled by the \verb'GrB_REPLACE' and \verb'GrB_COMP' descriptor options:
     \vspace{-0.2in}
     % mask/replace/scmp: C<M> = Z
     {\small
@@ -521,12 +532,12 @@ \subsection{The accumulator and the mask} %=====================================
     \>{\bf Mask/Replace Phase}: compute ${\bf C \langle M \rangle = Z}$: \\
     \> \> if (\verb'GrB_REPLACE') delete all entries in ${\bf C}$ \\
     \> \> if \verb'Mask' is \verb'NULL' \\
-    \> \>\>    if (\verb'GrB_SCMP') \\
+    \> \>\>    if (\verb'GrB_COMP') \\
     \> \>\>\>      ${\bf C}$ is not modified \\
     \> \>\>    else \\
     \> \>\>\>      ${\bf C = Z}$ \\
     \> \> else \\
-    \> \>\>    if (\verb'GrB_SCMP') \\
+    \> \>\>    if (\verb'GrB_COMP') \\
     \> \>\>\>      ${\bf C \langle \neg M \rangle  = Z}$ \\
     \> \>\>    else \\
     \> \>\>\>      ${\bf C \langle M \rangle  = Z}$
@@ -879,11 +890,8 @@ \subsection{MATLAB Interface}
         MATLAB result.
     \item Solvers, so that \verb'x=A\b' could return a GF(2) solution,
         for example.
-    \item Some methods are slower than MATLAB.  Dense vectors, for example,
-        are faster with MATLAB full vectors than GraphBLAS sparse vectors.
-        These issues will be addressed in a future version.
     \item Sparse matrices with dimension higher than 2.  It would be
-        possible to map a N-dimensional matrix to a large 2D
+        possible to map an N-dimensional matrix to a large 2D
         hypersparse GraphBLAS matrix.
 \end{packed_itemize}
 
@@ -1472,8 +1480,8 @@ \section{GraphBLAS Objects and their Methods} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 is valid but it does not make a copy of the underlying object.
 
 GraphBLAS provides 11 built-in types and 157 built-in operators;
-SuiteSparse:GraphBLAS adds 121 additional built-in operators.  With these,
-44 unique monoids and 1040 unique semirings can be constructed.
+SuiteSparse:GraphBLAS adds 143 additional built-in operators.  With these,
+55 unique monoids and 1355 unique semirings can be constructed.
 
 \begin{spec}
 {\bf SPEC:} SuiteSparse:GraphBLAS predefines all unique monoids and semirings
@@ -1923,11 +1931,11 @@ \subsection{GraphBLAS binary operators: {\sf GrB\_BinaryOp}, $z=f(x,y)$} %======
 A binary operator is a scalar function of the form $z=f(x,y)$.  The types of
 $z$, $x$, and $y$ need not be the same.
 
-SuiteSparse:GraphBLAS has 19 kinds of built-in binary operators of the form $T
-\times T \rightarrow T$ that work on all 11 of the built-in types, $T$, for a
-total of 209 binary operators of this form.  These are listed in the table
+SuiteSparse:GraphBLAS has 21 kinds of built-in binary operators of the form $T
+\times T \rightarrow T$ that work on all 11 of the built-in types, $T$.
+These are listed in the table
 below.  For each of these operators, all domains (types) of the three operands
-are the same.  The six comparison operators and three logical operators all
+are the same.  The six comparison operators and the logical operators all
 return a result one for true and zero for false, in the same domain $T$ as
 their inputs.  These six comparison operators are useful as ``multiply''
 operators for creating semirings with non-Boolean monoids.
@@ -1939,9 +1947,11 @@ \subsection{GraphBLAS binary operators: {\sf GrB\_BinaryOp}, $z=f(x,y)$} %======
 GraphBLAS             & types (domains)            & expression      & description \\
 name                  &                            & $z=f(x,y)$      & \\
 \hline
-% 10 numeric TxT->T
+% numeric TxT->T
 \verb'GrB_FIRST_'$T$  & $T \times T \rightarrow T$ & $z = x$         & first argument \\
 \verb'GrB_SECOND_'$T$ & $T \times T \rightarrow T$ & $z = y$         & second argument \\
+\verb'GxB_ANY_'$T$    & $T \times T \rightarrow T$ & $z = x$ or $y$  & pick $x$ or $y$ arbitrarily \\
+\verb'GxB_PAIR_'$T$   & $T \times T \rightarrow T$ & $z = 1$         & one \\
 \verb'GrB_MIN_'$T$    & $T \times T \rightarrow T$ & $z = \min(x,y)$ & minimum \\
 \verb'GrB_MAX_'$T$    & $T \times T \rightarrow T$ & $z = \max(x,y)$ & maximum \\
 \verb'GrB_PLUS_'$T$   & $T \times T \rightarrow T$ & $z = x+y$       & addition \\
@@ -1951,7 +1961,7 @@ \subsection{GraphBLAS binary operators: {\sf GrB\_BinaryOp}, $z=f(x,y)$} %======
 \verb'GrB_DIV_'$T$    & $T \times T \rightarrow T$ & $z = x/y$       & division \\
 \verb'GxB_RDIV_'$T$   & $T \times T \rightarrow T$ & $z = y/x$       & reverse division \\
 \hline
-% 6 new TxT->T comparison
+% TxT->T comparison
 \verb'GxB_ISEQ_'$T$   & $T \times T \rightarrow T$ & $z = (x == y)$  & equal \\
 \verb'GxB_ISNE_'$T$   & $T \times T \rightarrow T$ & $z = (x \ne y)$ & not equal \\
 \verb'GxB_ISGT_'$T$   & $T \times T \rightarrow T$ & $z = (x >   y)$ & greater than \\
@@ -1959,7 +1969,7 @@ \subsection{GraphBLAS binary operators: {\sf GrB\_BinaryOp}, $z=f(x,y)$} %======
 \verb'GxB_ISGE_'$T$   & $T \times T \rightarrow T$ & $z = (x \ge y)$ & greater than or equal \\
 \verb'GxB_ISLE_'$T$   & $T \times T \rightarrow T$ & $z = (x \le y)$ & less than or equal  \\
 \hline
-% 3 TxT->T logical
+% TxT->T logical
 \verb'GxB_LOR_'$T$    & $T \times T \rightarrow T$ & $z = (x \ne 0) \vee    (y \ne 0) $ & logical OR \\
 \verb'GxB_LAND_'$T$   & $T \times T \rightarrow T$ & $z = (x \ne 0) \wedge  (y \ne 0) $ & logical AND \\
 \verb'GxB_LXOR_'$T$   & $T \times T \rightarrow T$ & $z = (x \ne 0) \veebar (y \ne 0) $ & logical XOR \\
@@ -1971,11 +1981,14 @@ \subsection{GraphBLAS binary operators: {\sf GrB\_BinaryOp}, $z=f(x,y)$} %======
 \begin{spec}
 {\bf SPEC:} The \verb'GxB_IS*_'$T$
 \verb'GxB_RMINUS_'$T$,
-and
 \verb'GxB_RDIV_'$T$
+\verb'GxB_ANY_'$T$,
+and
+\verb'GxB_PAIR_'$T$
 operators, and the Boolean \verb'GxB_L*_'$T$ are extensions to the spec.
 \end{spec}
 
+\newpage
 Another set of six kinds of built-in comparison operators have the form $T
 \times T \rightarrow $\verb'bool'.  They are defined for all eleven built-in
 types, for a total of 66 binary operators.  Note that when $T$ is \verb'bool',
@@ -2028,11 +2041,6 @@ \subsection{GraphBLAS binary operators: {\sf GrB\_BinaryOp}, $z=f(x,y)$} %======
 }
 \vspace{0.2in}
 
-This gives a total of 278 built-in binary operators listed in the tables above:
-209 of the form $T \times T \rightarrow T$, 66 of the form $T \times T
-\rightarrow $ \verb'bool', and three purely Boolean.  There are 260 unique
-operators since 18 of the 28 Boolean operators are redundant.
-
 There are two sets of built-in comparison operators in SuiteSparse:Graph\-BLAS,
 but they are not redundant.  They are identical except for the type (domain) of
 their output, $z$.  The \verb'GrB_EQ_'$T$ and related operators compare their
@@ -2245,6 +2253,54 @@ \subsubsection{{\sf GrB\_BinaryOp\_free:} free a user-defined binary operator}
 handle, or if \verb'op == NULL' on input.
 It does nothing at all if passed a built-in binary operator.
 
+\newpage
+%-------------------------------------------------------------------------------
+\subsubsection{{\sf ANY} and {\sf PAIR} operators}
+%-------------------------------------------------------------------------------
+\label{any_pair}
+
+SuiteSparse:GraphBLAS v3.2.0 adds two new operators, \verb'ANY' and
+\verb'PAIR'.
+
+The \verb'PAIR' operator is simple to describe: just $f(x,y)=1$.  It is called
+the \verb'PAIR' operator since it returns 1 in a semiring when a pair of
+entries $a_{ik}$ and $b_{kj}$ is found in the matrix multiply.  This operator
+is simple yet very useful.  It allows purely symbolic computations to be
+performed on matrices of any type, without having to typecast them to Boolean
+with all values being true.  Typecasting need not be performed on the inputs to
+the \verb'PAIR' operator, and the \verb'PAIR' operator does not have to access
+the values of the matrix, so it is a very fast operator to use.
+
+The \verb'ANY' operator is very unusual, but very powerful.  It is the function
+$f(x,y)=x$, or $y$, where GraphBLAS has the freedom to select either $x$, or
+$y$, at its own discretion.  Do not confuse the \verb'ANY' operator with the
+\verb'any' function in MATLAB, which computes a reduction using the logical OR
+operator.
+
+The \verb'ANY' function is associative and commutative, and can thus serve as
+an operator for a monoid.  The selection of $x$ or $y$ is not randomized.
+Instead, SuiteSparse:GraphBLAS uses this freedom to compute as fast a result as
+possible.  When used in a dot product, \[ c_{ij} = \sum_k a_{ik} b_{kj} \] for
+example, the computation can terminate as soon as any matching pair of entries
+is found.  When used in a parallel saxpy-style computation, the \verb'ANY'
+operator allows for a relaxed form of synchronization to be used, resulting
+in a fast benign race condition.
+
+The result of the \verb'ANY' monoid is non-deterministic, unless it is
+coupled with the \verb'PAIR' multiplicative operator.  In this case,
+the \verb'ANY_PAIR' semiring will return a deterministic result,
+since $f(1,1)$ is always 1, for the \verb'ANY' operator.
+
+When paired with a different operator, the results are non-deterministic.  This
+gives a powerful method when computing results for which any value selected by
+the \verb'ANY' operator is valid.  One such example is the breadth-first-search
+tree.  Suppose node $j$ is at level $v$, and there are multiple nodes $i$ at
+level $v-1$ for which edge $(i,j)$ exists in the graph.  Any of these nodes $i$
+can serve as a valid parent in the BFS tree.  Using the \verb'ANY' operator,
+GraphBLAS can quickly compute a valid BFS tree; if it is used again on the
+same inputs, it might return a different, yet still valid, BFS tree, due to
+the non-deterministic nature of inter-thread synchronization.
+
 \newpage
 %===============================================================================
 \subsection{SuiteSparse:GraphBLAS select operators: {\sf GxB\_SelectOp}} %======
@@ -2459,22 +2515,21 @@ \subsection{GraphBLAS monoids: {\sf GrB\_Monoid}} %=============================
 always holds.  That is, operator can be applied in any order and the results
 remain the same.
 
-Four kinds of built-in operators (\verb'MIN', \verb'MAX', \verb'PLUS',
-\verb'TIMES') can be used to form monoids for each of the ten non-Boolean
-built-in types, and 12 can be used for Boolean monoids, all of which are listed
-in the table below.  This is a total of 52 valid monoids that can be
-constructed from built-in types and operators, although 8 of the 12 Boolean
+Five kinds of built-in operators (\verb'MIN', \verb'MAX', \verb'PLUS',
+\verb'TIMES', and \verb'ANY') can be used to form monoids for each of the ten
+non-Boolean built-in types, and 13 can be used for Boolean monoids, all of
+which are listed in the table below.  These are the valid monoids that can be
+constructed from built-in types and operators, although 8 of the 13 Boolean
-monoids are redundant (the four remaining being \verb'OR', \verb'AND',
+monoids are redundant (the five remaining being \verb'OR', \verb'AND',
-\verb'XOR', and \verb'EQ').  There are thus a total of 44 unique monoids that
-can be constructed using built-in binary operators.  Since the built-in monoids
-are also commutative, all of them can be used to create a semiring.  Recall
-that a commutative operator $f(x,y)$ is one for which the condition
+\verb'XOR', \verb'EQ', and \verb'ANY').  There are a total of 55 unique monoids
+that can be constructed using built-in binary operators: five for each of the
+10 non-Boolean types, and 5 more for Boolean types.  Since the built-in
+monoids are also commutative, all of them can be used to create a semiring.
+Recall that a commutative operator $f(x,y)$ is one for which the condition
 $f(a,b)=f(b,a)$ always holds.  That is, the two operands can be swapped and the
 results remain the same.  One of the components of a semiring is a commutative
 monoid.
 
-% All 44 monoids are defined in \verb'GraphBLAS.h' file.
-
 \vspace{0.2in}
 {\footnotesize
 \begin{tabular}{lllll}
@@ -2482,13 +2537,14 @@ \subsection{GraphBLAS monoids: {\sf GrB\_Monoid}} %=============================
 GraphBLAS             & types (domains)            & expression      & identity  & terminal \\
 operator              &                            & $z=f(x,y)$      &           & \\
 \hline
-% 8 numeric TxT->T
+% numeric TxT->T
 \verb'GrB_MIN_'$T$    & $T \times T \rightarrow T$ & $z = \min(x,y)$ & $+\infty$ & $-\infty$ \\
 \verb'GrB_MAX_'$T$    & $T \times T \rightarrow T$ & $z = \max(x,y)$ & $-\infty$ & $+\infty$ \\
 \verb'GrB_PLUS_'$T$   & $T \times T \rightarrow T$ & $z = x+y$       & 0         & none \\
 \verb'GrB_TIMES_'$T$  & $T \times T \rightarrow T$ & $z = xy$        & 1         & 0 (not fp) \\
+\verb'GxB_ANY_'$T$    & $T \times T \rightarrow T$ & $z = x$ or $y$  & any       & any        \\
 \hline
-% 4 bool x bool -> bool
+% bool x bool -> bool
 \verb'GrB_LOR'        & \verb'bool' $\times$ \verb'bool' $\rightarrow$ \verb'bool' & $z = x \vee    y $ & false & true  \\
 \verb'GrB_LAND'       & \verb'bool' $\times$ \verb'bool' $\rightarrow$ \verb'bool' & $z = x \wedge  y $ & true  & false \\
 \verb'GrB_LXOR'       & \verb'bool' $\times$ \verb'bool' $\rightarrow$ \verb'bool' & $z = x \veebar y $ & false & none \\
@@ -2513,18 +2569,20 @@ \subsection{GraphBLAS monoids: {\sf GrB\_Monoid}} %=============================
 since a user-defined operator can be created with a terminal value that is not
 an annihilator.  See Section~\ref{monoid_terminal_new} for an example.
 
+The \verb'GxB_ANY_*' monoid can terminate as soon as it finds any value at all.
+
 The \verb'GrB_TIMES_FP*' operators do not have a terminal value of zero, since
 they comply with the IEEE 754 standard, and \verb'0*NaN' is not zero, but
 \verb'NaN'.  Technically, their terminal value is \verb'NaN', but this value is
 rare in practice and thus the terminal condition is not worth checking.
 
-SuiteSparse:GraphBLAS predefines each of the 44 unique monoids that can be
+SuiteSparse:GraphBLAS predefines each of the 55 unique monoids that can be
 constructed with built-in types and operators, with the naming convention
-\verb'GxB_op_type_MONOID'.  For the first 40, \verb'op' is \verb'MIN',
-\verb'MAX', \verb'PLUS', or \verb'TIMES', and \verb'type' is all but
-\verb'BOOL'.  The four Boolean monoids are \verb'GxB_LOR_BOOL_MONOID',
-\verb'GxB_LAND_BOOL_MONOID', \verb'GxB_LXOR_BOOL_MONOID', and
-\verb'GxB_EQ_BOOL_MONOID'.
+\verb'GxB_op_type_MONOID'.  For the first 50, \verb'op' is \verb'MIN',
+\verb'MAX', \verb'PLUS', \verb'TIMES', or \verb'ANY', and \verb'type' is all
+but \verb'BOOL'.  The five Boolean monoids are \verb'GxB_LOR_BOOL_MONOID',
+\verb'GxB_LAND_BOOL_MONOID', \verb'GxB_LXOR_BOOL_MONOID',
+\verb'GxB_EQ_BOOL_MONOID', and \verb'GxB_ANY_BOOL_MONOID'.
 
 The next sections define the following methods for the \verb'GrB_Monoid'
 object:
@@ -2548,7 +2606,7 @@ \subsection{GraphBLAS monoids: {\sf GrB\_Monoid}} %=============================
 {\bf SPEC:} The predefined monoids are an extension to the spec.
 \end{spec}
 
-% \newpage
+\newpage
 %-------------------------------------------------------------------------------
 \subsubsection{{\sf GrB\_Monoid\_new:} create a monoid}
 %-------------------------------------------------------------------------------
@@ -2772,7 +2830,7 @@ \subsection{GraphBLAS semirings: {\sf GrB\_Semiring}} %=========================
 multiplication of two sparse matrices in GraphBLAS, ${\bf C=AB}$.  The ``add''
 operator is a commutative and associative monoid, and the binary ``multiply''
 operator defines a function $z=fmult(x,y)$ where the type of $z$ matches the
-exactly with the monoid type.  SuiteSparse:GraphBLAS includes 1040
+exactly with the monoid type.  SuiteSparse:GraphBLAS includes 1355
 predefined built-in semirings, which are all those that can be constructed
 from built-in types and operators.  The next sections define the following
 methods for the \verb'GrB_Semiring' object:
@@ -2812,21 +2870,21 @@ \subsubsection{{\sf GrB\_Semiring\_new:} create a semiring}
 addition to the standard error cases, the function returns
 \verb'GrB_DOMAIN_MISMATCH' if the output (\verb'ztype') domain of
 \verb'multiply' does not match the domain of the \verb'add' monoid.  Using
-built-in types and operators, 1040 unique semirings can be built.  This count
+built-in types and operators, 1355 semirings can be built.  This count
 excludes redundant Boolean operators (for example \verb'GrB_TIMES_BOOL' and
 \verb'GrB_LAND' are different operators but they are redundant since they
 always return the same result).
 
 \vspace{-0.05in}
 \begin{itemize}
-\item 760 semirings with a multiplier $T \times T \rightarrow T$ where $T$ is
+\item 1000 semirings with a multiplier $T \times T \rightarrow T$ where $T$ is
     non-Boolean, from the complete cross product of:
 
     \vspace{-0.05in}
     \begin{itemize}
-    \item 4 add monoids (\verb'MIN', \verb'MAX', \verb'PLUS', \verb'TIMES')
-    \item 19 multiply operators
-    (\verb'FIRST', \verb'SECOND', \verb'MIN', \verb'MAX',
+    \item 5 add monoids (\verb'MIN', \verb'MAX', \verb'PLUS', \verb'TIMES', \verb'ANY')
+    \item 20 multiply operators
+    (\verb'FIRST', \verb'SECOND', \verb'PAIR', \verb'MIN', \verb'MAX',
     \verb'PLUS', \verb'MINUS', \verb'RMINUS', \verb'TIMES', \verb'DIV', \verb'RDIV',
     \verb'ISEQ', \verb'ISNE', \verb'ISGT',
     \verb'ISLT', \verb'ISGE', \verb'ISLE',
@@ -2834,34 +2892,34 @@ \subsubsection{{\sf GrB\_Semiring\_new:} create a semiring}
     \item 10 non-Boolean types, $T$
     \end{itemize}
 
-\item 240 semirings with a comparison operator $T \times T \rightarrow$
+\item 300 semirings with a comparison operator $T \times T \rightarrow$
     \verb'bool', where $T$ is non-Boolean, from the complete cross product of:
 
     \vspace{-0.05in}
     \begin{itemize}
-    \item 4 Boolean add monoids
-    (\verb'LAND', \verb'LOR', \verb'LXOR', \verb'EQ')
+    \item 5 Boolean add monoids
+    (\verb'LAND', \verb'LOR', \verb'LXOR', \verb'EQ', \verb'ANY')
     \item 6 multiply operators
     (\verb'EQ', \verb'NE', \verb'GT', \verb'LT', \verb'GE', \verb'LE')
     \item 10 non-Boolean types, $T$
     \end{itemize}
 
-\item 40 semirings with purely Boolean types, \verb'bool' $\times$ \verb'bool'
+\item 55 semirings with purely Boolean types, \verb'bool' $\times$ \verb'bool'
     $\rightarrow$ \verb'bool', from the complete cross product of:
 
     \vspace{-0.05in}
     \begin{itemize}
-    \item 4 Boolean add monoids
-    (\verb'LAND', \verb'LOR', \verb'LXOR', \verb'EQ')
-    \item 10 multiply operators
-    (\verb'FIRST', \verb'SECOND', \verb'LOR', \verb'LAND', \verb'LXOR',
+    \item 5 Boolean add monoids
+    (\verb'LAND', \verb'LOR', \verb'LXOR', \verb'EQ', \verb'ANY')
+    \item 11 multiply operators
+    (\verb'FIRST', \verb'SECOND', \verb'PAIR', \verb'LOR', \verb'LAND', \verb'LXOR',
     \verb'EQ', \verb'GT', \verb'LT', \verb'GE', \verb'LE')
     \end{itemize}
 
 \end{itemize}
 
 \vspace{-0.05in}
-SuiteSparse:GraphBLAS pre-defines all 1040 unique semirings that can be
+SuiteSparse:GraphBLAS pre-defines all 1355 unique semirings that can be
 constructed from built-in types and operators, as an extension to the spec.
 The naming convention is \verb'GxB_add_mult_type', where \verb'add' is the
 operator of the additive monoid, \verb'mult' is the multiply operator, and
@@ -5062,12 +5120,6 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
 % No GraphBLAS method (Section~\ref{objects}) is modified by a descriptor, and
 % neither are any unary or binary operators.
 
-\begin{spec}
-{\bf SPEC:} \verb'GxB_DEFAULT', \verb'GxB_NTHRADS', \verb'GxB_CHUNK',
-\verb'GxB_AxB_METHOD', and \verb'GxB_AxB_*'
-are extensions to the spec.
-\end{spec}
-
 The access to these parameters and their values is governed
 by two \verb'enum' types, \verb'GrB_Desc_Field' and \verb'GrB_Desc_Value':
 
@@ -5087,11 +5139,7 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
     GxB_AxB_METHOD = 1000, // descriptor for selecting C=A*B algorithm
 }
 GrB_Desc_Field ;
-\end{verbatim} } \end{mdframed}
 
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
 typedef enum
 {
     // for all GrB_Descriptor fields:
@@ -5099,19 +5147,28 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
     // for GrB_OUTP only:
     GrB_REPLACE = 1,    // clear the output before assigning new values to it
     // for GrB_MASK only:
-    GrB_SCMP = 2,       // use the structural complement of the input
+    GrB_COMP = 2,       // use the complement of the mask
+    GrB_STRUCTURE = 4,  // use the structure of the mask
     // for GrB_INP0 and GrB_INP1 only:
     GrB_TRAN = 3,       // use the transpose of the input
-
     // for GxB_AxB_METHOD only:
     GxB_AxB_GUSTAVSON = 1001,   // gather-scatter saxpy method
     GxB_AxB_HEAP      = 1002,   // heap-based saxpy method
-    GxB_AxB_DOT       = 1003    // dot product
+    GxB_AxB_DOT       = 1003,   // dot product
+    GxB_AxB_HASH      = 1004,   // hash-based saxpy method
+    GxB_AxB_SAXPY     = 1005    // saxpy method (any kind)
 }
 GrB_Desc_Value ;
 \end{verbatim} } \end{mdframed}
 
 \newpage
+
+\begin{spec}
+{\bf SPEC:} \verb'GxB_DEFAULT', \verb'GxB_NTHREADS', \verb'GxB_CHUNK',
+\verb'GxB_AxB_METHOD', and \verb'GxB_AxB_*'
+are extensions to the spec.
+\end{spec}
+
 The internal representation is opaque to the user, but in this User Guide the
 five descriptor fields of a descriptor \verb'desc' are illustrated as an array
 of five items, as described in the list below.  The underlying implementation
@@ -5141,7 +5198,7 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
     modified, even if \verb'Z(i,j)' is an entry with a different value; that
     value is simply discarded.
 
-    If the \verb'desc [GrB_MASK]' parameter is set to \verb'GrB_SCMP', then the
+    If the \verb'desc [GrB_MASK]' parameter is set to \verb'GrB_COMP', then the
     use of the mask is complemented.  In this case, if the mask is not present
     (\verb'Mask==NULL') then implicitly \verb'Mask(i,j)=0' for all \verb'i' and
     \verb'j'.  This means that none of ${\bf C}$ is modified and the entire
@@ -5155,6 +5212,16 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
     \verb'Z(i,j)' is an entry with a different value; that value is simply
     discarded.
 
+    If the \verb'desc [GrB_MASK]' parameter is set to \verb'GrB_STRUCTURE',
+    then the values of the mask are ignored, and just the pattern of the
+    entries is used.  Any entry \verb'M(i,j)' in the pattern is treated as if
+    it were true.
+
+    The \verb'GrB_COMP' and \verb'GrB_STRUCTURE' settings can be combined,
+    either by setting the mask option twice (once with each value), or by
+    setting the mask option to \verb'GrB_COMP+GrB_STRUCTURE' (the latter is an
+    extension to the spec).
+
     Using a parameter to complement the \verb'Mask' is very useful because
     constructing the actual complement of a very sparse mask is impossible
     since it has too many entries.  If the number of places in \verb'C'
@@ -5193,6 +5260,11 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
 
     \item \verb'GxB_DEFAULT' means that a method is selected automatically.
 
+    \item \verb'GxB_AxB_SAXPY': select any saxpy-based method:
+        \verb'GxB_AxB_GUSTAVSON', \verb'GxB_AxB_HEAP', and/or
+        \verb'GxB_AxB_HASH', or any mix of the three,
+        in contrast to the dot-product method.
+
     \item \verb'GxB_AxB_GUSTAVSON':  an extended version of Gustavson's method
     \cite{Gustavson78}, which is a very good general-purpose method, but
     sometimes the workspace can be too large.  Assuming all matrices are stored
@@ -5205,15 +5277,30 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
     {\em saxpy} operations, and using workspace of size $n$ per thread,
     corresponding to the number of columns of \verb'C'.
 
-    \item \verb'GxB_AxB_HEAP': a heap-based method, computing
-    \verb'C(:,j)=A*B(:,j)' via a heap of size equal to the maximum number of
-    entries in any column of \verb'B'.  The method is very good for hypersparse
-    matrices, particularly when $|{\bf B}|$ is less than the number of rows of
-    \verb'C'.  The method used is similar to Algorithm II in
-    \cite{BulucGilbert08} (see also \cite{BulucGilbert12}).  It computes
-    \verb'C' in the same order as Gustavson's method, using a heap instead of a
-    large gather/scatter workspace.  The heap has size $b$, equal to the
-    maximum number of entries in any one vector of \verb'B'.
+    \item \verb'GxB_AxB_HEAP':
+        no longer appears in v3.2.0, but will likely
+        be reintroduced in a future version.  This is silently replaced
+        with \verb'GxB_AxB_HASH'.
+
+%   a heap-based method, computing
+%   \verb'C(:,j)=A*B(:,j)' via a heap of size equal to the maximum number of
+%   entries in any column of \verb'B'.  The method is very good for hypersparse
+%   matrices, particularly when $|{\bf B}|$ is less than the number of rows of
+%   \verb'C'.  The method used is similar to Algorithm II in
+%   \cite{BulucGilbert08} (see also \cite{BulucGilbert12}).  It computes
+%   \verb'C' in the same order as Gustavson's method, using a heap instead of a
+%   large gather/scatter workspace.  The heap has size $b$, equal to the
+%   maximum number of entries in any one vector of \verb'B'.
+
+    \item \verb'GxB_AxB_HASH':  a hash-based method, based on 
+        \cite{10.1145/3229710.3229720}.  Very efficient for hypersparse
+        matrices, matrix-vector-multiply, and when $|{\bf B}|$ is small.
+
+% [2] Yusuke Nagasaka, Satoshi Matsuoka, Ariful Azad, and Aydın Buluç. 2018.
+% High-Performance Sparse Matrix-Matrix Products on Intel KNL and Multicore
+% Architectures. In Proc. 47th Intl. Conf. on Parallel Processing (ICPP '18).
+% Association for Computing Machinery, New York, NY, USA, Article 34, 1–10.
+% DOI:https://doi.org/10.1145/3229710.3229720
 
     \item \verb'GxB_AxB_DOT': computes \verb"C(i,j)=A(i,:)*B(j,:)'", for each
     entry \verb'C(i,j)'.  If the mask is present and not complemented, only
@@ -5233,7 +5320,7 @@ \subsection{GraphBLAS descriptors: {\sf GrB\_Descriptor}} %=====================
 
 \end{itemize}
 
-% \newpage
+\newpage
 %-------------------------------------------------------------------------------
 \subsubsection{{\sf GrB\_Descriptor\_new:}  create a new descriptor}
 %-------------------------------------------------------------------------------
@@ -5253,7 +5340,7 @@ \subsubsection{{\sf GrB\_Descriptor\_new:}  create a new descriptor}
 input matrix is transposed, and the method used in \verb'C=A*B' is
 selected automatically).
 
-\newpage
+% \newpage
 %-------------------------------------------------------------------------------
 \subsubsection{{\sf GrB\_Descriptor\_set:}  set a parameter in a descriptor}
 %-------------------------------------------------------------------------------
@@ -5272,8 +5359,12 @@ \subsubsection{{\sf GrB\_Descriptor\_set:}  set a parameter in a descriptor}
 
 \verb'GrB_Descriptor_set' sets a descriptor field (\verb'GrB_OUTP',
 \verb'GrB_MASK', \verb'GrB_INP0', \verb'GrB_INP1', or \verb'GxB_AxB_METHOD') to
-a particular value (\verb'GxB_DEFAULT', \verb'GrB_SCMP', \verb'GrB_TRAN',
-\verb'GrB_REPLACE', \verb'GxB_AxB_GUSTAVSON', \verb'GxB_AxB_HEAP', or
+a particular value (\verb'GxB_DEFAULT', \verb'GrB_COMP',
+\verb'GrB_STRUCTURE', \verb'GrB_COMP+GrB_STRUCTURE', \verb'GrB_TRAN',
+\verb'GrB_REPLACE', \verb'GxB_AxB_GUSTAVSON', \verb'GxB_AxB_HEAP',
+\verb'GxB_AxB_HASH',
+\verb'GxB_AxB_SAXPY',
+or
 \verb'GxB_AxB_DOT').
 
 \vspace{0.2in}
@@ -5301,10 +5392,16 @@ \subsubsection{{\sf GrB\_Descriptor\_set:}  set a parameter in a descriptor}
     The Mask is not complemented.  \verb'Mask(i,j)=1' means the value $C_{ij}$
     can be modified by the operation, while \verb'Mask(i,j)=0' means the value
     $C_{ij}$ shall not be modified by the operation.
-    & \verb'GrB_SCMP':
+    & \verb'GrB_COMP':
     The Mask is complemented.  \verb'Mask(i,j)=0' means the value $C_{ij}$
     can be modified by the operation, while \verb'Mask(i,j)=1' means the value
     $C_{ij}$ shall not be modified by the operation. \\
+    &
+    & \verb'GrB_STRUCTURE':
+    The values of the Mask are ignored.  If \verb'Mask(i,j)' is an entry
+    in the \verb'Mask' matrix, it is treated as if \verb'Mask(i,j)=1'.
+    The two options \verb'GrB_COMP' and \verb'GrB_STRUCTURE' can be
+    combined.  \\
 
 \hline
 
@@ -5420,8 +5517,72 @@ \subsubsection{{\sf GrB\_Descriptor\_free:} free a descriptor}
 safely does nothing if passed a \verb'NULL' handle, or if
 \verb'descriptor == NULL' on input.
 
-There are currently no predefined descriptors, but if these are added in the
-future, this function will do nothing if passed a built-in descriptor.
+\newpage
+%-------------------------------------------------------------------------------
+\subsubsection{{\sf GrB\_DESC\_*:}  predefined descriptors}
+%-------------------------------------------------------------------------------
+\label{descriptor_predefined}
+
+Version 1.3 of the GraphBLAS C API Specification adds predefined descriptors,
+and these have been added as of v3.2.0 of SuiteSparse:GraphBLAS.  They are
+listed in the table below.  These descriptors may not be modified or freed.
+Attempts to modify them result in an error (\verb'GrB_INVALID_VALUE'); attempts
+to free them are silently ignored.
+\verb'GrB_NULL' is the default descriptor, with all settings at their defaults:
+\verb'OUTP': do not replace the output,
+\verb'MASK': mask is valued and not complemented,
+\verb'INP0': first input not transposed, and
+\verb'INP1': second input not transposed.
+
+\vspace{0.02in}
+\noindent
+{\footnotesize
+\begin{tabular}{|l|lllll|}
+\hline
+Descriptor              &  \verb'OUTP'          & \verb'MASK'           & \verb'MASK'       & \verb'INP0'       & \verb'INP1'       \\
+                        &                       & structural            & complement        & & \\
+\hline
+\verb'GrB_NULL'         &   -                   & -                     & -                 & -                 & -                 \\
+\verb'GrB_DESC_T1'      &   -                   & -                     & -                 & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_T0'      &   -                   & -                     & -                 & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_T0T1'    &   -                   & -                     & -                 & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\verb'GrB_DESC_C'       &   -                   & -                     & \verb'GrB_COMP'   & -                 & -                 \\
+\verb'GrB_DESC_CT1'     &   -                   & -                     & \verb'GrB_COMP'   & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_CT0'     &   -                   & -                     & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_CT0T1'   &   -                   & -                     & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\verb'GrB_DESC_S'       &   -                   & \verb'GrB_STRUCTURE'  & -                 & -                 & -                 \\
+\verb'GrB_DESC_ST1'     &   -                   & \verb'GrB_STRUCTURE'  & -                 & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_ST0'     &   -                   & \verb'GrB_STRUCTURE'  & -                 & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_ST0T1'   &   -                   & \verb'GrB_STRUCTURE'  & -                 & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\verb'GrB_DESC_SC'      &   -                   & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & -                 & -                 \\
+\verb'GrB_DESC_SCT1'    &   -                   & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_SCT0'    &   -                   & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_SCT0T1'  &   -                   & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\verb'GrB_DESC_R'       &   \verb'GrB_REPLACE'  & -                     & -                 & -                 & -                 \\
+\verb'GrB_DESC_RT1'     &   \verb'GrB_REPLACE'  & -                     & -                 & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_RT0'     &   \verb'GrB_REPLACE'  & -                     & -                 & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_RT0T1'   &   \verb'GrB_REPLACE'  & -                     & -                 & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\verb'GrB_DESC_RC'      &   \verb'GrB_REPLACE'  & -                     & \verb'GrB_COMP'   & -                 & -                 \\
+\verb'GrB_DESC_RCT1'    &   \verb'GrB_REPLACE'  & -                     & \verb'GrB_COMP'   & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_RCT0'    &   \verb'GrB_REPLACE'  & -                     & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_RCT0T1'  &   \verb'GrB_REPLACE'  & -                     & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\verb'GrB_DESC_RS'      &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & -                 & -                 & -                 \\
+\verb'GrB_DESC_RST1'    &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & -                 & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_RST0'    &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & -                 & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_RST0T1'  &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & -                 & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\verb'GrB_DESC_RSC'     &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & -                 & -                 \\
+\verb'GrB_DESC_RSCT1'   &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & -                 & \verb'GrB_TRAN'   \\
+\verb'GrB_DESC_RSCT0'   &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & -                 \\
+\verb'GrB_DESC_RSCT0T1' &   \verb'GrB_REPLACE'  & \verb'GrB_STRUCTURE'  & \verb'GrB_COMP'   & \verb'GrB_TRAN'   & \verb'GrB_TRAN'   \\
+\hline
+\end{tabular}}
 
 \newpage
 %===============================================================================
@@ -5533,11 +5694,11 @@ \subsection{OpenMP parallelism}
 maximum number of threads to use (which may differ from the
 \verb'omp_get_max_threads' value), and a parameter called the \verb'chunk'.
 Suppose \verb'work' is a measure of the work an operation needs to perform (say
-the number of nonzeros in the two input matrices for \verb'GrB_eWiseAdd').  No
+the number of entries in the two input matrices for \verb'GrB_eWiseAdd').  No
 more than \verb'floor(work/chunk)' threads will be used (or one thread if the
 ratio is less than 1).
 
-The default \verb'chunk' value is 4096, but this may change in future versions,
+The default \verb'chunk' value is 65,536, but this may change in future versions,
 or it may be modified when GraphBLAS is installed on a particular machine.
 
 Both parameters can be set in two ways:
@@ -5578,7 +5739,7 @@ \subsection{OpenMP parallelism}
 
 If either parameter is set to \verb'GxB_DEFAULT', then default values are used.
 The default for \verb'nthreads_max' is the return value from
-\verb'omp_get_max_threads', and the default chunk size is currently 4096.
+\verb'omp_get_max_threads', and the default chunk size is currently 65,536.
 
 If a descriptor value for either parameter is left at its default, or set to
 \verb'GxB_DEFAULT', then the global setting is used.  This global setting may
@@ -5828,10 +5989,9 @@ \subsection{Hypersparse matrices}
 converted from non-hypersparse to hypersparse format if it has fewer than 10\%
 non-empty columns.  If it is hypersparse, it is a candidate for conversion to
 non-hypersparse if it has 20\% or more non-empty columns.  If it has between 10\%
-and 20\% non-empty columns, it remains in whatever format it is currently in.
-
+and 20\% non-empty columns, it remains in its current format.
 MATLAB only supports a non-hypersparse CSC format.  The format in
-SuiteSparse:GraphBLAS that is equivalent to the MATLAB format is given below:
+SuiteSparse:GraphBLAS that is equivalent to the MATLAB format is:
 
 {\footnotesize
 \begin{verbatim}
@@ -5915,7 +6075,6 @@ \subsection{Other global options}
 The \verb'GxB_THREADING' option returns the internal parallelism used inside
 SuiteSparse:GraphBLAS, depending on how the library was compiled:
 
-\newpage
 {\footnotesize
 \begin{verbatim}
     GxB_Thread_Model threading ;
@@ -6194,7 +6353,9 @@ \subsection{Summary of usage of {\sf GxB\_set} and {\sf GxB\_get}}
     GxB_get (GrB_Descriptor d, GrB_OUTP, GrB_Desc_Value *v) ;
 
     GxB_set (GrB_Descriptor d, GrB_MASK, GxB_DEFAULT) ;
-    GxB_set (GrB_Descriptor d, GrB_MASK, GrB_SCMP) ;
+    GxB_set (GrB_Descriptor d, GrB_MASK, GrB_COMP) ;
+    GxB_set (GrB_Descriptor d, GrB_MASK, GrB_STRUCTURE) ;
+    GxB_set (GrB_Descriptor d, GrB_MASK, GrB_COMP+GrB_STRUCTURE) ;
     GxB_get (GrB_Descriptor d, GrB_MASK, GrB_Desc_Value *v) ;
 
     GxB_set (GrB_Descriptor d, GrB_INP0, GxB_DEFAULT) ;
@@ -6208,6 +6369,8 @@ \subsection{Summary of usage of {\sf GxB\_set} and {\sf GxB\_get}}
     GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_DEFAULT) ;
     GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_GUSTAVSON) ;
     GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_HEAP) ;
+    GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_HASH) ;
+    GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_SAXPY) ;
     GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_DOT) ;
     GxB_get (GrB_Descriptor d, GxB_AxB_METHOD, GrB_Desc_Value *v) ;
 
@@ -6604,7 +6767,7 @@ \subsection{The GraphBLAS specification in {MATLAB}} %==========================
 \verb'GB_spec_descriptor.m'     & mimics a GraphBLAS descriptor \\
 \verb'GB_spec_identity.m'       & returns the identity of a monoid \\
 \verb'GB_spec_matrix.m'         & conforms a MATLAB sparse matrix to GraphBLAS \\
-\verb'GB_define*.m'             & creates draft of \verb'GraphBLAS.h' \\
+\verb'GB_define.m'              & creates draft of \verb'GraphBLAS.h' \\
 \hline
 \end{tabular}
 }
@@ -8100,7 +8263,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %=============
     the submatrix is ever modified.  In \verb'GrB_assign', it is possible to
     delete entries in \verb'C' outside the submatrix, but only in one specific
     manner.  Suppose the mask \verb'M' is present (or, suppose it is not
-    present but \verb'GrB_SCMP' is true).  After (optionally) complementing the
+    present but \verb'GrB_COMP' is true).  After (optionally) complementing the
     mask, the value of \verb'M(i,j)' can be 0 for some entry outside the
     \verb'C(I,J)' submatrix.  If the \verb'GrB_REPLACE' descriptor is
     true, \verb'GrB_assign' deletes this entry.
@@ -8110,7 +8273,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %=============
 \verb'GxB_subassign' and \verb'GrB_assign' are identical if \verb'GrB_REPLACE'
 is set to its default value of false, and if the masks happen to be the same.
 The two masks can be the same in two cases:  either the \verb'Mask' input is
-\verb'NULL' (and it is not complemented via \verb'GrB_SCMP'), or \verb'I' and
+\verb'NULL' (and it is not complemented via \verb'GrB_COMP'), or \verb'I' and
 \verb'J' are both \verb'GrB_ALL'.
 If all these conditions hold,
 the two algorithms are identical and have the same performance.  Otherwise,
@@ -8143,7 +8306,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %=============
 \end{tabular}
 \vspace{0.05in}
 
-This notation does not include the details of the \verb'GrB_SCMP' and
+This notation does not include the details of the \verb'GrB_COMP' and
 \verb'GrB_REPLACE' descriptors, but it does illustrate the difference in the
 \verb'Mask'.  In the subassign, \verb'Mask' is the same size as \verb'C(I,J)'
 and \verb'A'.  If \verb'I[0]=i' and \verb'J[0]=j', then \verb'Mask(0,0)'
@@ -8218,7 +8381,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %=============
 otherwise.  This is $m_{ij}$ for \verb'GrB_assign', and $m_{i'j'}$ for
 \verb'GxB_subassign', to reflect the difference in the mask, but this
 difference is not reflected in the table.  The value 1 or 0 is the value of the
-entry in the mask after it is optionally complemented via the \verb'GrB_SCMP'
+entry in the mask after it is optionally complemented via the \verb'GrB_COMP'
 option.
 
 Finally, the last column is the action taken in this case.  It is left blank if
@@ -8283,7 +8446,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %=============
 submatrix, but \verb'GrB_assign' can modify them in two cases listed in
 Table~\ref{outsubmatrix}.  When the \verb'GrB_REPLACE' option is selected, and
 when the \verb'Mask(i,j)' for an entry $c_{ij}$ is false (or if the
-\verb'Mask(i,j)' is true and \verb'GrB_SCMP' is enabled via the descriptor),
+\verb'Mask(i,j)' is true and \verb'GrB_COMP' is enabled via the descriptor),
 then the entry is deleted by \verb'GrB_assign'.
 
 The fourth column of Table~\ref{outsubmatrix} differs from
@@ -8354,7 +8517,7 @@ \subsubsection{Example}
     \right]
 \]
 
-Suppose \verb'GrB_REPLACE' is true, and \verb'GrB_SCMP' is false.  Let the
+Suppose \verb'GrB_REPLACE' is true, and \verb'GrB_COMP' is false.  Let the
 \verb'Mask' be:
 
 \[
@@ -9062,7 +9225,7 @@ \subsection{{\sf GrB\_transpose:} transpose a matrix} %=========================
     // C<~Mask> = A, clearing C first.  No transpose.
     GrB_Descriptor_new (&desc) ;
     GrB_Descriptor_set (desc, GrB_INP0, GrB_TRAN) ;
-    GrB_Descriptor_set (desc, GrB_MASK, GrB_SCMP) ;
+    GrB_Descriptor_set (desc, GrB_MASK, GrB_COMP) ;
     GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE) ;
     GrB_transpose (A, Mask, NULL, A, desc) ; \end{verbatim}}
 
@@ -9499,492 +9662,17 @@ \subsection{Performance and portability considerations}
 
 Most of the rest of the report is self-explanatory.
 
-\newpage
+% \newpage
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Creating user-defined objects at compile-time} %%%%%%%%%%%%%%%%%%%%%%%%
+\section{Creating user-defined objects at compile-time (feature removed)} %%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \label{precompile}
 
-\begin{spec}
-{\bf SPEC:} The GraphBLAS API has no mechanism for constructing user-defined
-objects when GraphBLAS is compiled.  This entire section, and the
-\verb'GxB_*_define' macros, are extensions to the specification.
-\end{spec}
-
-User-defined types, operators, monoids, and semirings provide a powerful and
-flexible mechanism for extending GraphBLAS functionality.  For example,
-GraphBLAS does not support a built-in complex type, but it can be added in a
-user application with a few simple functions, and a few calls to
-\verb'GrB_Type_new', \verb'GrB_BinaryOp_new', \verb'GrB_Monoid_new', and
-\verb'GrB_Semiring_new'.  A complete example is given in Section~\ref{user}.
-See in particular the example code in \verb'Demo/Source/usercomplex.c'.
-
-GraphBLAS does not need to be recompiled in order for the user application to
-add new types, operators, monoids, or semirings.  This flexibility comes at the
-cost of performance, however.  Since the multiply/add operations in a
-user-defined semiring must be accessed one at a time through a function
-pointer, a complex matrix multiply via \verb'GrB_mxm' is about two or three
-times slower than it could be if GraphBLAS included a built-in complex type.
-
-This performance gap could be solved in two ways.  Complex operators and types
-could be added to SuiteSparse:GraphBLAS, or the GraphBLAS C API
-Specification itself, but this is not flexible.  Other user-defined objects
-would still need to be created.
-
-The mechanism described in this section is another solution.  It allows the
-user to create an unlimited variety of user-defined types, operators, monoids,
-and semirings, and have them compiled into SuiteSparse:GraphBLAS.  This is done
-by creating one or more files with the filename extension \verb'*.m4', and
-placing them in the \verb'SuiteSparse/GraphBLAS/User' directory.  Then when
-SuiteSparse:GraphBLAS is compiled via \verb'cmake', these new objects are
-compiled as well.  This is illustrated by the example in
-Figure~\ref{fig:complex}.  It is a single file,
-\verb'GraphBLAS/User/Example/my_complex.m4', that defines the complex type
-\verb'double complex' as a GraphBLAS \verb'GrB_Type' called \verb'My_Complex'.
-
-If the file resides in its default location in \verb'GraphBLAS/User/Example',
-it is not compiled with GraphBLAS.  Moving this file into
-\verb'GraphBLAS/User', or creating other \verb'm4' files in the
-\verb'GraphBLAS/User' directory, enables SuiteSparse:GraphBLAS to incorporate
-these user-defined objects at compile-time.  Any number of \verb'*.m4' files
-may be placed in the \verb'GraphBLAS/User' directory; they are all included in
-SuiteSparse:GraphBLAS.  There is no need to tell the \verb'cmake' process what
-the file names are.  All \verb'*.m4' files in \verb'GraphBLAS/User' will be
-found and included.
-
-Of course, a C++ API for GraphBLAS could also create user-defined objects at
-compile-time via templates, but the API for GraphBLAS is in C, not C++.  The
-solution described in this Section provides this functionality in a purely C
-interface.
-
-In Figure~\ref{fig:complex}, two inline functions are defined to perform
-complex addition and multiplication, and these are used as the basis for two
-GraphBLAS binary operators, \verb'My_complex_plus' and \verb'My_complex_times'.
-A monoid is constructed for complex addition, and then finally the complex
-plus-times semiring is defined.  All of these user-defined objects are appended
-to the \verb'GraphBLAS.h' include file.
-
-\begin{figure}
-\begin{mdframed}[leftmargin=-0.4in,userdefinedwidth=5.8in]
-{\footnotesize
-\begin{verbatim}
-#ifdef GxB_USER_INCLUDE
-
-    // Get complex.h but remove "I" since it is used elsewhere in GraphBLAS.
-    #include <complex.h>
-    #undef I
-
-    // Not all complex.h definitions include the CMPLX macro
-    #ifndef CMPLX
-    #define CMPLX(real,imag) \
-        ( (double complex)((double)(real)) + \
-          (double complex)((double)(imag) * _Complex_I) )
-    #endif
-
-    // define a token so a user application can check for existence
-    #define MY_COMPLEX
-
-    static inline void my_complex_plus
-    (
-        double complex *z, const double complex *x, const double complex *y
-    )
-    {
-        (*z) = (*x) + (*y) ;
-    }
-
-    static inline void my_complex_times
-    (
-        double complex *z, const double complex *x, const double complex *y
-    )
-    {
-        (*z) = (*x) * (*y) ;
-    }
-#endif
-
-// define the complex type, plus & times operators, plus monoid, and semiring
-GxB_Type_define(My_Complex, double complex) ;
-GxB_BinaryOp_define(My_Complex_plus,
-    my_complex_plus,  My_Complex, My_Complex, My_Complex) ;
-GxB_BinaryOp_define(My_Complex_times, my_complex_times,
-    My_Complex, My_Complex, My_Complex) ;
-GxB_Monoid_define(My_Complex_plus_monoid, My_Complex_plus, CMPLX(0,0)) ;
-GxB_Semiring_define(My_Complex_plus_times, My_Complex_plus_monoid,
-    My_Complex_times) ; \end{verbatim}}
-\end{mdframed}
-\caption{User-defined complex type and operators, defined at compile-time
-\label{fig:complex}}
-\end{figure}
-
-Some user definitions such as the \verb'static inline' functions
-\verb'my_complex_plus' and \verb'my_complex_times' in Figure~\ref{fig:complex}
-should appear in the \verb'GraphBLAS.h' include file, so that they are
-available to any function in the user application.  Other user declarations
-should appear only once, such as the declaration of global values used by
-user-defined functions.
-
-To handle this, an \verb'#ifdef GxB_USER_INCLUDE' mechanism is provided for
-use in the \verb'User/*m4' file, in the following style:
-
-{\footnotesize
-\begin{verbatim}
-    #ifdef GxB_USER_INCLUDE
-
-        // Part 1: any code here will be active in GraphBLAS.h.  Place all
-        // declarations suitable for use in an #include file here.  These
-        // declarations will be available to all user application files
-        // and to all internal SuiteSparse/GraphBLAS codes.
-
-    #else
-
-        // Part 2: declarations of user-defined variables, and executable
-        // code that should be compiled just once appears here.
-
-    #endif \end{verbatim}}
-
-\noindent
-Placing user functions inside this \verb'#ifdef' structure gives the user
-control over what declarations should be available to all of GraphBLAS and to
-all user application files (Part 1), and what definitions should appear just
-once (Part 2).
-
-\newpage
-For example, \verb'User/Example/my_scale.m4' in
-Figure~\ref{fig:scale} defines a unary operator that computes
-\verb'z=my_scalar*x', where \verb'my_scalar' is a global variable.
-The declaration of \verb'my_scalar' should appear in \verb'GraphBLAS.h', but
-it should be defined only once.
-
-The \verb'GxB_*_define' macros must not appear inside either Part 1 or Part 2
-of the \verb'#ifdef GxB_USER_INCLUDE'.
-
-\begin{figure}
-\begin{mdframed}[leftmargin=-0.4in,userdefinedwidth=5.8in]
-{\footnotesize
-\begin{verbatim}
-#ifdef GxB_USER_INCLUDE
-
-    //--------------------------------------------------------------------------
-    // declarations: for GraphBLAS.h
-    //--------------------------------------------------------------------------
-
-    // The following are declarations that are enabled in GraphBLAS.h and
-    // appear in all user codes that #include "GraphBLAS.h", and also in all
-    // internal GraphBLAS codes.  All user declarations (not definitions)
-    // should appear here.
-
-    #define MY_SCALE
-
-    extern double my_scalar ;
-
-    static inline void my_scale
-    (
-        double *z,
-        const double *x
-    )
-    {
-        (*z) = my_scalar * (*x) ;
-    }
-
-#else
-
-    //--------------------------------------------------------------------------
-    // definitions: code appears just once, in Source/all_user_objects.c
-    //--------------------------------------------------------------------------
-
-    // The following defintions are enabled in only a single place:
-    // SuiteSparse/GraphBLAS/Source/all_user_objects.c.  This is the place
-    // where all user-defined global variables should be defined.
-
-    double my_scalar = 0 ;
-
-#endif
-
-// Unary operator to compute z = my_scalar*x
-GxB_UnaryOp_define(My_scale, my_scale, GrB_FP64, GrB_FP64) ; \end{verbatim}}
-\end{mdframed}
-\caption{User-defined unary operator, dependent upon a global variable
-\label{fig:scale}}
-\end{figure}
-
-When creating user-defined objects in a \verb'User/*.m4' a useful (but
-optional) strategy is to define a token that can be used to discover whether or
-not a particular object is available at compile time.  For example, for the
-\verb'My_complex' objects in Figure~\ref{fig:complex}, the following code
-snippet could appear in a user application:
-
-{\footnotesize
-\begin{verbatim}
-    #ifndef MY_COMPLEX
-    // construct complex type and operators at run-time
-    GrB_Type My_Complex ;
-    GrB_Type_new (&My_Complex, sizeof (double complex)) ;
-    ... etc
-    #else
-    // use the pre-defined My_Complex type and corresponding objects
-    #endif \end{verbatim}}
-
-\noindent
-In either case, when the user application is finished, it can do
-\verb'GrB_free(My_Complex)'.  This safely does nothing if \verb'My_Complex' is
-defined at compile-time with \verb'GxB_Type_define', or frees it if the type
-was created a run-time with \verb'GrB_Type_new'.
-
-These pre-defined objects can be used in the user application just as if they
-were created at run-time via the corresponding calls to \verb'GrB_Type_new',
-\verb'GrB_BinaryOp_new', \verb'GrB_Monoid_new', and \verb'GrB_Semiring_new'.
-For a large matrix, computing \verb'C=A*B' via \verb'GrB_mxm' with the
-pre-compiled \verb'My_Complex_plus_times' semiring is about 10\% faster than
-the MATLAB statement \verb'C=A*B' for sparse complex matrices.  This is the same
-relative performance as when computing \verb'C=A*B' for real matrices in
-MATLAB, versus using \verb'GrB_mxm' with the built-in
-\verb'GxB_PLUS_TIMES_FP64' semiring.  If the complex plus-times semiring is
-defined at run-time instead, \verb'GrB_mxm' is about two to three times slower.
-
-User definitions in the \verb'User/*m4' files are visible to all internal
-SuiteSparse:GraphBLAS functions and thus must be given names that do not
-conflict with internal variables, functions, and macros.
-
-Objects defined by \verb'GxB_*_define', user-defined static inline functions,
-typedefs, global variables, and macros (via \verb'#define') in the
-\verb'User/*.m4' files must be given names with a unique prefix, such as
-\verb'MY_', \verb'USER_', \verb'my_', \verb'user_',
-or the name of the user application (say
-\verb'RealCoolApp_', or \verb'PAGERANK_' as exemplified in the
-\verb'my_pagerank.m4' example).  This will ensure that no name conflicts will
-occur.
-
-An example macro name conflict occurs in the \verb'User/Example/my_complex.m4'
-example in Figure~\ref{fig:complex}, which includes the ANSI \verb'complex.h'
-include file.  The \verb'complex.h' file defines an ANSI C11 macro \verb'I' but
-that name conflicts with internal SuiteSparse:GraphBLAS variables, so
-\verb'#undef I' is done immediately after the \verb'complex.h' file is
-included.  Undefining \verb'I' is permitted in the ANSI C11 specification
-for \verb'complex.h'.
-
-The seven \verb'GxB_*_define' macros are presented below.  They all have
-essentially the same parameters in the same order as the corresponding
-\verb'GrB_*_new' methods, except that where the \verb'GrB_*_new' methods use
-pointers to the new objects, the corresponding \verb'GxB_*_define' macro uses
-just the name of the object.  Unlike their \verb'GrB_*_new' counterparts, the
-six \verb'GxB_*_define' macros do not return an error code.  Any errors will be
-detected by the compiler.
-
-Since they are \verb'm4' macros, no space can appear between the macro name
-\verb'GxB_*_define' and the subsequent left parenthesis.  This restriction may
-be relaxed in subsequent versions of SuiteSparse:GraphBLAS.
-
-Since these objects are constructed at compile-time, they do not need to be
-freed with \verb'GrB_free'.  Attempting to free them is safe, however.
-SuiteSparse:GraphBLAS will safely (and silently) do nothing if an attempt is
-made to free them.
-
-The next sections describe the following 7 macros:
-
-\vspace{0.2in}
-{\footnotesize
-\begin{tabular}{ll}
-\hline
-\verb'GxB_Type_define'      & define a \verb'GrB_Type' at compile-time \\
-\verb'GxB_UnaryOp_define'   & define a \verb'GrB_UnaryOp' at compile-time \\
-\verb'GxB_BinaryOp_define'  & define a \verb'GrB_BinaryOp' at compile-time \\
-\verb'GxB_SelectOp_define'  & define a \verb'GxB_SelectOp' at compile-time \\
-\verb'GxB_Monoid_define'    & define a \verb'GrB_Monoid' at compile-time \\
-\verb'GxB_Monoid_terminal_define'
-    & define a \verb'GrB_Monoid' at compile-time \\
-    & (with a terminal value) \\
-\verb'GxB_Semiring_define'  & define a \verb'GrB_Semiring' at compile-time \\
-\hline
-\end{tabular}
-}
-\vspace{0.2in}
-
-\newpage
-%-------------------------------------------------------------------------------
-\subsection{{\sf GxB\_Type\_define:} define a {\sf GrB\_Type} at compile time}
-%-------------------------------------------------------------------------------
-\label{type_define}
-
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
-GxB_Type_define(GrB_Type type, ctype) ;
-\end{verbatim} } \end{mdframed}
-
-    \verb'GxB_Type_define' is very similar to \verb'GrB_Type_new', except that
-    it defines a type when SuiteSparse:GraphBLAS is compiled.  Instead of the
-    \verb'sizeof(ctype)' second parameter of \verb'GrB_Type_new', the second
-    parameter \verb'ctype' of \verb'GxB_Type_define' is a C type (a built-in
-    one or from a \verb'typedef').  As in all \verb'GxB_*_define' macros, there
-    is no \verb'&' symbol in front of the GraphBLAS \verb'type' parameter,
-    since this macro defines the object instead of returning a pointer.
-
-%-------------------------------------------------------------------------------
-\subsection{{\sf GxB\_UnaryOp\_define:}
-define a {\sf GrB\_UnaryOp} at compile time}
-%-------------------------------------------------------------------------------
-\label{unaryop_define}
-
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
-GxB_UnaryOp_define(GrB_UnaryOp op, func, GrB_Type ztype, GrB_Type xtype) ;
-\end{verbatim} } \end{mdframed}
-
-    \verb'GxB_UnaryOp_define' is identical to \verb'GrB_UnaryOp_new', except
-    that it defines a unary operator when SuiteSparse:GraphBLAS is compiled.
-    The function \verb'func' is the name of a user-defined function, normally a
-    static inline function in the user's \verb'*.m4' file.  The \verb'ztype'
-    and \verb'xtype' must be built-in types (\verb'GrB_BOOL', \verb'GrB_FP64',
-    etc) or types defined with \verb'GxB_Type_define'.
-
-%-------------------------------------------------------------------------------
-\subsection{{\sf GxB\_BinaryOp\_define:}
-define a {\sf GrB\_BinaryOp} at compile time}
-%-------------------------------------------------------------------------------
-\label{binaryop_define}
-
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
-GxB_BinaryOp_define(GrB_BinaryOp op, func, GrB_Type ztype, GrB_Type xtype,
-    GrB_Type ytype) ;
-\end{verbatim} } \end{mdframed}
-
-    \verb'GxB_BinaryOp_define' is identical to \verb'GrB_BinaryOp_new', except
-    that it defines a binary operator when SuiteSparse:GraphBLAS is compiled.
-    The function \verb'func' is the name of a user-defined function, normally a
-    static inline function in the user's \verb'*.m4' file.  The \verb'ztype',
-    \verb'xtype', and \verb'ytype' must be built-in types (\verb'GrB_BOOL',
-    \verb'GrB_FP64', etc) or types defined with \verb'GxB_Type_define'.
-
-\newpage
-%-------------------------------------------------------------------------------
-\subsection{{\sf GxB\_SelectOp\_define:}
-define a {\sf GxB\_SelectOp} at compile time}
-%-------------------------------------------------------------------------------
-\label{selectop_define}
-
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
-GxB_SelectOp_define(GxB_SelectOp op, func, GrB_Type xtype, GrB_Type ttype) ;
-\end{verbatim} } \end{mdframed}
-
-    \verb'GxB_SelectOp_define' is identical to \verb'GxB_SelectOp_new', except
-    that it defines a select operator when SuiteSparse:GraphBLAS is compiled.
-    The function \verb'func' is the name of a user-defined function, normally a
-    static inline function in the user's \verb'*.m4' file.  The \verb'xtype'
-    and/or \verb'ttype' parameters
-    may be \verb'GrB_NULL' or \verb'NULL', which denotes a type-generic
-    \verb'GxB_SelectOp' operator.  If not null, \verb'xtype' and
-    \verb'ttype' must be a built-in
-    types (\verb'GrB_BOOL', \verb'GrB_FP64', etc), or a type defined with
-    \verb'GxB_Type_define'.
-
-%-------------------------------------------------------------------------------
-\subsection{{\sf GxB\_Monoid\_define:}
-define a {\sf GrB\_Monoid} at compile time}
-%-------------------------------------------------------------------------------
-\label{monoid_define}
-
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
-GxB_Monoid_define(GrB_Monoid monoid, GrB_BinaryOp op, identity) ;
-\end{verbatim} } \end{mdframed}
-
-    \verb'GxB_Monoid_define' is identical to \verb'GrB_Monoid_new', except that
-    it defines a monoid when SuiteSparse:GraphBLAS is compiled.  The \verb'op'
-    is a built-in binary operator (\verb'GrB_PLUS_FP32', for example) or a
-    binary operator defined by \verb'GxB_BinaryOp_define'.  The three types of
-    the operator must be the same, but this cannot be checked by this method at
-    compile time.  Results are undefined if this condition does not hold.
-
-    Unlike \verb'GrB_Monoid_new', the \verb'identity' parameter must be a
-    compile-time constant expression.  It must also be parsable as a valid
-    argument to an \verb'm4' macro.  For example, the following is a valid
-    definition that appears in \verb'User/Example/my_complex.m4',  It defines a
-    plus monoid for the \verb'My_complex' type, which is \verb'double complex'
-    in C.
-
-{\footnotesize
-\begin{verbatim}
-GxB_Monoid_define(My_Complex_plus_monoid, My_Complex_plus, CMPLX(0,0)) ; \end{verbatim}}
-
-For user-defined types created from a C struct, another method must be used
-for the value of the \verb'identity' parameter of \verb'GxB_Monoid_define'.
-Consider the following excerpt from \verb'User/Example/my_pagerank.m4'.  A
-struct variable such as the \verb'pagerank_type' can be initialized with the C
-expression \verb'identity = {0,0}', but the expression \verb'{0,0}' cannot be
-passed to an \verb'm4' macro since it is interpreted by \verb'm4' as two
-arguments.  The solution is to define a C preprocessor token,
-\verb'PAGERANK_ZERO', and pass that token as the third argument of
-\verb'GxB_Monoid_define'.
-
-{\footnotesize
-\begin{verbatim}
-#ifdef GxB_USER_INCLUDE
-typedef struct
-{
-    double rank ;
-    double invdegree ;
-}
-pagerank_type ;
-
-// This is valid.  It defines the identity value of the monoid as a
-// struct with identity.rank = 0 and identity.invdegree = 0.
-#define PAGERANK_ZERO {0,0}
-#endif
-
-GxB_Monoid_define(PageRank_monoid, PageRank_add, PAGERANK_ZERO) ;
-\end{verbatim} }
-
-\noindent
-The following definition will fail to compile, since \verb'm4' interprets the
-comma in the \verb'identity' argument as the start of a fourth argument:
-
-{\footnotesize
-\begin{verbatim}
-// This will fail:
-GxB_Monoid_define(PageRank_monoid, PageRank_add, {0,0}) ;
-\end{verbatim} }
-
-%-------------------------------------------------------------------------------
-\subsection{{\sf GxB\_Monoid\_terminal\_define:}
-define a {\sf GrB\_Monoid} at compile time, with a terminal value}
-%-------------------------------------------------------------------------------
-\label{monoid_terminal_define}
-
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
-GxB_Monoid_terminal_define(monoid, op, identity, terminal) ;
-\end{verbatim} } \end{mdframed}
-
-\verb'GxB_Monoid_terminal_define' is identical to \verb'GxB_Monoid_define',
-except that it allows the specification of a terminal value.  See
-Section~\ref{monoid_terminal_new} for a description of the terminal value of
-a monoid.
-
-%-------------------------------------------------------------------------------
-\subsection{{\sf GxB\_Semiring\_define:}
-define a {\sf GrB\_Semiring} at compile time}
-%-------------------------------------------------------------------------------
-\label{semiring_define}
-
-\begin{mdframed}[userdefinedwidth=6in]
-{\footnotesize
-\begin{verbatim}
-GxB_Semiring_define(GrB_Semiring semiring, GrB_Monoid add, GrB_BinaryOp mult) ;
-\end{verbatim} } \end{mdframed}
-
-    \verb'GxB_Semiring_define' is identical to \verb'GrB_Semiring_new', except
-    that it defines a semiring when SuiteSparse:GraphBLAS is compiled.  The
-    \verb'add' parameter is a \verb'GrB_Monoid' that is either predefined (such
-    as \verb'GxB_PLUS_TIMES_FP64') or defined with \verb'GxB_Monoid_define'.
-    Similarly, the \verb'mult' parameter is either a predefined binary operator
-    (such as \verb'GrB_TIMES_FP32') or a binary operator defined with
-    \verb'GxB_BinaryOp_define'.
+This feature has been removed as of SuiteSparse:GraphBLAS v3.2.0.  User
+objects defined with \verb'GxB_*_define' when GraphBLAS is compiled are not
+compatible with the faster matrix operations.  Use the run-time definitions
+instead, from the GraphBLAS C API Specification (\verb'GrB_Type_new' instead of
+\verb'GxB_Type_define' in the \verb'GraphBLAS/User/*.m4' script).
 
 \newpage
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -10113,19 +9801,22 @@ \subsection{Breadth-first search}
 \label{bfs}
 
 The \verb'bfs' examples in the \verb'Demo' folder provide several examples of
-how to compute a breadth-first search (BFS) in GraphBLAS.  The \verb'bfs5m'
-function starts at a given source node \verb's' of an undirected graph with
-\verb'n' nodes.  The graph is represented as an \verb'n'-by-\verb'n'
-Boolean matrix, \verb'A', where \verb'A(i,j)' is the edge $(i,j)$.  The matrix
-\verb'A' can actually have any type; if it is not Boolean (\verb'bool' in C, or
-\verb'GrB_BOOL' in GraphBLAS), it is typecasted to Boolean by the semiring,
-where zero is false and nonzero is true.
-
-The vector \verb'v' of size \verb'n' holds the level of each node in the
-BFS, where \verb'v(i)=0' if the node has not yet been seen.  This particular
-value makes \verb'v' useful for another role.  It can be used as a Boolean
-mask, since \verb'0' is \verb'false' and nonzero is \verb'true'.
-Initially the entire \verb'v' vector is zero.
+how to compute a breadth-first search (BFS) in GraphBLAS.  Additional BFS
+examples are in LAGraph; one of them, \verb'LAGraph_bfs_simple', is shown
+below.  It starts at a given source node \verb's' of an undirected graph with
+\verb'n' nodes.  The graph is represented as an \verb'n'-by-\verb'n' matrix,
+\verb'A', where \verb'A(i,j)' is the edge $(i,j)$.  The matrix \verb'A' can
+have any type (even a user-defined type), since the \verb'PAIR' operator does
+not access its values.  No typecasting will be done.
+
+The vector \verb'v' of size \verb'n' holds the level of each node in the BFS,
+where \verb'v(i)=0' if the node has not yet been seen.  This particular value
+makes \verb'v' useful for another role.  It can be used as a Boolean mask,
+since \verb'0' is \verb'false' and nonzero is \verb'true'.  Initially the
+entire \verb'v' vector is zero.  It is initialized as a dense vector, with all
+entries present, to improve performance (otherwise, it will slowly grow,
+incrementally, and this will take a lot of time if the number of BFS levels is
+high).
 
 The vector \verb'q' is the set of nodes just discovered at the current level,
 where \verb'q(i)=true' if node \verb'i' is in the current level.  It starts out
@@ -10133,15 +9824,12 @@ \subsection{Breadth-first search}
 
 Each iteration of the BFS consists of three calls to GraphBLAS.  The first one
 uses \verb'q' as a mask.  It modifies all positions in \verb'v' where \verb'q'
-is true, setting them all to the current \verb'level'.  No accumulator or
-descriptor are used.  Since \verb'GrB_REPLACE' is not used and
-\verb'I=GrB_ALL', \verb'GxB_subassign' and \verb'GrB_assign' are identical;
-either can be used in this step:
+has an entry, setting them all to the current \verb'level'.
 
         {\footnotesize
         \begin{verbatim}
         // v<q> = level, using vector assign with q as the mask
-        GrB_assign (v, q, NULL, level, GrB_ALL, n, NULL) ; \end{verbatim}}
+        GrB_assign (v, q, NULL, level, GrB_ALL, n, GrB_DESC_S) ; \end{verbatim}}
 
 The next call to GraphBLAS is the heart of the algorithm:
 
@@ -10149,14 +9837,15 @@ \subsection{Breadth-first search}
         \begin{verbatim}
         // q<!v> = q ||.&& A ; finds all the unvisited
         // successors from current q, using !v as the mask
-        GrB_vxm (q, v, NULL, Boolean, q, A, desc) ; \end{verbatim}}
+        GrB_vxm (q, v, NULL, GxB_ANY_PAIR_BOOL, q, A, GrB_DESC_RC) ; \end{verbatim}}
 
 The vector \verb'q' is all the set of nodes at the current level.  Suppose
 \verb'q(j)' is true, and it has a neighbor \verb'i'.  Then \verb'A(i,j)=1', and
-the dot product of \verb'A(i,:)*q' using the \verb'OR-AND' semiring will use
-the \verb'AND' multiplier on these two terms, \verb'A(i,j) AND q(j)', resulting
-in a value \verb'true'.  The \verb'OR' monoid will ``sum'' up all the results
-in this single row \verb'i'.  If the result is a column vector \verb't=A*q',
+the dot product of \verb'A(i,:)*q' using the \verb'ANY_PAIR' semiring will use
+the \verb'PAIR' multiplier on these two terms, \verb'f (A(i,j), q(j))', resulting
+in a value \verb'1'.  The \verb'ANY' monoid will ``sum'' up all the results
+in this single row \verb'i'; note that the \verb'OR' monoid would compute the
+same thing.  If the result is a column vector \verb't=A*q',
 then this \verb't(i)' will be true.  The vector \verb't' will be true for
 any node adjacent to any node in the set \verb'q'.
 
@@ -10185,77 +9874,53 @@ \subsection{Breadth-first search}
 that are neighbors of the prior set and that have not already been seen in
 any prior level.
 
-Finally, a single call to GraphBLAS computes the \verb'OR' for all entries
-in \verb'q', into a single scalar, \verb'successor'.  This value is true if
-\verb'q' contains any value true, or false otherwise.  If it is false,
-the BFS can terminate.
-
-        {\footnotesize
-        \begin{verbatim}
-        GrB_reduce (&successor, NULL, Lor, q, NULL) ; \end{verbatim}}
-
-The above call to \verb'GrB_reduce' looks costly, since it appears to be
-traversing an entire vector \verb'q', of size $\Omega(n)$.  However, the
-reduction need only iterate over the sparse entries that explicitly appear in
-\verb'q'.  So taking a second look, the reader might conclude the time is
-$\Omega(|q|)$, the number of entries in \verb'q'.  Both conclusions are
-incorrect.  Since the logical \verb'LOR' is a terminal monoid, the computation
-terminates as soon as any \verb'true' value is found.  Since all explicit
-entries in \verb'q' are \verb'true' in this algorithm, the time taken by the
-above call to \verb'GrB_reduce' is precisely $O(1)$.
-
-Another method for computing the BFS is in the \verb'bfs6' function in the
-\verb'Demo' folder.  It uses \verb'GrB_apply' and a unary operator to set the
-levels of the newly discovered nodes, instead of
-\verb'GrB_assign'.
+A single call to \verb'GrB_Vector_nvals' finds how many entries are in the
+current level.  If this is zero, the BFS can terminate.
 
 \newpage
 \begin{mdframed}[userdefinedwidth=6in]
 {\footnotesize
 \begin{verbatim}
-GrB_Info bfs5m              // BFS of a graph (using vector assign & reduce)
+#include "LAGraph_internal.h"
+#define LAGRAPH_FREE_ALL { GrB_free (&v) ; GrB_free (&q) ; }
+
+GrB_Info LAGraph_bfs_simple     // push-only BFS
 (
-    GrB_Vector *v_output,   // v [i] is the BFS level of node i in the graph
-    const GrB_Matrix A,     // input graph, treated as if boolean in semiring
-    GrB_Index s             // starting node of the BFS
+    GrB_Vector *v_output,   // v(i) is the BFS level of node i in the graph
+    GrB_Matrix A,           // input graph, any type; its values are not accessed
+    GrB_Index source        // starting node of the BFS
 )
 {
-    GrB_Index n ;                          // # of nodes in the graph
-    GrB_Vector q = NULL ;                  // nodes visited at each level
-    GrB_Vector v = NULL ;                  // result vector
-    GrB_Monoid Lor = NULL ;                // Logical-or monoid
-    GrB_Semiring Boolean = NULL ;          // Boolean semiring
-    GrB_Descriptor desc = NULL ;           // Descriptor for vxm
-    GrB_Matrix_nrows (&n, A) ;             // n = # of rows of A
-    GrB_Vector_new (&v, GrB_INT32, n) ;    // Vector<int32_t> v(n) = 0
-    GrB_assign (v, NULL, NULL, 0, GrB_ALL, n, NULL) ;   // make v dense
-    GrB_Vector_new (&q, GrB_BOOL, n) ;     // Vector<bool> q(n) = false
-    GrB_Vector_setElement (q, true, s) ;   // q[s] = true, false elsewhere
-    GrB_Monoid_new (&Lor, GrB_LOR, (bool) false) ;
-    GrB_Semiring_new (&Boolean, Lor, GrB_LAND) ;
-    GrB_Descriptor_new (&desc) ;
-    GrB_Descriptor_set (desc, GrB_MASK, GrB_SCMP) ;     // invert the mask
-    GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE) ;  // clear q first
-
-    bool successor = true ; // true when some successor found
-    for (int32_t level = 1 ; successor && level <= n ; level++)
+    GrB_Info info ;
+    GrB_Vector q = NULL ;           // nodes visited at each level
+    GrB_Vector v = NULL ;           // result vector
+    if (v_output == NULL) LAGRAPH_ERROR ("argument missing", GrB_NULL_POINTER) ;
+    GrB_Index n, nvals ;
+    GrB_Matrix_nrows (&n, A) ;
+    // create an empty vector v, and make it dense
+    GrB_Vector_new (&v, (n > INT32_MAX) ? GrB_INT64 : GrB_INT32, n) ;
+    GrB_assign (v, NULL, NULL, 0, GrB_ALL, n, NULL) ;
+    // create a boolean vector q, and set q(source) to true
+    GrB_Vector_new (&q, GrB_BOOL, n) ;
+    GrB_Vector_setElement (q, true, source) ;
+    // BFS traversal and label the nodes
+    for (int64_t level = 1 ; level <= n ; level++)
     {
-        // v<q> = level, using vector assign with q as the mask
-        GrB_assign (v, q, NULL, level, GrB_ALL, n, NULL) ;
-        // q<!v> = q ||.&& A ; finds all the unvisited successors from current
-        // q, using !v as the mask
-        GrB_vxm (q, v, NULL, Boolean, q, A, desc) ;
-        // successor = ||(q)
-        GrB_reduce (&successor, NULL, Lor, q, NULL) ;
+        // v<q> = level
+        GrB_assign (v, q, NULL, level, GrB_ALL, n, GrB_DESC_S) ;
+        // break if q is empty
+        GrB_Vector_nvals (&nvals, q) ;
+        if (nvals == 0) break ;
+        // q'<!v> = q'*A
+        GrB_vxm (q, v, NULL, GxB_ANY_PAIR_BOOL, q, A, GrB_DESC_RC) ;
     }
-
-    GrB_Descriptor_set (desc, GrB_MASK, GxB_DEFAULT) ;  // mask not inverted
-    GrB_assign (v, v, NULL, v, GrB_ALL, n, desc) ;      // make v sparse
-    *v_output = v ;         // return result
-    // free workspace
-    GrB_free (&q) ; GrB_free (&Lor) ; GrB_free (&Boolean) ; GrB_free (&desc) ;
+    // free workspace and return result
+    (*v_output) = v ;       // return result
+    v = NULL ;              // set to NULL so LAGRAPH_FREE_ALL doesn't free it
+    LAGRAPH_FREE_ALL ;      // free all workspace (except for result v)
     return (GrB_SUCCESS) ;
-} \end{verbatim}}
+}
+\end{verbatim}}
 \end{mdframed}
 
 \newpage
@@ -10676,7 +10341,11 @@ \subsection{Reading a matrix from a file}
 \label{read}
 
 {\bf NOTE:} see also \verb'LAGraph_mmread' and \verb'LAGraph_mmwrite', which
-can read and write any matrix in Matrix Market format.
+can read and write any matrix in Matrix Market format, and
+\verb'LAGraph_binread' and \verb'LAGraph_binwrite', which read a matrix from
+(or write it to) a binary file.  The binary file I/O functions are much faster
+than the \verb'read_matrix' function described here, and also much faster than
+\verb'LAGraph_mmread' and \verb'LAGraph_mmwrite'.
 
 The \verb'read_matrix' function in the \verb'Demo' reads in a triplet matrix
 from a file, one line per entry, and then uses \verb'GrB_Matrix_build' to
@@ -10814,19 +10483,6 @@ \subsection{PageRank}
 for the entire PageRank computation.  It terminates if the 2-norm of the change
 in the rank vector \verb'r' is below a threshold.
 
-% This is computed in one of
-% two ways.  If \verb'PAGERANK_UNSAFE' is defined at compile-time, the change in
-% \verb'r' is computed during the accumulator step of \verb'GrB_vxm', as a
-% side-effect of a user-defined operator.  This works in the current version of
-% SuiteSparse:GraphBLAS, but is unsafe since it assumes the accumulator operator
-% is computed sequentially.  If computed in parallel, a race condition would
-% occur.  If \verb'PAGERANK_UNSAFE' is not defined, then a safe method is used
-% instead.
-
-If \verb'my_pagerank.m4' is placed in \verb'GraphBLAS/User/', then
-the user objects are created at compile-time instead of at run-time, which
-leads to faster execution.
-
 \newpage
 %-------------------------------------------------------------------------------
 \subsection{Triangle counting}
@@ -11042,6 +10698,12 @@ \subsection{On Linux and Mac}
 such as \verb'GrB_assign'.  You will need to use the non-polymorphic functions
 instead.
 
+{\bf NOTE: icc is generally an excellent compiler, but it will generate slower
+code than gcc for v3.2.0.  This is merely because of how the two compilers
+treat \verb'#pragma omp atomic read' and \verb'#pragma omp atomic write'.  The
+use of gcc for SuiteSparse:GraphBLAS v3.2.0 is recommended.  This difference in
+performance should be resolved in a future version.}
+
 To compile SuiteSparse:GraphBLAS and the demo programs, simply type \verb'make'
 in the main GraphBLAS folder, which compiles the library.  To use a
 non-default compiler:
@@ -11081,25 +10743,25 @@ \subsection{On Linux and Mac}
     JOBS=32 CC=gcc make \end{verbatim} }
 
 %----------------------------------------
-\subsection{On Microsoft Windows}
+\subsection{On Microsoft Windows (v3.1.2 only)}
 %----------------------------------------
 
-SuiteSparse:GraphBLAS is now ported to Microsoft Visual Studio.  That compiler
-is not ANSI C11 compiler.  It does not support the \verb'_Generic' keyword,
-required for the polymorphic GraphBLAS functions.  So for example, you will
-need to use \verb'GrB_Matrix_free' instead of just \verb'GrB_free'.  Another
-limitation is the lack of support for OpenMP tasking, used in the parallel sort
-inside GraphBLAS.  With Microsoft Visual Studio, the sort is compiled to use
-just a single thread.  The sort is used for \verb'GrB_Matrix_build' and
-\verb'GrB_Vector_build', and for \verb'GrB_assign' and  \verb'GxB_subassign'
-when the index lists are unsorted on input.  In addition, variable-length
-arrays are not supported, so user-defined types are limited to 128 bytes in
-size.  These changes have no effect if you have an ANSI C11 compliant compiler.
+SuiteSparse:GraphBLAS v3.1.2 is now ported to Microsoft Visual Studio.  That
+compiler is not ANSI C11 compliant.  It does not support the \verb'_Generic'
+keyword, required for the polymorphic GraphBLAS functions.  So for example, you
+will need to use \verb'GrB_Matrix_free' instead of just \verb'GrB_free'.
+Another limitation is the lack of support for OpenMP tasking, used in the
+parallel sort inside GraphBLAS.  With Microsoft Visual Studio, the sort is
+compiled to use just a single thread.  The sort is used for
+\verb'GrB_Matrix_build' and \verb'GrB_Vector_build', and for \verb'GrB_assign'
+and  \verb'GxB_subassign' when the index lists are unsorted on input.  In
+addition, variable-length arrays are not supported, so user-defined types are
+limited to 128 bytes in size.  These changes have no effect if you have an ANSI
+C11 compliant compiler.
 
 The following instructions apply to Windows 10, CMake 3.16, and
 Visual Studio 2019, but may work for earlier versions.
 
-
 \begin{enumerate}
 \item
 Open a terminal window and type the following in the top-level
@@ -11144,7 +10806,7 @@ \subsection{Compiling the MATLAB interface}
     gbmake \end{verbatim} }
 
 \item Follow the remaining instructions in the
-    \verb'GraphBLAS/GraphBLAS/README.txt' file, to revise your
+    \verb'GraphBLAS/GraphBLAS/README.md' file, to revise your
     MATLAB path and \verb'startup.m' file.
 
 \end{enumerate}
diff --git a/Doc/GraphBLAS_version.tex b/Doc/GraphBLAS_version.tex
index 0f494a6c28..6955a8f54d 100644
--- a/Doc/GraphBLAS_version.tex
+++ b/Doc/GraphBLAS_version.tex
@@ -1,5 +1,5 @@
 % version of SuiteSparse:GraphBLAS
 \date{VERSION
-3.1.2,
-Dec 16, 2019}
+3.2.0,
+Feb 20, 2020}
 
diff --git a/Doc/License.txt b/Doc/License.txt
index f237f0495c..9d5a07aa86 100644
--- a/Doc/License.txt
+++ b/Doc/License.txt
@@ -1,4 +1,4 @@
-SuiteSparse:GraphBLAS, Copyright 2017-2019, Timothy A. Davis
+SuiteSparse:GraphBLAS, Copyright 2017-2020, Timothy A. Davis
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use SuiteSparse:GraphBLAS except in compliance with the
diff --git a/Doc/Makefile b/Doc/Makefile
index e34604192f..04f27a2c4f 100644
--- a/Doc/Makefile
+++ b/Doc/Makefile
@@ -18,9 +18,6 @@ GraphBLAS_UserGuide.pdf: GraphBLAS_UserGuide.tex GraphBLAS_UserGuide.bib \
 	pdflatex GraphBLAS_UserGuide.tex
 
 clean:
-	- $(RM) -rf html/*.png
-	- $(RM) -rf html/*.html
-	- $(RM) -rf html/*
 	- $(RM) -r *.out *.aux *.log *.bbl *.blg *.toc
 
 purge: clean
diff --git a/GraphBLAS/@GrB/GrB.m b/GraphBLAS/@GrB/GrB.m
index 35f8316a75..a8617b062a 100644
--- a/GraphBLAS/@GrB/GrB.m
+++ b/GraphBLAS/@GrB/GrB.m
@@ -162,7 +162,7 @@
 %   [lo,hi] = bandwidth (G) determine the lower & upper bandwidth of G
 %   C = sum (G, option)     reduce via sum, to vector or scalar
 %   C = prod (G, option)    reduce via product, to vector or scalar
-%   s = norm (G, kind)      1-norm or inf-norm of a GrB matrix
+%   s = norm (G, kind)      norm of a GrB matrix
 %   [C,I] = max (G, ...)    reduce via max, to vector or scalar
 %   C = min (G, ...)        reduce via min, to vector or scalar
 %   C = any (G, ...)        reduce via '|', to vector or scalar
@@ -255,6 +255,7 @@
 %   GrB.selectopinfo (op)        list properties of a select operator
 %   t = GrB.threads (t)          set/get # of threads to use in GraphBLAS
 %   c = GrB.chunk (c)            set/get chunk size to use in GraphBLAS
+%   b = GrB.burble (b)           set/get burble (diagnostic output)
 %   result = GrB.entries (G,...) count or query entries in a matrix
 %   result = GrB.nonz (G,...)    count or query nonzeros in a matrix
 %   C = GrB.prune (A, id)        prune entries equal to id
@@ -275,6 +276,7 @@
 %                               build a GrB matrix from list of entries
 %   [I,J,X] = GrB.extracttuples (A, desc)
 %                               extract all entries from a matrix
+%   s = GrB.normdiff (A, B, kind)   norm (A-B,kind)
 %
 %-------------------------------------
 % Static Methods for graph algorithms:
@@ -457,7 +459,7 @@
 %
 % See also sparse.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 properties (SetAccess = private, GetAccess = private)
@@ -763,6 +765,7 @@
     semiringinfo (s, type) ;
     nthreads = threads (varargin) ;
     c = chunk (varargin) ;
+    b = burble (varargin) ;
     C = empty (arg1, arg2) ;
     s = type (A) ;
     s = issigned (type) ;
@@ -790,7 +793,7 @@
     Cout = emult (varargin) ;
     Cout = apply (varargin) ;
     Cout = extract (varargin) ;
-    r = pagerank (A, opts) ;
+    [r, stats] = pagerank (A, opts) ;
     C = ktruss (A, k, check) ;
     s = tricount (A, check) ;
     L = laplacian (A, type, check) ;
@@ -801,6 +804,7 @@
     result = entries (A, varargin) ;
     result = nonz (A, varargin) ;
     [C, I, J] = compact (A, id) ;
+    s = normdiff (A, B, kind) ;
 
 end
 end
diff --git a/GraphBLAS/@GrB/abs.m b/GraphBLAS/@GrB/abs.m
index dc35d5e669..f5fccde57a 100644
--- a/GraphBLAS/@GrB/abs.m
+++ b/GraphBLAS/@GrB/abs.m
@@ -3,7 +3,7 @@
 %
 % See also sign.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.apply ('abs', G) ;
diff --git a/GraphBLAS/@GrB/all.m b/GraphBLAS/@GrB/all.m
index 57053a146f..e85cbf1d5e 100644
--- a/GraphBLAS/@GrB/all.m
+++ b/GraphBLAS/@GrB/all.m
@@ -10,7 +10,7 @@
 %
 % See also any, nnz, GrB.entries.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/amd.m b/GraphBLAS/@GrB/amd.m
index 791eb64772..387faa7973 100644
--- a/GraphBLAS/@GrB/amd.m
+++ b/GraphBLAS/@GrB/amd.m
@@ -4,7 +4,7 @@
 %
 % See also amd, GrB/colamd, GrB/symrcm.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 p = builtin ('amd', logical (G), varargin {:}) ;
diff --git a/GraphBLAS/@GrB/and.m b/GraphBLAS/@GrB/and.m
index 78a0ed4fc0..15f9948562 100644
--- a/GraphBLAS/@GrB/and.m
+++ b/GraphBLAS/@GrB/and.m
@@ -6,7 +6,7 @@
 %
 % See also GrB/or, GrB/xor.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/any.m b/GraphBLAS/@GrB/any.m
index 3652c43933..ef9bb11d2c 100644
--- a/GraphBLAS/@GrB/any.m
+++ b/GraphBLAS/@GrB/any.m
@@ -10,7 +10,7 @@
 %
 % See also all, nnz, GrB/nnz, GrB.entries, GrB.nonz.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 desc = struct ('in0', 'transpose') ;
diff --git a/GraphBLAS/@GrB/apply.m b/GraphBLAS/@GrB/apply.m
index 3aa43028d9..b3c6fe1ef9 100644
--- a/GraphBLAS/@GrB/apply.m
+++ b/GraphBLAS/@GrB/apply.m
@@ -25,7 +25,7 @@
 %
 % See also spfun.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/assert.m b/GraphBLAS/@GrB/assert.m
index aeb70ee9f0..30b7433dc7 100644
--- a/GraphBLAS/@GrB/assert.m
+++ b/GraphBLAS/@GrB/assert.m
@@ -1,7 +1,9 @@
 function assert (G)
 %ASSERT generate an error when a condition is violated
+%
+% See also error.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 builtin ('assert', logical (G)) ;
diff --git a/GraphBLAS/@GrB/assign.m b/GraphBLAS/@GrB/assign.m
index f8d3f46ba8..6e80e8d5f5 100644
--- a/GraphBLAS/@GrB/assign.m
+++ b/GraphBLAS/@GrB/assign.m
@@ -100,7 +100,7 @@
 %
 % See also GrB.subassign, subsasgn
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/bandwidth.m b/GraphBLAS/@GrB/bandwidth.m
index 57e7469c3f..bc08bac282 100644
--- a/GraphBLAS/@GrB/bandwidth.m
+++ b/GraphBLAS/@GrB/bandwidth.m
@@ -8,7 +8,7 @@
 
 % FUTURE: this will be much faster when implemented in a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (GrB.entries (G) == 0)
diff --git a/GraphBLAS/@GrB/bfs.m b/GraphBLAS/@GrB/bfs.m
index b03aaccaee..d7eaa45a90 100644
--- a/GraphBLAS/@GrB/bfs.m
+++ b/GraphBLAS/@GrB/bfs.m
@@ -38,8 +38,13 @@
 % Example:
 %
 %   A = bucky ;
-%   [v pi] = GrB.bfs (A, 1)
-%   plot (graph (A))
+%   s = 1 ;
+%   [v pi] = GrB.bfs (A, s)
+%   figure (1) ;
+%   subplot (1,2,1) ; plot (graph (A)) ;
+%   pi2 = full (double (pi)) ;
+%   pi2 (s) = 0 ;
+%   subplot (1,2,2) ; treeplot (pi2) ; title ('BFS tree') ;
 %   n = size (A,1) ;
 %   for level = 1:n
 %       level
@@ -50,11 +55,15 @@
 %       end
 %   end
 %
-% See also graph/bfsearch, graph/shortestpathtree.
+% See also graph/bfsearch, graph/shortestpathtree, treeplot.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
+%-------------------------------------------------------------------------------
+% initializations
+%-------------------------------------------------------------------------------
+
 [m, n] = size (A) ;
 if (m ~= n)
     gb_error ('A must be square') ;
@@ -77,7 +86,10 @@
     end
 end
 
-d = struct ('out', 'replace', 'mask', 'complement') ;
+% set the descriptors
+desc_rc.out  = 'replace' ;
+desc_rc.mask = 'complement' ;
+desc_s.mask = 'structural' ;
 
 % determine the method to use, and convert A if necessary
 if (isequal (kind, 'undirected'))
@@ -86,7 +98,7 @@
     end
     if (GrB.isbycol (A))
         % A is stored by column but undirected, so use q*A' instead of q*A
-        d.in1 = 'transpose' ;
+        desc_rc.in1 = 'transpose' ;
     end
 else
     if (GrB.isbycol (A))
@@ -95,48 +107,58 @@
     end
 end
 
-% determine the integer type to use, and initialize v as a full vector
+% determine the integer type to use, and initialize v as a full integer vector
 int_type = 'int64' ;
 if (n < intmax ('int32'))
     int_type = 'int32' ;
 end
 v = full (GrB (1, n, int_type)) ;
 
-% initialize the queue
-q = GrB (1, n, int_type) ;                   % q = sparse (1,n)
+%-------------------------------------------------------------------------------
+% do the BFS
+%-------------------------------------------------------------------------------
 
 if (nargout == 1)
 
+    %---------------------------------------------------------------------------
     % just compute the level of each node
-    q = GrB.subassign (q, { s }, 1) ;            % q (s) = 1
+    %---------------------------------------------------------------------------
+
+    q = GrB (1, n, 'logical') ;                  % q = sparse (1,n)
+    q = GrB.subassign (q, { s }, true) ;         % q (s) = true
     for level = 1:n
         % assign the current level: v<q> = level
-        v = GrB.subassign (v, q, level) ;
+        v = GrB.subassign (v, q, level, desc_s) ;
         % quit if q is empty
         if (~any (q)), break, end
-        % move to the next level:  q<~v,replace> = q*A,
-        % using the boolean semiring
-        q = GrB.mxm (q, v, '|.&.logical', q, A, d) ;
+        % move to the next level:  q<~v,replace> = q*A
+        q = GrB.mxm (q, v, 'any.pair.logical', q, A, desc_rc) ;
     end
 
 else
 
+    %---------------------------------------------------------------------------
     % compute both the level and the parent
+    %---------------------------------------------------------------------------
+
     parent = full (GrB (1, n, int_type)) ;       % parent = zeros (1,n)
     parent = GrB.subassign (parent, { s }, s) ;  % parent (s) = s
+    q = GrB (1, n, int_type) ;                   % q = sparse (1,n)
     q = GrB.subassign (q, { s }, s) ;            % q (s) = s
-    id = GrB (1:n, int_type) ;                   % id = 1:n
+    id = GrB (1:n, int_type, 'by row') ;         % id = 1:n
+    semiring = ['any.1st.' int_type] ;           % any.1st.integer semiring
     for level = 1:n
         % assign the current level: v<q> = level
-        v = GrB.subassign (v, q, level) ;
+        v = GrB.subassign (v, q, level, desc_s) ;
+        % quit if q is empty
         if (~any (q)), break, end
         % move to the next level:  q<~v,replace> = q*A,
-        % using the min-first semiring
-        q = GrB.mxm (q, v, 'min.1st', q, A, d) ;
-        % assign parents
-        parent = GrB.assign (parent, q, q) ;
-        % q(i) = i for all entries in q
-        q = GrB.assign (q, q, id) ;
+        % using the any-first-integer semiring (int32 or int64)
+        q = GrB.mxm (q, v, semiring, q, A, desc_rc) ;
+        % assign parents: parent<q> = q
+        parent = GrB.assign (parent, q, q, desc_s) ;
+        % q(i) = i for all entries in q, using q<q>=1:n
+        q = GrB.assign (q, q, id, desc_s) ;
     end
     % remove zeros from parent
     parent = GrB.prune (parent) ;
diff --git a/GraphBLAS/@GrB/binopinfo.m b/GraphBLAS/@GrB/binopinfo.m
index 7e159a764c..a3cace2e59 100644
--- a/GraphBLAS/@GrB/binopinfo.m
+++ b/GraphBLAS/@GrB/binopinfo.m
@@ -16,9 +16,9 @@ function binopinfo (op, type)
 % GrB.binopinfo ('+.double'), or in the second argument, GrB.binopinfo
 % ('+', 'double').
 %
-% The MATLAB interface to GraphBLAS provides for 25 different binary
+% The MATLAB interface to GraphBLAS provides for 27 different binary
 % operators, each of which may be used with any of the 11 types, for
-% a total of 25*11 = 275 valid binary operators.  Binary operators
+% a total of 27*11 = 297 valid binary operators.  Binary operators
 % are defined by a string of the form 'op.type', or just 'op'.  In
 % the latter case, the type defaults to the type of the matrix inputs
 % to the GraphBLAS operation.
@@ -56,6 +56,7 @@ function binopinfo (op, type)
 %   |   || or  lor   x | y          |   >=  ge           x >= y
 %   &   && and land  x & y          |   <=  le           x <= y
 %   xor lxor         xor(x,y)       |
+%   pair             1              |   any              x, or y
 %
 % The three logical operators, lor, land, and lxor, also come in 11
 % types.  z = lor.double (x,y) tests the condition (x~=0) || (y~=0),
@@ -75,7 +76,7 @@ function binopinfo (op, type)
 
 % FUTURE: add complex binary operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
diff --git a/GraphBLAS/@GrB/build.m b/GraphBLAS/@GrB/build.m
index fe0a19826c..90d7efafcf 100644
--- a/GraphBLAS/@GrB/build.m
+++ b/GraphBLAS/@GrB/build.m
@@ -65,7 +65,7 @@
 %
 % See also sparse, find, GrB.extracttuples.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/burble.m b/GraphBLAS/@GrB/burble.m
new file mode 100644
index 0000000000..66997c2163
--- /dev/null
+++ b/GraphBLAS/@GrB/burble.m
@@ -0,0 +1,21 @@
+function b = burble (varargin)
+%GRB.BURBLE get/set the GraphBLAS burble (diagnostic output) setting.
+%
+% Usage:
+%   b = GrB.burble ;      % get the current burble
+%   GrB.burble (b) ;      % set the burble
+%
+% GrB.burble gets and/or sets the burble setting, which controls diagnostic
+% output in GraphBLAS.  For this setting to have any effect, the
+% SuiteSparse:GraphBLAS library must also be compiled with burble enabled
+% (use -DGB_BURBLE=1).  The default is false.  This setting is meant for
+% diagnostic use only, during development of GraphBLAS itself.  It may also
+% be useful for algorithmic development; compare spparms ('spumoni', ...).
+%
+% See also spparms.
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+b = gbburble (varargin {:}) ;
+
diff --git a/GraphBLAS/@GrB/ceil.m b/GraphBLAS/@GrB/ceil.m
index ecbb56d4c2..0708d6fe31 100644
--- a/GraphBLAS/@GrB/ceil.m
+++ b/GraphBLAS/@GrB/ceil.m
@@ -4,7 +4,7 @@
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isfloat (G) && GrB.entries (G) > 0)
diff --git a/GraphBLAS/@GrB/chunk.m b/GraphBLAS/@GrB/chunk.m
index fa1816f8f7..3a965afd52 100644
--- a/GraphBLAS/@GrB/chunk.m
+++ b/GraphBLAS/@GrB/chunk.m
@@ -18,7 +18,7 @@
 %
 % See also GrB.threads.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 c = gbchunk (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/clear.m b/GraphBLAS/@GrB/clear.m
index 562d0da038..dea1935ed5 100644
--- a/GraphBLAS/@GrB/clear.m
+++ b/GraphBLAS/@GrB/clear.m
@@ -1,20 +1,19 @@
 function clear
-%GRB.CLEAR free all internal workspace in SuiteSparse:GraphBLAS.
+%GRB.CLEAR restore default settings for SuiteSparse:GraphBLAS.
 %
 % Usage:
 %
 %   GrB.clear
 %
-% GraphBLAS keeps an internal workspace to speedup its operations.  It also
-% uses several global settings.  These can both be cleared with GrB.clear.
-% GrB.clear also clears any non-default setting of GrB.threads, GrB.chunk, and
-% GrB.format.
+% GrB.clear clears any non-default setting of the GraphBLAS global
+% variables, including GrB.threads, GrB.chunk, and GrB.format,
+% and sets them to their defaults.  It has no effect on any GrB
+% objects.
 %
 % See also: clear, GrB.init, GrB.finalize
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-gbsetup ('finish') ;
-gbsetup ('start') ;
+gbsetup ;
 
diff --git a/GraphBLAS/@GrB/colamd.m b/GraphBLAS/@GrB/colamd.m
index 2edae92df8..9f1a1a5962 100644
--- a/GraphBLAS/@GrB/colamd.m
+++ b/GraphBLAS/@GrB/colamd.m
@@ -1,8 +1,10 @@
 function [p, varargout] = colamd (G, varargin)
 %COLAMD column approximate minimum degree ordering of a GraphBLAS matrix.
 % See 'help colamd' for details.
+%
+% See also amd.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [p, varargout{1:nargout-1}] = colamd (double (G), varargin {:}) ;
diff --git a/GraphBLAS/@GrB/compact.m b/GraphBLAS/@GrB/compact.m
index 4533e7f348..a4e546ee91 100644
--- a/GraphBLAS/@GrB/compact.m
+++ b/GraphBLAS/@GrB/compact.m
@@ -32,7 +32,7 @@
 %
 % See also GrB.entries, GrB.nonz, GrB.prune.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin > 1)
diff --git a/GraphBLAS/@GrB/complex.m b/GraphBLAS/@GrB/complex.m
index 6ef80e428e..0673ff531a 100644
--- a/GraphBLAS/@GrB/complex.m
+++ b/GraphBLAS/@GrB/complex.m
@@ -11,7 +11,7 @@
 
 % FUTURE: add complex type(s)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:unsupported', 'complex type not supported') ;
diff --git a/GraphBLAS/@GrB/conj.m b/GraphBLAS/@GrB/conj.m
index 743db94633..99f6b65ad2 100644
--- a/GraphBLAS/@GrB/conj.m
+++ b/GraphBLAS/@GrB/conj.m
@@ -2,8 +2,10 @@
 %CONJ complex conjugate of a GraphBLAS matrix.
 % Since all GraphBLAS matrices are currently real, conj (G) is just G.
 % Complex support will be added in the future.
+%
+% See also real, imag.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = G ;
diff --git a/GraphBLAS/@GrB/ctranspose.m b/GraphBLAS/@GrB/ctranspose.m
index 50e1fa3098..3ff375952f 100644
--- a/GraphBLAS/@GrB/ctranspose.m
+++ b/GraphBLAS/@GrB/ctranspose.m
@@ -5,7 +5,7 @@
 %
 % See also GrB.trans, transpose.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.trans (G) ;
diff --git a/GraphBLAS/@GrB/descriptorinfo.m b/GraphBLAS/@GrB/descriptorinfo.m
index 035bfca4f2..042d933176 100644
--- a/GraphBLAS/@GrB/descriptorinfo.m
+++ b/GraphBLAS/@GrB/descriptorinfo.m
@@ -17,11 +17,16 @@ function descriptorinfo (d)
 %
 %   d.out   'default' or 'replace'      determines if C is cleared before
 %                                         the accum/mask step
-%   d.mask  'default' or 'complement'   determines if M or !M is used
+%
+%   d.mask  'default'       use M as the mask (if present)
+%           'complement'    use ~M as the mask
+%           'structural' or 'structure'   use the pattern of M, not its values
+%           'structural complement'       use the complement of M's pattern
+%
 %   d.in0   'default' or 'transpose'    determines A or A' is used
 %   d.in1   'default' or 'transpose'    determines B or B' is used
 %
-%   d.axb   'default', 'Gustavson', 'heap', or 'dot'.  Determines the
+%   d.axb   'default', 'Gustavson', 'heap', 'hash', or 'dot'.  Determines the
 %            method used in GrB.mxm.  The default is to let GraphBLAS
 %            determine the method automatically, via a heuristic.
 %
@@ -76,7 +81,7 @@ function descriptorinfo (d)
 % See also GrB.binopinfo, GrB.monoidinfo, GrB.selectopinfo,
 % GrB.semiringinfo, GrB.unopinfo.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
diff --git a/GraphBLAS/@GrB/diag.m b/GraphBLAS/@GrB/diag.m
index 90c3dab27b..e5bce1b671 100644
--- a/GraphBLAS/@GrB/diag.m
+++ b/GraphBLAS/@GrB/diag.m
@@ -31,7 +31,7 @@
 %
 % See also diag, spdiags, tril, triu, GrB.select.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 2)
diff --git a/GraphBLAS/@GrB/digraph.m b/GraphBLAS/@GrB/digraph.m
index 93d48a0a6f..7bf84ff6a4 100644
--- a/GraphBLAS/@GrB/digraph.m
+++ b/GraphBLAS/@GrB/digraph.m
@@ -24,7 +24,7 @@
 %
 % See also graph, digraph, GrB/graph, plot.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 type = GrB.type (G) ;
diff --git a/GraphBLAS/@GrB/disp.m b/GraphBLAS/@GrB/disp.m
index bc7f793e12..51cf9bdd88 100644
--- a/GraphBLAS/@GrB/disp.m
+++ b/GraphBLAS/@GrB/disp.m
@@ -3,8 +3,10 @@ function disp (G, level)
 % disp (G, level) displays the GraphBLAS sparse matrix G.  Level controls
 % how much is printed; 0: none, 1: terse, 2: a few entries, 3: all.  The
 % default is 2 if level is not present.
+%
+% See also display.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 2)
diff --git a/GraphBLAS/@GrB/display.m b/GraphBLAS/@GrB/display.m
index da991446d1..c2686b7041 100644
--- a/GraphBLAS/@GrB/display.m
+++ b/GraphBLAS/@GrB/display.m
@@ -3,8 +3,10 @@ function display (G) %#ok<DISPLAY>
 % display (G) displays the attributes and first few entries of a
 % GraphBLAS sparse matrix object.  Use disp(G,3) to display all of the
 % content of G.
+%
+% See also disp.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 name = inputname (1) ;
diff --git a/GraphBLAS/@GrB/dmperm.m b/GraphBLAS/@GrB/dmperm.m
index 9e67fb78f8..5fabade3aa 100644
--- a/GraphBLAS/@GrB/dmperm.m
+++ b/GraphBLAS/@GrB/dmperm.m
@@ -1,8 +1,10 @@
 function [p, varargout] = dmperm (G)
 %DMPERM Dulmage-Mendelsohn permutation of a GraphBLAS matrix.
 % See 'help dmperm' for details.
+%
+% See also amd, colamd.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [p, varargout{1:nargout-1}] = builtin ('dmperm', logical (G)) ;
diff --git a/GraphBLAS/@GrB/dnn.m b/GraphBLAS/@GrB/dnn.m
index 80777de7dd..d7145aca08 100644
--- a/GraphBLAS/@GrB/dnn.m
+++ b/GraphBLAS/@GrB/dnn.m
@@ -33,7 +33,7 @@
 %
 % See also dnn_matlab, dnn_mat2gb.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 Y = Y0 ;
diff --git a/GraphBLAS/@GrB/double.m b/GraphBLAS/@GrB/double.m
index fab43becf0..3446083994 100644
--- a/GraphBLAS/@GrB/double.m
+++ b/GraphBLAS/@GrB/double.m
@@ -9,7 +9,7 @@
 % See also cast, GrB, complex, single, logical, int8, int16, int32, int64,
 % uint8, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % if (isreal (G))
diff --git a/GraphBLAS/@GrB/eadd.m b/GraphBLAS/@GrB/eadd.m
index 6b3700b910..0262234757 100644
--- a/GraphBLAS/@GrB/eadd.m
+++ b/GraphBLAS/@GrB/eadd.m
@@ -32,7 +32,7 @@
 %
 % See also GrB.emult.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/eig.m b/GraphBLAS/@GrB/eig.m
index 43d813a42f..e4ca178da9 100644
--- a/GraphBLAS/@GrB/eig.m
+++ b/GraphBLAS/@GrB/eig.m
@@ -1,8 +1,10 @@
 function [V, varargout] = eig (G, varargin)
 %EIG Eigenvalues and eigenvectors of a GraphBLAS matrix.
 % See 'help eig' for details.
+%
+% See also eigs.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isreal (G) && issymmetric (G))
diff --git a/GraphBLAS/@GrB/empty.m b/GraphBLAS/@GrB/empty.m
index 10a8fa2d34..4c7ac8c707 100644
--- a/GraphBLAS/@GrB/empty.m
+++ b/GraphBLAS/@GrB/empty.m
@@ -10,7 +10,7 @@
 %
 % See also GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 m = 0 ;
diff --git a/GraphBLAS/@GrB/emult.m b/GraphBLAS/@GrB/emult.m
index bf76470fdf..c08c495db6 100644
--- a/GraphBLAS/@GrB/emult.m
+++ b/GraphBLAS/@GrB/emult.m
@@ -28,7 +28,7 @@
 %
 % See also GrB.eadd.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/end.m b/GraphBLAS/@GrB/end.m
index f936a19f73..ca6870a2b1 100644
--- a/GraphBLAS/@GrB/end.m
+++ b/GraphBLAS/@GrB/end.m
@@ -1,7 +1,9 @@
 function i = end (G, k, ndims)
 %END Last index in an indexing expression for a GraphBLAS matrix.
+%
+% See also size, length.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % FUTURE: add linear indexing
diff --git a/GraphBLAS/@GrB/entries.m b/GraphBLAS/@GrB/entries.m
index 3fd3013493..abbc387061 100644
--- a/GraphBLAS/@GrB/entries.m
+++ b/GraphBLAS/@GrB/entries.m
@@ -13,8 +13,8 @@
 %
 % e = GrB.entries (A)         number of entries
 % e = GrB.entries (A, 'all')  number of entries
-% e = GrB.entries (A, 'row')  number of rows with at least one entries
-% e = GrB.entries (A, 'col')  number of columns with at least one entries
+% e = GrB.entries (A, 'row')  number of rows with at least one entry
+% e = GrB.entries (A, 'col')  number of columns with at least one entry
 %
 % X = GrB.entries (A, 'list')         list of values of unique entries
 % X = GrB.entries (A, 'all', 'list')  list of values of unique entries
@@ -49,10 +49,8 @@
 %
 % See also GrB.nonz, nnz, GrB/nnz, nonzeros, GrB/nonzeros.
 
-% FUTURE: if A is stored by row, then the row degree can be found quickly,
-% in a mexFunction that accesses A->p and A->h.  If stored by col, then
-% the col degree is the same thing.  Write a mexFunction that computes
-% the vector degree (by row or by column).
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % get the string arguments
 dim = 'all' ;           % 'all', 'row', or 'col'
@@ -70,14 +68,19 @@
 end
 
 if (isequal (dim, 'all'))
+
     switch kind
         case 'count'
+            % number of entries in A
+            % e = GrB.entries (A)
             if (isa (A, 'GrB'))
                 result = gbnvals (A.opaque) ;
             else
                 result = gbnvals (A) ;
             end
         case 'list'
+            % list of values of unique entries
+            % X = GrB.entries (A, 'list')
             if (isa (A, 'GrB'))
                 result = unique (gbextractvalues (A.opaque)) ;
             else
@@ -86,18 +89,34 @@
         otherwise
             gb_error ('''all'' and ''degree'' cannot be combined') ;
     end
+
 else
-    desc = struct ;
-    if (isequal (dim, 'col'))
-        desc.in0 = 'transpose' ;
+
+    % get the row or column degree
+    f = GrB.format (A) ;
+    native = (isequal (f, 'by row') && isequal (dim, 'row')) || ...
+             (isequal (f, 'by col') && isequal (dim, 'col')) ;
+    if (isa (A, 'GrB'))
+        degree = GrB (gbdegree (A.opaque, native)) ;
+    else
+        degree = GrB (gbdegree (A, native)) ;
     end
-    degree = GrB.vreduce ('+', GrB.apply ('1.double', A), desc) ;
+
     switch kind
         case 'count'
-            result = GrB.entries (degree) ;
+            % number of non-empty rows/cols
+            % e = GrB.entries (A, 'row')
+            % e = GrB.entries (A, 'col')
+            result = nnz (degree) ;
         case 'list'
+            % list of non-empty rows/cols
+            % I = GrB.entries (A, 'row', 'list')
+            % J = GrB.entries (A, 'col', 'list')
             result = find (degree) ;
         case 'degree'
+            % degree of all rows/cols
+            % d = GrB.entries (A, 'row', 'degree')
+            % d = GrB.entries (A, 'col', 'degree')
             result = degree ;
     end
 end
diff --git a/GraphBLAS/@GrB/eps.m b/GraphBLAS/@GrB/eps.m
index 44de71f868..e0374ca54d 100644
--- a/GraphBLAS/@GrB/eps.m
+++ b/GraphBLAS/@GrB/eps.m
@@ -1,9 +1,11 @@
 function C = eps (G)
 %EPS Spacing of floating-point numbers in a GraphBLAS matrix.
+%
+% See also isfloat, realmax, realmin.
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (~isfloat (G))
diff --git a/GraphBLAS/@GrB/eq.m b/GraphBLAS/@GrB/eq.m
index 5717061e0e..3a01caabbe 100644
--- a/GraphBLAS/@GrB/eq.m
+++ b/GraphBLAS/@GrB/eq.m
@@ -8,7 +8,7 @@
 %
 % See also GrB/lt, GrB/le, GrB/gt, GrB/ge, GrB/ne.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % The pattern of C depends on the type of inputs:
diff --git a/GraphBLAS/@GrB/etree.m b/GraphBLAS/@GrB/etree.m
index 0ff5947ffd..d388898856 100644
--- a/GraphBLAS/@GrB/etree.m
+++ b/GraphBLAS/@GrB/etree.m
@@ -1,8 +1,10 @@
 function [parent, varargout] = etree (G, varargin)
 %ETREE Elimination tree of a GraphBLAS matrix.
 % See 'help etree' for details.
+%
+% See also amd.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [parent, varargout{1:nargout-1}] = builtin ('etree', logical (G), varargin {:});
diff --git a/GraphBLAS/@GrB/expand.m b/GraphBLAS/@GrB/expand.m
index f8bf455133..0f57946101 100644
--- a/GraphBLAS/@GrB/expand.m
+++ b/GraphBLAS/@GrB/expand.m
@@ -6,15 +6,16 @@
 % pattern of S is used.  The inputs may be either GraphBLAS and/or
 % MATLAB matrices/scalars, in any combination.  C is returned as a
 % GraphBLAS matrix.
+%
+% See also GrB.assign.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-% FUTURE: this is slow.  Use a built-in mexFunction.
-
 % FUTURE: as much as possible, replace scalar expansion with binary operators
 % used in a unary apply, when it becomes part of the C API.
 
-op = ['1st.' GrB.type(scalar)] ;
-C = GrB.kronecker (scalar, op, S) ;
+[m, n] = size (S) ;         % C is m-by-n, with the type of the scalar (not S)
+desc.mask = 'structure' ;   % use only the pattern of S, not its values
+C = GrB.assign (GrB (m, n, GrB.type (scalar)), S, scalar, desc) ;
 
diff --git a/GraphBLAS/@GrB/extract.m b/GraphBLAS/@GrB/extract.m
index cb27650ea1..ee89f4e46d 100644
--- a/GraphBLAS/@GrB/extract.m
+++ b/GraphBLAS/@GrB/extract.m
@@ -74,7 +74,7 @@
 %
 % See also subsref.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/extracttuples.m b/GraphBLAS/@GrB/extracttuples.m
index eab731f729..03f065eb40 100644
--- a/GraphBLAS/@GrB/extracttuples.m
+++ b/GraphBLAS/@GrB/extracttuples.m
@@ -35,7 +35,7 @@
 %
 % See also find, GrB/build.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, ~] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/eye.m b/GraphBLAS/@GrB/eye.m
index 9e8b54e156..4507c05780 100644
--- a/GraphBLAS/@GrB/eye.m
+++ b/GraphBLAS/@GrB/eye.m
@@ -12,7 +12,7 @@
 %
 % See also spones, spdiags, speye, GrB.speye, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % get the type
diff --git a/GraphBLAS/@GrB/false.m b/GraphBLAS/@GrB/false.m
index 3323036cdf..4ccda31771 100644
--- a/GraphBLAS/@GrB/false.m
+++ b/GraphBLAS/@GrB/false.m
@@ -5,7 +5,7 @@
 %
 % See also ones, true, zeros.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 arg1 = varargin {1} ;
diff --git a/GraphBLAS/@GrB/finalize.m b/GraphBLAS/@GrB/finalize.m
index b639b4cfe4..e3e6921f97 100644
--- a/GraphBLAS/@GrB/finalize.m
+++ b/GraphBLAS/@GrB/finalize.m
@@ -5,15 +5,13 @@
 %
 %   GrB.finalize
 %
-% GrB.finalize finishes GraphBLAS and frees all of its internal workspace.
-% Use of function is optional, since all workspace is freed when MATLAB
-% terminates.  No other GrB function can be called once this function
-% has been called.
+% GrB.finalize finishes GraphBLAS, and clears its global settings.
+% Its use is optional in this version of SuiteSparse:GraphBLAS.
 %
 % See also: GrB.clear, GrB.init
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-gbsetup ('finish') ;
+gbsetup ;
 
diff --git a/GraphBLAS/@GrB/find.m b/GraphBLAS/@GrB/find.m
index da1d6f9ab0..ac4a335dbd 100644
--- a/GraphBLAS/@GrB/find.m
+++ b/GraphBLAS/@GrB/find.m
@@ -3,7 +3,8 @@
 % [I, J, X] = find (G) extracts the nonzeros from a GraphBLAS matrix G.
 % X has the same type as G ('double', 'single', 'int8', ...).
 %
-% Linear 1D indexing (I = find (S) for the MATLAB matrix S).
+% Linear 1D indexing (I = find (S) for the MATLAB matrix S) is not yet
+% supported.
 %
 % G may contain explicit zero entries, and by default these are excluded
 % from the result.  Use GrB.extracttuples (G) to return these explicit
@@ -29,7 +30,7 @@
 % of G, instead of using GrB_Matrix_extractTuples_*, which always extracts
 % the entire matrix.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin > 1 && ~GrB.isbycol (G))
diff --git a/GraphBLAS/@GrB/fix.m b/GraphBLAS/@GrB/fix.m
index 28c31d1d9b..8ebdba9e7c 100644
--- a/GraphBLAS/@GrB/fix.m
+++ b/GraphBLAS/@GrB/fix.m
@@ -7,7 +7,7 @@
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isfloat (G) && GrB.entries (G) > 0)
diff --git a/GraphBLAS/@GrB/flip.m b/GraphBLAS/@GrB/flip.m
index e608af71a3..8e5c9b38dc 100644
--- a/GraphBLAS/@GrB/flip.m
+++ b/GraphBLAS/@GrB/flip.m
@@ -7,7 +7,7 @@
 %
 % See also transpose.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 2)
diff --git a/GraphBLAS/@GrB/floor.m b/GraphBLAS/@GrB/floor.m
index 2e1bc1a64a..5bc82766df 100644
--- a/GraphBLAS/@GrB/floor.m
+++ b/GraphBLAS/@GrB/floor.m
@@ -7,7 +7,7 @@
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isfloat (G) && GrB.entries (G) > 0)
diff --git a/GraphBLAS/@GrB/format.m b/GraphBLAS/@GrB/format.m
index 0e69f2358b..f3d2efb762 100644
--- a/GraphBLAS/@GrB/format.m
+++ b/GraphBLAS/@GrB/format.m
@@ -82,7 +82,7 @@
 %
 % See also GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
diff --git a/GraphBLAS/@GrB/fprintf.m b/GraphBLAS/@GrB/fprintf.m
index 1084498ece..ea016557a2 100644
--- a/GraphBLAS/@GrB/fprintf.m
+++ b/GraphBLAS/@GrB/fprintf.m
@@ -6,7 +6,7 @@
 %
 % See also fprintf, sprintf, GrB/sprintf.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 c = gb_printf_helper ('fprintf', varargin {:}) ;
diff --git a/GraphBLAS/@GrB/full.m b/GraphBLAS/@GrB/full.m
index 512fa93071..130c9a8365 100644
--- a/GraphBLAS/@GrB/full.m
+++ b/GraphBLAS/@GrB/full.m
@@ -23,7 +23,7 @@
 %
 % See also issparse, sparse, cast, GrB.type, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isa (A, 'GrB'))
diff --git a/GraphBLAS/@GrB/ge.m b/GraphBLAS/@GrB/ge.m
index fa98be8310..94ee088ebd 100644
--- a/GraphBLAS/@GrB/ge.m
+++ b/GraphBLAS/@GrB/ge.m
@@ -8,7 +8,7 @@
 %
 % See also GrB/lt, GrB/le, GrB/gt, GrB/ne, GrB/eq.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = le (B, A) ;
diff --git a/GraphBLAS/@GrB/graph.m b/GraphBLAS/@GrB/graph.m
index c6720b7e33..084b7434a9 100644
--- a/GraphBLAS/@GrB/graph.m
+++ b/GraphBLAS/@GrB/graph.m
@@ -23,7 +23,7 @@
 %
 % See also graph, digraph, GrB/digraph.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 type = GrB.type (G) ;
diff --git a/GraphBLAS/@GrB/gt.m b/GraphBLAS/@GrB/gt.m
index 26310fddd6..4b8d81df53 100644
--- a/GraphBLAS/@GrB/gt.m
+++ b/GraphBLAS/@GrB/gt.m
@@ -8,7 +8,7 @@
 %
 % See also GrB/lt, GrB/le, GrB/ge, GrB/ne, GrB/eq.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = lt (B, A) ;
diff --git a/GraphBLAS/@GrB/horzcat.m b/GraphBLAS/@GrB/horzcat.m
index 8acf8210f7..e3a42816c2 100644
--- a/GraphBLAS/@GrB/horzcat.m
+++ b/GraphBLAS/@GrB/horzcat.m
@@ -11,7 +11,7 @@
 
 % FUTURE: this will be much faster when it is a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % determine the size of each matrix and the size of the result
diff --git a/GraphBLAS/@GrB/incidence.m b/GraphBLAS/@GrB/incidence.m
index 4787da8420..1e5bb52661 100644
--- a/GraphBLAS/@GrB/incidence.m
+++ b/GraphBLAS/@GrB/incidence.m
@@ -36,7 +36,7 @@
 %
 % See also graph/incidence, digraph/incidence.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (A) ;
diff --git a/GraphBLAS/@GrB/init.m b/GraphBLAS/@GrB/init.m
index 9be2293859..970a71f73b 100644
--- a/GraphBLAS/@GrB/init.m
+++ b/GraphBLAS/@GrB/init.m
@@ -5,27 +5,15 @@
 %
 %   GrB.init
 %
-% GrB.init must be called before any GraphBLAS function is used.  The
-% recommended usage of this function is to add it to your startup.m
-% m-file, so it is called only once when MATLAB starts up.
-%
-% In case GraphBLAS is not in your path, or in case its MATLAB interface
-% has not yet been compiled, then add this to your startup.m file,
-% where '/home/me/GraphBLAS' is the location of your copy of GraphBLAS:
-%
-%       % add the MATLAB interface to the MATLAB path
-%       addpath ('/home/me/GraphBLAS/GraphBLAS') :
-%       try
-%           GrB.init
-%           fprintf ('GraphBLAS initialized\n') ;
-%       catch
-%           fprintf ('GraphBLAS not initialized\n') ;
-%       end
+% GrB.init initializes all SuiteSparse:GraphBLAS settings to their
+% defaults.  In prior versions (v3.1.2 and earlier), it had to be called
+% before any other SuiteSparse:GraphBLAS function in MATLAB.  Its use is
+% optional in this version of SuiteSparse:GraphBLAS.
 %
 % See also: GrB.clear, GrB.finalize, startup.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-gbsetup ('start') ;
+gbsetup ;
 
diff --git a/GraphBLAS/@GrB/int16.m b/GraphBLAS/@GrB/int16.m
index 63d779d037..4598126310 100644
--- a/GraphBLAS/@GrB/int16.m
+++ b/GraphBLAS/@GrB/int16.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int32, int64,
 % uint8, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'int16') ;
diff --git a/GraphBLAS/@GrB/int32.m b/GraphBLAS/@GrB/int32.m
index e4066728ce..9cabb72741 100644
--- a/GraphBLAS/@GrB/int32.m
+++ b/GraphBLAS/@GrB/int32.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int16, int32,
 % int64, uint8, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'int32') ;
diff --git a/GraphBLAS/@GrB/int64.m b/GraphBLAS/@GrB/int64.m
index faa70232d4..9e46e32aa2 100644
--- a/GraphBLAS/@GrB/int64.m
+++ b/GraphBLAS/@GrB/int64.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int16, int32,
 % uint8, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'int64') ;
diff --git a/GraphBLAS/@GrB/int8.m b/GraphBLAS/@GrB/int8.m
index a4a815334d..adb2169406 100644
--- a/GraphBLAS/@GrB/int8.m
+++ b/GraphBLAS/@GrB/int8.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int16, int32,
 % int64, uint8, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'int8') ;
diff --git a/GraphBLAS/@GrB/isa.m b/GraphBLAS/@GrB/isa.m
index 474484c6ab..dbd2f2a905 100644
--- a/GraphBLAS/@GrB/isa.m
+++ b/GraphBLAS/@GrB/isa.m
@@ -16,7 +16,7 @@
 % See also GrB.type, isnumeric, islogical, ischar, iscell, isstruct,
 % isfloat, isinteger, isobject, isjava, issparse, isreal, class.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isequal (classname, 'GrB') || isequal (classname, 'numeric'))
diff --git a/GraphBLAS/@GrB/isbanded.m b/GraphBLAS/@GrB/isbanded.m
index a16b30d39d..6d40350dbc 100644
--- a/GraphBLAS/@GrB/isbanded.m
+++ b/GraphBLAS/@GrB/isbanded.m
@@ -2,10 +2,12 @@
 %ISBANDED True if G is a banded GraphBLAS matrix.
 % isbanded (G, lo, hi) is true if the bandwidth of the GraphBLAS matrix G
 % is between lo and hi.
+%
+% See also istril, istriu, bandwidth.
 
 % FUTURE: this will be much faster when 'bandwidth' is a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [Glo, Ghi] = bandwidth (G) ;
diff --git a/GraphBLAS/@GrB/isdiag.m b/GraphBLAS/@GrB/isdiag.m
index 98ecbd2000..12203843d9 100644
--- a/GraphBLAS/@GrB/isdiag.m
+++ b/GraphBLAS/@GrB/isdiag.m
@@ -6,7 +6,7 @@
 
 % FUTURE: this will be much faster when 'bandwidth' is a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = isbanded (G, 0, 0) ;
diff --git a/GraphBLAS/@GrB/isempty.m b/GraphBLAS/@GrB/isempty.m
index 4581f5a9de..8e844628a0 100644
--- a/GraphBLAS/@GrB/isempty.m
+++ b/GraphBLAS/@GrB/isempty.m
@@ -4,7 +4,7 @@
 %
 % See also size.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/isfinite.m b/GraphBLAS/@GrB/isfinite.m
index 59d9116eba..4a91c50183 100644
--- a/GraphBLAS/@GrB/isfinite.m
+++ b/GraphBLAS/@GrB/isfinite.m
@@ -7,7 +7,7 @@
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/isfloat.m b/GraphBLAS/@GrB/isfloat.m
index 6c1e7d61b4..f6d6f7741d 100644
--- a/GraphBLAS/@GrB/isfloat.m
+++ b/GraphBLAS/@GrB/isfloat.m
@@ -5,7 +5,7 @@
 %
 % See also isnumeric, isreal, isinteger, islogical, GrB.type, isa, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 t = gbtype (G.opaque) ;
diff --git a/GraphBLAS/@GrB/isfull.m b/GraphBLAS/@GrB/isfull.m
index 4bcaecef1c..4212e32076 100644
--- a/GraphBLAS/@GrB/isfull.m
+++ b/GraphBLAS/@GrB/isfull.m
@@ -3,9 +3,11 @@
 % For a GraphBLAS matrix, or a MATLAB sparse matrix, GrB.isfull (A) is true if
 % numel (A) == nnz (A).  A can be a GraphBLAS matrix, or a MATLAB sparse or
 % full matrix.  GrB.isfull (A) is always true if A is a MATLAB full matrix.
+%
+% See also issparse.
 
 if (isa (A, 'GrB') || issparse (A))
-    s = (numel (A) == nnz (A)) ;
+    s = (numel (A) == GrB.entries (A)) ;
 else
     s = true ;
 end
diff --git a/GraphBLAS/@GrB/ishermitian.m b/GraphBLAS/@GrB/ishermitian.m
index fc637906bd..e16b217de8 100644
--- a/GraphBLAS/@GrB/ishermitian.m
+++ b/GraphBLAS/@GrB/ishermitian.m
@@ -6,9 +6,9 @@
 %
 % See also issymmetric.
 
-% FUTURE: this will be much faster.  See CHOLMOD/MATLAB/spsym.
+% FUTURE: this can be much faster.  See CHOLMOD/MATLAB/spsym.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
@@ -24,7 +24,7 @@
     if (isequal (option, 'skew'))
         s = (norm (G + G', 1) == 0) ;
     else
-        s = (norm (G - G', 1) == 0) ;
+        s = (GrB.normdiff (G, G', 1) == 0) ;
     end
     if (s)
         % also check the pattern; G might have explicit zeros
diff --git a/GraphBLAS/@GrB/isinf.m b/GraphBLAS/@GrB/isinf.m
index d2d6f9e40c..c865ada1e6 100644
--- a/GraphBLAS/@GrB/isinf.m
+++ b/GraphBLAS/@GrB/isinf.m
@@ -7,7 +7,7 @@
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/isinteger.m b/GraphBLAS/@GrB/isinteger.m
index da10d6c6d3..9e1011757b 100644
--- a/GraphBLAS/@GrB/isinteger.m
+++ b/GraphBLAS/@GrB/isinteger.m
@@ -5,7 +5,7 @@
 %
 % See also isnumeric, isfloat, isreal, islogical, GrB.type, isa, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 t = gbtype (G.opaque) ;
diff --git a/GraphBLAS/@GrB/islogical.m b/GraphBLAS/@GrB/islogical.m
index 39bebb9c26..9937cce8d0 100644
--- a/GraphBLAS/@GrB/islogical.m
+++ b/GraphBLAS/@GrB/islogical.m
@@ -4,7 +4,7 @@
 %
 % See also isnumeric, isfloat, isreal, isinteger, GrB.type, isa, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = isequal (gbtype (G.opaque), 'logical') ;
diff --git a/GraphBLAS/@GrB/ismatrix.m b/GraphBLAS/@GrB/ismatrix.m
index 1fa715535e..ea70169655 100644
--- a/GraphBLAS/@GrB/ismatrix.m
+++ b/GraphBLAS/@GrB/ismatrix.m
@@ -4,7 +4,7 @@
 %
 % See also issparse, isvector, isscalar, sparse, full, isa, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = true ;
diff --git a/GraphBLAS/@GrB/isnan.m b/GraphBLAS/@GrB/isnan.m
index ebc1d69ff2..8ea6cd176e 100644
--- a/GraphBLAS/@GrB/isnan.m
+++ b/GraphBLAS/@GrB/isnan.m
@@ -7,7 +7,7 @@
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/isnumeric.m b/GraphBLAS/@GrB/isnumeric.m
index f8fa2cc5f8..fb892b3ad4 100644
--- a/GraphBLAS/@GrB/isnumeric.m
+++ b/GraphBLAS/@GrB/isnumeric.m
@@ -6,7 +6,7 @@
 %
 % See also isfloat, isreal, isinteger, islogical, GrB.type, isa, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = true ;
diff --git a/GraphBLAS/@GrB/isreal.m b/GraphBLAS/@GrB/isreal.m
index 17234bf0d5..2d0b7f8c31 100644
--- a/GraphBLAS/@GrB/isreal.m
+++ b/GraphBLAS/@GrB/isreal.m
@@ -6,7 +6,7 @@
 %
 % See also isnumeric, isfloat, isinteger, islogical, GrB.type, isa, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = ~isequal (gbtype (G.opaque), 'complex') ;
diff --git a/GraphBLAS/@GrB/isscalar.m b/GraphBLAS/@GrB/isscalar.m
index a9913baae9..c58ca75aaf 100644
--- a/GraphBLAS/@GrB/isscalar.m
+++ b/GraphBLAS/@GrB/isscalar.m
@@ -4,7 +4,7 @@
 %
 % See also issparse, ismatrix, isvector, sparse, full, isa, GrB, size.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/issigned.m b/GraphBLAS/@GrB/issigned.m
index 5da6b225c0..5a25f575bb 100644
--- a/GraphBLAS/@GrB/issigned.m
+++ b/GraphBLAS/@GrB/issigned.m
@@ -5,7 +5,7 @@
 %
 % See also isinteger, isreal, isnumeric, isfloat.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = isequal (type, 'double') || isequal (type, 'single') || ...
diff --git a/GraphBLAS/@GrB/issparse.m b/GraphBLAS/@GrB/issparse.m
index 86becb6369..449231b0a4 100644
--- a/GraphBLAS/@GrB/issparse.m
+++ b/GraphBLAS/@GrB/issparse.m
@@ -4,7 +4,7 @@
 %
 % See also ismatrix, isvector, isscalar, sparse, full, isa, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = true ;
diff --git a/GraphBLAS/@GrB/issymmetric.m b/GraphBLAS/@GrB/issymmetric.m
index 36fc465bf3..d6d69d50e3 100644
--- a/GraphBLAS/@GrB/issymmetric.m
+++ b/GraphBLAS/@GrB/issymmetric.m
@@ -8,7 +8,7 @@
 
 % FUTURE: this will be much faster.  See CHOLMOD/MATLAB/spsym.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
@@ -24,7 +24,7 @@
     if (isequal (option, 'skew'))
         s = (norm (G + G.', 1) == 0) ;
     else
-        s = (norm (G - G.', 1) == 0) ;
+        s = (GrB.normdiff (G, G.', 1) == 0) ;
     end
     if (s)
         % also check the pattern; G might have explicit zeros
diff --git a/GraphBLAS/@GrB/istril.m b/GraphBLAS/@GrB/istril.m
index 108967455f..4bfaf80d91 100644
--- a/GraphBLAS/@GrB/istril.m
+++ b/GraphBLAS/@GrB/istril.m
@@ -10,7 +10,7 @@
 
 % FUTURE: this will be much faster when written as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = (GrB.entries (triu (G, 1)) == 0) ;
diff --git a/GraphBLAS/@GrB/istriu.m b/GraphBLAS/@GrB/istriu.m
index ccb35b9cbd..079913c68d 100644
--- a/GraphBLAS/@GrB/istriu.m
+++ b/GraphBLAS/@GrB/istriu.m
@@ -10,7 +10,7 @@
 
 % FUTURE: this will be much faster when written as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = (GrB.entries (tril (G, -1)) == 0) ;
diff --git a/GraphBLAS/@GrB/isvector.m b/GraphBLAS/@GrB/isvector.m
index 64592153f8..02b6e6a8f9 100644
--- a/GraphBLAS/@GrB/isvector.m
+++ b/GraphBLAS/@GrB/isvector.m
@@ -4,7 +4,7 @@
 %
 % See also issparse, ismatrix, isscalar, sparse, full, isa, GrB, size.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/kron.m b/GraphBLAS/@GrB/kron.m
index 1d42f663b9..92321662f5 100644
--- a/GraphBLAS/@GrB/kron.m
+++ b/GraphBLAS/@GrB/kron.m
@@ -6,7 +6,7 @@
 %
 % See also GrB.kronecker.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.kronecker (A, '*', B) ;
diff --git a/GraphBLAS/@GrB/kronecker.m b/GraphBLAS/@GrB/kronecker.m
index 863e6e8b53..872d716372 100644
--- a/GraphBLAS/@GrB/kronecker.m
+++ b/GraphBLAS/@GrB/kronecker.m
@@ -21,7 +21,7 @@
 %
 % See also kron, GrB/kron.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/ktruss.m b/GraphBLAS/@GrB/ktruss.m
index 39280135d2..ba3a99d9c1 100644
--- a/GraphBLAS/@GrB/ktruss.m
+++ b/GraphBLAS/@GrB/ktruss.m
@@ -34,7 +34,7 @@
 %
 % See also GrB.tricount.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % check inputs
diff --git a/GraphBLAS/@GrB/laplacian.m b/GraphBLAS/@GrB/laplacian.m
index 167a216f6e..30f53b3e19 100644
--- a/GraphBLAS/@GrB/laplacian.m
+++ b/GraphBLAS/@GrB/laplacian.m
@@ -21,7 +21,7 @@
 %
 % See also graph/laplacian.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 2)
diff --git a/GraphBLAS/@GrB/ldivide.m b/GraphBLAS/@GrB/ldivide.m
index 363881c007..3a4c43758a 100644
--- a/GraphBLAS/@GrB/ldivide.m
+++ b/GraphBLAS/@GrB/ldivide.m
@@ -1,8 +1,10 @@
 function C = ldivide (A, B)
 %LDIVIDE C = A.\B, sparse matrix element-wise division.
 % C = A.\B is the same as C = B./A.  See rdivide for more details.
+%
+% See also GrB/rdivide.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = rdivide (B, A) ;
diff --git a/GraphBLAS/@GrB/le.m b/GraphBLAS/@GrB/le.m
index 66669de6e6..38d933cabf 100644
--- a/GraphBLAS/@GrB/le.m
+++ b/GraphBLAS/@GrB/le.m
@@ -15,7 +15,7 @@
 % A matrix, B matrix:  C is full.
 % Zeroes are then dropped from C after it is computed.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/length.m b/GraphBLAS/@GrB/length.m
index 7a14fed63c..dcce043cb2 100644
--- a/GraphBLAS/@GrB/length.m
+++ b/GraphBLAS/@GrB/length.m
@@ -7,7 +7,7 @@
 %
 % See also size, numel.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/logical.m b/GraphBLAS/@GrB/logical.m
index 7df46dc1a4..665f09287f 100644
--- a/GraphBLAS/@GrB/logical.m
+++ b/GraphBLAS/@GrB/logical.m
@@ -9,7 +9,7 @@
 % See also cast, GrB, double, complex, single, int8, int16, int32, int64,
 % uint8, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbsparse (G.opaque, 'logical') ;
diff --git a/GraphBLAS/@GrB/lt.m b/GraphBLAS/@GrB/lt.m
index 250be5f602..480dd6f58c 100644
--- a/GraphBLAS/@GrB/lt.m
+++ b/GraphBLAS/@GrB/lt.m
@@ -15,7 +15,7 @@
 % A matrix, B matrix:  C has the pattern of the set union, A+B.
 % Zeroes are then dropped from C after it is computed.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/max.m b/GraphBLAS/@GrB/max.m
index 6835009162..9e82edbca5 100644
--- a/GraphBLAS/@GrB/max.m
+++ b/GraphBLAS/@GrB/max.m
@@ -1,4 +1,4 @@
-function [C I] = max (varargin)
+function C = max (varargin)
 %MAX Maximum elements of a GraphBLAS or MATLAB matrix.
 % C = max (G) is the largest entry in the vector G.  If G is a matrix,
 % C is a row vector with C(j) = max (G (:,j)).
@@ -17,7 +17,7 @@
 %
 % See also min.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 G = varargin {1} ;
diff --git a/GraphBLAS/@GrB/min.m b/GraphBLAS/@GrB/min.m
index d6c8127c66..6c250b219e 100644
--- a/GraphBLAS/@GrB/min.m
+++ b/GraphBLAS/@GrB/min.m
@@ -18,7 +18,7 @@
 %
 % See also max.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 G = varargin {1} ;
diff --git a/GraphBLAS/@GrB/minus.m b/GraphBLAS/@GrB/minus.m
index a9e9b7f92a..4ee677691c 100644
--- a/GraphBLAS/@GrB/minus.m
+++ b/GraphBLAS/@GrB/minus.m
@@ -11,7 +11,7 @@
 %
 % See also GrB.eadd, plus, uminus.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = A + (-B) ;
diff --git a/GraphBLAS/@GrB/mis.m b/GraphBLAS/@GrB/mis.m
index 97d181e757..d38cdaa143 100644
--- a/GraphBLAS/@GrB/mis.m
+++ b/GraphBLAS/@GrB/mis.m
@@ -25,7 +25,7 @@
 %
 % See also GrB.offdiag.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (A) ;
diff --git a/GraphBLAS/@GrB/mldivide.m b/GraphBLAS/@GrB/mldivide.m
index e5f26b6ba9..790534de5a 100644
--- a/GraphBLAS/@GrB/mldivide.m
+++ b/GraphBLAS/@GrB/mldivide.m
@@ -7,10 +7,12 @@
 %
 % The input matrices may be either GraphBLAS and/or MATLAB matrices, in
 % any combination.  C is returned as a GraphBLAS matrix.
+%
+% See also GrB/mrdivide.
 
 % FUTURE: add solvers over a group (GF(2) for example).
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/monoidinfo.m b/GraphBLAS/@GrB/monoidinfo.m
index 992a7f5dba..2a2e38e0a2 100644
--- a/GraphBLAS/@GrB/monoidinfo.m
+++ b/GraphBLAS/@GrB/monoidinfo.m
@@ -36,7 +36,7 @@ function monoidinfo (monoid, type)
 
 % FUTURE: add complex monoids
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
diff --git a/GraphBLAS/@GrB/mpower.m b/GraphBLAS/@GrB/mpower.m
index a11ff1871c..a10c7e2910 100644
--- a/GraphBLAS/@GrB/mpower.m
+++ b/GraphBLAS/@GrB/mpower.m
@@ -8,7 +8,7 @@
 %
 % See also GrB/power.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (A) ;
diff --git a/GraphBLAS/@GrB/mrdivide.m b/GraphBLAS/@GrB/mrdivide.m
index 0d33394670..34dd51c5e7 100644
--- a/GraphBLAS/@GrB/mrdivide.m
+++ b/GraphBLAS/@GrB/mrdivide.m
@@ -7,10 +7,12 @@
 %
 % The input matrices may be either GraphBLAS and/or MATLAB matrices, in
 % any combination.  C is returned as a GraphBLAS matrix.
+%
+% See also GrB/mldivide.
 
 % FUTURE: add solvers over a group (GF(2) for example).
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (B))
diff --git a/GraphBLAS/@GrB/mtimes.m b/GraphBLAS/@GrB/mtimes.m
index 3a5e0f6e0c..799bb30732 100644
--- a/GraphBLAS/@GrB/mtimes.m
+++ b/GraphBLAS/@GrB/mtimes.m
@@ -10,7 +10,7 @@
 %
 % See also GrB.mxm, GrB.emult, times.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A) || isscalar (B))
diff --git a/GraphBLAS/@GrB/mxm.m b/GraphBLAS/@GrB/mxm.m
index 4f1bd6feb3..cfbe3c9dee 100644
--- a/GraphBLAS/@GrB/mxm.m
+++ b/GraphBLAS/@GrB/mxm.m
@@ -66,7 +66,7 @@
 %
 % See also GrB.descriptorinfo, GrB.add, mtimes.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/ne.m b/GraphBLAS/@GrB/ne.m
index a28196e73a..322d35c7a4 100644
--- a/GraphBLAS/@GrB/ne.m
+++ b/GraphBLAS/@GrB/ne.m
@@ -15,7 +15,7 @@
 % A matrix, B matrix:  C is sparse, with the pattern of A+B.
 % Zeroes are then dropped from C after it is computed.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/nnz.m b/GraphBLAS/@GrB/nnz.m
index 43bc12bdb2..b559faf2f3 100644
--- a/GraphBLAS/@GrB/nnz.m
+++ b/GraphBLAS/@GrB/nnz.m
@@ -6,7 +6,7 @@
 %
 % See also GrB.entries, GrB.prune, nonzeros, size, numel.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 e = GrB.entries (G) - GrB.entries (GrB.select (G, '==0')) ;
diff --git a/GraphBLAS/@GrB/nonz.m b/GraphBLAS/@GrB/nonz.m
index a439b67aed..4ae218a050 100644
--- a/GraphBLAS/@GrB/nonz.m
+++ b/GraphBLAS/@GrB/nonz.m
@@ -49,7 +49,7 @@
 %
 % See also GrB.entries, nnz, GrB/nnz, nonzeros, GrB/nonzeros, GrB.prune.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % issparse (G) is overloaded for a GraphBLAS matrix, and always returns
diff --git a/GraphBLAS/@GrB/nonzeros.m b/GraphBLAS/@GrB/nonzeros.m
index 74805e7f05..e3b0fdda6e 100644
--- a/GraphBLAS/@GrB/nonzeros.m
+++ b/GraphBLAS/@GrB/nonzeros.m
@@ -6,9 +6,9 @@
 % [I,J,X] = find (G) or [I,J,X] = GrB.extracttuples (G) to return those
 % entries.  This function returns the X of [I,J,X] = find (GrB.prune(G)).
 %
-% See also GrB.extracttuples, find.
+% See also GrB.extracttuples, GrB.entries, GrB.nonz, find.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 X = gbextractvalues (gbselect ('nonzero', G.opaque, struct ('kind', 'GrB'))) ;
diff --git a/GraphBLAS/@GrB/norm.m b/GraphBLAS/@GrB/norm.m
index 88c5f6cadc..b9c6c60b6e 100644
--- a/GraphBLAS/@GrB/norm.m
+++ b/GraphBLAS/@GrB/norm.m
@@ -17,53 +17,20 @@
 %   norm (G,inf) is the maximum of abs (G)
 %   norm (G,-inf) is the minimum of abs (G)
 %
-% See also GrB.reduce.
+% See also GrB.reduce, GrB.normdiff.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% FUTURE: the p-norm is not yet supported.
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 1)
     kind = 2 ;
 end
-if (kind == 0)
-    gb_error ('unknown norm') ;
-end
-
-if (ischar (kind))
-    if (isequal (kind, 'fro'))
-        kind = 0 ;
-    else
-        gb_error ('unknown norm') ;
-    end
-end
 
-if (isvector (G))
-    if (kind == 1)
-        s = sum (abs (G)) ;
-    elseif (kind == 2 || kind == 0)
-        s = sqrt (sum (G.^2)) ;
-    elseif (kind == inf)
-        s = max (abs (G)) ;
-    elseif (kind == -inf)
-        s = min (abs (G)) ;
-    else
-        gb_error ('unknown norm') ;
-    end
-else
-    if (kind == 1)
-        s = max (sum (abs (G))) ;
-    elseif (kind == 2)
-        gb_error ('Sparse norm (G,2) is not available.') ;
-    elseif (kind == 0)
-        s = sqrt (sum (G.^2, 'all')) ;
-    elseif (kind == inf)
-        s = max (sum (abs (G), 2)) ;
-    elseif (kind == -inf)
-        gb_error ('Sparse norm(G,-inf) is not available.') ;
-    else
-        gb_error ('unknown norm') ;
-    end
+if (isa (G, 'GrB'))
+    G = G.opaque ;
 end
 
-s = full (double (s)) ;
+s = gbnorm (G, kind) ;
 
diff --git a/GraphBLAS/@GrB/normdiff.m b/GraphBLAS/@GrB/normdiff.m
new file mode 100644
index 0000000000..99bc195336
--- /dev/null
+++ b/GraphBLAS/@GrB/normdiff.m
@@ -0,0 +1,38 @@
+function s = normdiff (A,B,kind)
+%NORMDIFF norm (A-B,kind)
+% s = GrB.normdiff (A,B,kind) returns norm (A-B,kind); kind defaults to 2.
+%
+% If A-B is a matrix:
+%
+%   norm (A-B,1) is the maximum sum of the columns of abs (A-B).
+%   norm (A-B,inf) is the maximum sum of the rows of abs (A-B).
+%   norm (A-B,'fro') is the Frobenius norm of A-B: the sqrt of the sum of the
+%       squares of the entries in A-B.
+%   The 2-norm is not available for MATLAB or GraphBLAS sparse matrices.
+%
+% If A-B is a row or column vector:
+%
+%   norm (A-B,1) is the sum of abs (A-B)
+%   norm (A-B,2) is the sqrt of the sum of (A-B).^2
+%   norm (A-B,inf) is the maximum of abs (A-B)
+%   norm (A-B,-inf) is the minimum of abs (A-B)
+%
+% See also GrB.reduce, GrB.norm.
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+if (nargin < 3)
+    kind = 2 ;  % NOTE(review): 2-norm is unavailable for matrices (see help)
+end
+
+if (isa (A, 'GrB'))
+    A = A.opaque ;  % unwrap: gbnormdiff is given the opaque content of A
+end
+
+if (isa (B, 'GrB'))
+    B = B.opaque ;  % unwrap: gbnormdiff is given the opaque content of B
+end
+
+s = gbnormdiff (A, B, kind) ;
+
diff --git a/GraphBLAS/@GrB/not.m b/GraphBLAS/@GrB/not.m
index f1049d917f..28d2f363e0 100644
--- a/GraphBLAS/@GrB/not.m
+++ b/GraphBLAS/@GrB/not.m
@@ -8,7 +8,7 @@
 %
 % See also GrB.apply.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.apply ('~.logical', full (G)) ;
diff --git a/GraphBLAS/@GrB/numel.m b/GraphBLAS/@GrB/numel.m
index cbb5fc2f45..d5b022ea9d 100644
--- a/GraphBLAS/@GrB/numel.m
+++ b/GraphBLAS/@GrB/numel.m
@@ -6,7 +6,7 @@
 %
 % See also nnz.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/nzmax.m b/GraphBLAS/@GrB/nzmax.m
index d98b1ca528..84183e93de 100644
--- a/GraphBLAS/@GrB/nzmax.m
+++ b/GraphBLAS/@GrB/nzmax.m
@@ -7,7 +7,7 @@
 %
 % See also nnz, GrB.entries, GrB.nonz.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 e = max (GrB.entries (G), 1) ;
diff --git a/GraphBLAS/@GrB/offdiag.m b/GraphBLAS/@GrB/offdiag.m
index d4c3470991..582d97ae99 100644
--- a/GraphBLAS/@GrB/offdiag.m
+++ b/GraphBLAS/@GrB/offdiag.m
@@ -4,7 +4,7 @@
 %
 % See also tril, triu, diag, GrB.select.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.select ('offdiag', A, 0) ;
diff --git a/GraphBLAS/@GrB/ones.m b/GraphBLAS/@GrB/ones.m
index 9ec8d11216..e6c23de9d8 100644
--- a/GraphBLAS/@GrB/ones.m
+++ b/GraphBLAS/@GrB/ones.m
@@ -6,10 +6,7 @@
 %
 % See also zeros, false, true.
 
-% FUTURE: GrB_*_assign and GxB_*_subassign need to have a special
-% case for this, as the expansion of a scalar to a dense matrix.
-
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.subassign (zeros (varargin {:}), 1) ;
diff --git a/GraphBLAS/@GrB/or.m b/GraphBLAS/@GrB/or.m
index 3d0e3e27b1..da05404190 100644
--- a/GraphBLAS/@GrB/or.m
+++ b/GraphBLAS/@GrB/or.m
@@ -6,7 +6,7 @@
 %
 % See also GrB/and, GrB/xor.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/pagerank.m b/GraphBLAS/@GrB/pagerank.m
index 6e3ff0a44d..daf8c010e3 100644
--- a/GraphBLAS/@GrB/pagerank.m
+++ b/GraphBLAS/@GrB/pagerank.m
@@ -1,4 +1,4 @@
-function r = pagerank (A, opts)
+function [r, stats] = pagerank (A, opts)
 %GRB.PAGERANK PageRank of a graph.
 % r = GrB.pagerank (A) computes the PageRank of a graph with adjacency matrix
 % A.  r = GrB.pagerank (A, options) allows for non-default options to be
@@ -12,13 +12,24 @@
 %   opts.type = 'double'    compute in 'single' or 'double' precision
 %
 % A can be a GraphBLAS or MATLAB matrix.  A can have any format ('by row' or
-% 'by col'), but GrB.pagerank is slightly faster if A is 'by col'.
+% 'by col'), but GrB.pagerank is faster if A is 'by col'.
+%
+% An optional 2nd output argument provides statistics:
+%   stats.tinit     initialization time
+%   stats.trank     pagerank time
+%   stats.iter      # of iterations taken
 %
 % See also centrality.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
+%-------------------------------------------------------------------------------
+% initializations
+%-------------------------------------------------------------------------------
+
+tstart = tic ;
+
 % check inputs and set defaults
 if (nargin < 2)
     opts = struct ;
@@ -39,10 +50,16 @@
     opts.type = 'double' ;
 end
 
+if (~(isequal (opts.type, 'single') || isequal (opts.type, 'double')))
+    gb_error ('opts.type must be ''single'' or ''double''') ;
+end
+
 % get options
 tol = opts.tol ;
 maxit = opts.maxit ;
 damp = opts.damp ;
+damp = max (damp, 0) ;
+damp = min (damp, 1) ;
 type = opts.type ;
 weighted = opts.weighted ;
 
@@ -51,40 +68,44 @@
     gb_error ('A must be square') ;
 end
 
-% native, if A is already of the right type, and stored by column
-native = (GrB.isbycol (A) & isequal (GrB.type (A), type)) ;
-
-% construct the matrix G and outdegree d
+% select the semiring and determine if A is native
 if (weighted)
     % use the weighted edges of G
-    d = GrB.vreduce ('+', spones (GrB (A, type))) ;
-    if (native)
-        G = A ;
-    else
-        G = GrB (A, type, 'by col') ;
-    end
+    % native, if A is already of the right type, and stored by column
+    native = (GrB.isbycol (A) & isequal (GrB.type (A), type)) ;
+    semiring = ['+.*.' type] ;
 else
-    % use the pattern of G 
-    if (native)
-        G = GrB.apply ('1', A) ;
-    else
-        G = GrB.apply ('1', GrB (A, type, 'by col')) ;
-    end
-    d = GrB.vreduce ('+', G) ;
+    % use just the pattern of G, so A can be of any type.
+    % native, if A is already stored by column
+    native = GrB.isbycol (A) ;
+    semiring = ['+.2nd.' type] ;
+end
+
+% construct the matrix G, or use A as-is
+if (native)
+    G = A ;
+else
+    G = GrB (A, type, 'by col') ;
 end
 
+% select the accum operator, according to the type
+accum = ['+.' type] ;
+
 % d (i) = outdegree of node i, or 1 if i is a sink
+d = GrB (GrB.entries (A, 'row', 'degree'), type) ;
 sinks = find (d == 0) ;
 any_sinks = ~isempty (sinks) ;
 if (any_sinks)
-    % d (sinks) = 1 ;
+    % d (sinks) = 1, to avoid divide-by-zero
     d = GrB.subassign (d, { sinks }, 1) ;
 end
 
-% place explicit zeros on the diagonal of G so that r remains full
-I = int64 (0) : int64 (n-1) ;
-desc0.base = 'zero-based' ;
-G = G + GrB.build (I, I, zeros (n, 1, type), n, n, desc0) ;
+%-------------------------------------------------------------------------------
+% compute the pagerank
+%-------------------------------------------------------------------------------
+
+stats.tinit = toc (tstart) ;
+tstart = tic ;
 
 % teleport factor
 tfactor = cast ((1 - damp) / n, type) ;
@@ -92,29 +113,36 @@
 % sink factor
 dn = cast (damp / n, type) ;
 
-% use G' in GrB.mxm, and return the result as a MATLAB full vector
-% FUTURE: when GraphBLAS is fast for dense vector, use them instead
+% use G' in GrB.mxm
 desc.in0 = 'transpose' ;
-desc.kind = 'full' ;
 
 % initial PageRank: all nodes have rank 1/n
-r = ones (n, 1, type) / n ;
+r = GrB (ones (n, 1, type) / n) ;
 
 % prescale d with damp so it doesn't have to be done in each iteration
 d = d / damp ;
 
 % compute the PageRank
 for iter = 1:maxit
-    rold = r ;
+    prior = r ;
     teleport = tfactor ;
     if (any_sinks)
         % add the teleport factor from all the sinks
-        teleport = teleport + dn * sum (r (sinks)) ;
+        % teleport = teleport + dn * sum (r (sinks)) ;
+        teleport = teleport + dn * sum (GrB.extract (r, { sinks })) ;
     end
-    % r = damp * G' * (r./d) + teleport
-    r = GrB.mxm (G, '+.*', r ./ d, desc) + teleport ;
-    if (norm (r - rold, inf) < tol)
+    % r (1:n) = teleport
+    r = GrB.expand (teleport, r) ;
+    % t = prior ./ d
+    t = GrB.emult (prior, '/', d) ;
+    % r = r + G' * t
+    r = GrB.mxm (r, accum, G, semiring, t, desc) ;
+    % e = norm (r-prior, inf)
+    e = GrB.normdiff (r, prior, inf) ;
+    if (e < tol)
         % convergence has been reached
+        stats.trank = toc (tstart) ;
+        stats.iter = iter ;
         return ;
     end
 end
diff --git a/GraphBLAS/@GrB/plus.m b/GraphBLAS/@GrB/plus.m
index fdbbb1b64b..764903275b 100644
--- a/GraphBLAS/@GrB/plus.m
+++ b/GraphBLAS/@GrB/plus.m
@@ -11,7 +11,7 @@
 %
 % See also GrB.eadd, minus, uminus.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/power.m b/GraphBLAS/@GrB/power.m
index 63fca0b593..d65bc27acb 100644
--- a/GraphBLAS/@GrB/power.m
+++ b/GraphBLAS/@GrB/power.m
@@ -13,7 +13,7 @@
 %
 % See also GrB/mpower.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/private/gb_convert_index_1d_to_2d.m b/GraphBLAS/@GrB/private/gb_convert_index_1d_to_2d.m
index 34732b832a..36fa458f82 100644
--- a/GraphBLAS/@GrB/private/gb_convert_index_1d_to_2d.m
+++ b/GraphBLAS/@GrB/private/gb_convert_index_1d_to_2d.m
@@ -1,4 +1,4 @@
-function [i j] = gb_convert_index_1d_to_2d (k, m) ;
+function [i, j] = gb_convert_index_1d_to_2d (k, m)
 %GB_CONVERT_INDEX_1D_TO_2D convert 1D indices to 2D
 % the indices must be zero-based
 
diff --git a/GraphBLAS/@GrB/private/gb_error.m b/GraphBLAS/@GrB/private/gb_error.m
index 586b96fdd0..aed8aa21a0 100644
--- a/GraphBLAS/@GrB/private/gb_error.m
+++ b/GraphBLAS/@GrB/private/gb_error.m
@@ -1,7 +1,7 @@
 function gb_error (varargin)
 %GB_ERROR report an error
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:error', varargin {:}) ;
diff --git a/GraphBLAS/@GrB/private/gb_get_scalar.m b/GraphBLAS/@GrB/private/gb_get_scalar.m
index abc5311359..7604dce2c0 100644
--- a/GraphBLAS/@GrB/private/gb_get_scalar.m
+++ b/GraphBLAS/@GrB/private/gb_get_scalar.m
@@ -2,7 +2,7 @@
 %GB_GET_SCALAR get the first scalar from a matrix
 
 [~, ~, x] = GrB.extracttuples (A) ;
-if (length (x) == 0)
+if (isempty (x))
     x = 0 ;
 else
     x = x (1) ;
diff --git a/GraphBLAS/@GrB/private/gbapply.m b/GraphBLAS/@GrB/private/gbapply.m
index 781901295b..ee314da82c 100644
--- a/GraphBLAS/@GrB/private/gbapply.m
+++ b/GraphBLAS/@GrB/private/gbapply.m
@@ -1,6 +1,6 @@
-function Cout = gbapply (Cin, M, accum, op, A, desc)
+function Cout = gbapply (Cin, M, accum, op, A, desc)    %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbassign.m b/GraphBLAS/@GrB/private/gbassign.m
index ef4a122d9d..64269582c8 100644
--- a/GraphBLAS/@GrB/private/gbassign.m
+++ b/GraphBLAS/@GrB/private/gbassign.m
@@ -1,6 +1,6 @@
-function Cout = gbassign (Cin, M, accum, A, I, J, desc)
+function Cout = gbassign (Cin, M, accum, A, I, J, desc)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbbinopinfo.m b/GraphBLAS/@GrB/private/gbbinopinfo.m
index 429f9b4a5a..0379391168 100644
--- a/GraphBLAS/@GrB/private/gbbinopinfo.m
+++ b/GraphBLAS/@GrB/private/gbbinopinfo.m
@@ -1,6 +1,6 @@
-function gbbinopinfo (op, type)
+function gbbinopinfo (op, type)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbbuild.m b/GraphBLAS/@GrB/private/gbbuild.m
index ead6cb3b7b..b58eafdf6c 100644
--- a/GraphBLAS/@GrB/private/gbbuild.m
+++ b/GraphBLAS/@GrB/private/gbbuild.m
@@ -1,6 +1,6 @@
-function A = gbbuild (I, J, X, m, n, dup, type, desc)
+function A = gbbuild (I, J, X, m, n, dup, type, desc)       %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbburble.m b/GraphBLAS/@GrB/private/gbburble.m
new file mode 100644
index 0000000000..c6944ebdd7
--- /dev/null
+++ b/GraphBLAS/@GrB/private/gbburble.m
@@ -0,0 +1,7 @@
+function c = gbburble (c)       %#ok
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
+
diff --git a/GraphBLAS/@GrB/private/gbchunk.m b/GraphBLAS/@GrB/private/gbchunk.m
index 4c128e58b2..984edacfba 100644
--- a/GraphBLAS/@GrB/private/gbchunk.m
+++ b/GraphBLAS/@GrB/private/gbchunk.m
@@ -1,6 +1,6 @@
-function c = gbchunk (c)
+function c = gbchunk (c)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbdescriptorinfo.m b/GraphBLAS/@GrB/private/gbdescriptorinfo.m
index b86f0151a4..45caf32354 100644
--- a/GraphBLAS/@GrB/private/gbdescriptorinfo.m
+++ b/GraphBLAS/@GrB/private/gbdescriptorinfo.m
@@ -1,6 +1,6 @@
-function gbdescriptorinfo (d)
+function gbdescriptorinfo (d)       %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbdisp.m b/GraphBLAS/@GrB/private/gbdisp.m
index c5037e6d96..c676645c81 100644
--- a/GraphBLAS/@GrB/private/gbdisp.m
+++ b/GraphBLAS/@GrB/private/gbdisp.m
@@ -1,6 +1,6 @@
-function gbdisp (C, cnz, level)
+function gbdisp (C, cnz, level)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbeadd.m b/GraphBLAS/@GrB/private/gbeadd.m
index 90dbcf38d0..46642eee2e 100644
--- a/GraphBLAS/@GrB/private/gbeadd.m
+++ b/GraphBLAS/@GrB/private/gbeadd.m
@@ -1,6 +1,6 @@
-function Cout = gbeadd (Cin, M, accum, semiring, A, B, desc)
+function Cout = gbeadd (Cin, M, accum, semiring, A, B, desc)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbemult.m b/GraphBLAS/@GrB/private/gbemult.m
index d2874ddae4..bb882f0570 100644
--- a/GraphBLAS/@GrB/private/gbemult.m
+++ b/GraphBLAS/@GrB/private/gbemult.m
@@ -1,6 +1,6 @@
-function Cout = gbemult (Cin, M, accum, semiring, A, B, desc)
+function Cout = gbemult (Cin, M, accum, semiring, A, B, desc)       %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbextract.m b/GraphBLAS/@GrB/private/gbextract.m
index d42e8eefbc..6ce0e113d1 100644
--- a/GraphBLAS/@GrB/private/gbextract.m
+++ b/GraphBLAS/@GrB/private/gbextract.m
@@ -1,6 +1,6 @@
-function Cout = gbextract (Cin, M, accum, A, I, J, desc)
+function Cout = gbextract (Cin, M, accum, A, I, J, desc)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbextracttuples.m b/GraphBLAS/@GrB/private/gbextracttuples.m
index 67b6d30ba1..d36aad3204 100644
--- a/GraphBLAS/@GrB/private/gbextracttuples.m
+++ b/GraphBLAS/@GrB/private/gbextracttuples.m
@@ -1,6 +1,6 @@
-function [I,J,X] = gbextracttuples (A, desc)
+function [I,J,X] = gbextracttuples (A, desc)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbextractvalues.m b/GraphBLAS/@GrB/private/gbextractvalues.m
index 3d990b6677..e158b037d5 100644
--- a/GraphBLAS/@GrB/private/gbextractvalues.m
+++ b/GraphBLAS/@GrB/private/gbextractvalues.m
@@ -1,6 +1,6 @@
-function X = gbextractvalues (A)
+function X = gbextractvalues (A)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbformat.m b/GraphBLAS/@GrB/private/gbformat.m
index b4555a7790..85872b8fb6 100644
--- a/GraphBLAS/@GrB/private/gbformat.m
+++ b/GraphBLAS/@GrB/private/gbformat.m
@@ -1,6 +1,6 @@
-function f = gbformat (arg)
+function f = gbformat (arg)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbfull.m b/GraphBLAS/@GrB/private/gbfull.m
index 5d29d45414..feaffe1861 100644
--- a/GraphBLAS/@GrB/private/gbfull.m
+++ b/GraphBLAS/@GrB/private/gbfull.m
@@ -1,6 +1,6 @@
-function F = gbfull (G, type, identity, descriptor)
+function F = gbfull (G, type, identity, descriptor)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbisequal.m b/GraphBLAS/@GrB/private/gbisequal.m
index 92b7bd229f..32437b2514 100644
--- a/GraphBLAS/@GrB/private/gbisequal.m
+++ b/GraphBLAS/@GrB/private/gbisequal.m
@@ -1,6 +1,6 @@
-function s = gbisequal (A, B)
+function s = gbisequal (A, B)       %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbkronecker.m b/GraphBLAS/@GrB/private/gbkronecker.m
index 880e857778..232919040a 100644
--- a/GraphBLAS/@GrB/private/gbkronecker.m
+++ b/GraphBLAS/@GrB/private/gbkronecker.m
@@ -1,6 +1,6 @@
-function Cout = gbkronecker (Cin, M, accum, op, A, B, desc)
+function Cout = gbkronecker (Cin, M, accum, op, A, B, desc)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gblogassign.m b/GraphBLAS/@GrB/private/gblogassign.m
index 32fec111bf..680d8a63f2 100644
--- a/GraphBLAS/@GrB/private/gblogassign.m
+++ b/GraphBLAS/@GrB/private/gblogassign.m
@@ -1,6 +1,6 @@
-function Cout = gblogassign (Cin, M, A)
+function Cout = gblogassign (Cin, M, A)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gblogextract.m b/GraphBLAS/@GrB/private/gblogextract.m
index e9f61f678d..f672925421 100644
--- a/GraphBLAS/@GrB/private/gblogextract.m
+++ b/GraphBLAS/@GrB/private/gblogextract.m
@@ -1,6 +1,6 @@
-function C = gblogextract (A, M)
+function C = gblogextract (A, M)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbmake.m b/GraphBLAS/@GrB/private/gbmake.m
index bfaba404eb..6172781741 100644
--- a/GraphBLAS/@GrB/private/gbmake.m
+++ b/GraphBLAS/@GrB/private/gbmake.m
@@ -20,7 +20,7 @@ function gbmake (what)
 %
 % See also: mex, version, GrB.clear
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if verLessThan ('matlab', '9.4')
@@ -61,6 +61,7 @@ function gbmake (what)
             end
         end
     end
+catch
 end
 
 if (~ismac && isunix)
@@ -107,7 +108,7 @@ function gbmake (what)
     objfile = [ ofile(1:end-2) object_suffix ] ;
 
     % get the object file modification time
-    objlist = [ objlist ' ' objfile ] ;
+    objlist = [ objlist ' ' objfile ] ;     %#ok
     dobj = dir (objfile) ;
     if (isempty (dobj))
         % there is no object file; the cfile must be compiled
diff --git a/GraphBLAS/@GrB/private/gbmonoidinfo.m b/GraphBLAS/@GrB/private/gbmonoidinfo.m
index 3252294000..9431df202d 100644
--- a/GraphBLAS/@GrB/private/gbmonoidinfo.m
+++ b/GraphBLAS/@GrB/private/gbmonoidinfo.m
@@ -1,6 +1,6 @@
-function gbmonoidinfo (monoid, type)
+function gbmonoidinfo (monoid, type)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbmxm.m b/GraphBLAS/@GrB/private/gbmxm.m
index 158b762dce..7fefaa451f 100644
--- a/GraphBLAS/@GrB/private/gbmxm.m
+++ b/GraphBLAS/@GrB/private/gbmxm.m
@@ -1,6 +1,6 @@
-function Cout = gbmxm (Cin, M, accum, semiring, A, B, desc)
+function Cout = gbmxm (Cin, M, accum, semiring, A, B, desc)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbnew.m b/GraphBLAS/@GrB/private/gbnew.m
index 8a94114e59..41d219b331 100644
--- a/GraphBLAS/@GrB/private/gbnew.m
+++ b/GraphBLAS/@GrB/private/gbnew.m
@@ -1,6 +1,6 @@
-function G = gbnew (arg1, arg2, arg3)
+function G = gbnew (arg1, arg2, arg3)       %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbnvals.m b/GraphBLAS/@GrB/private/gbnvals.m
index 9bf12026e6..3193393a10 100644
--- a/GraphBLAS/@GrB/private/gbnvals.m
+++ b/GraphBLAS/@GrB/private/gbnvals.m
@@ -1,6 +1,6 @@
-function nvals = gbnvals (G)
+function nvals = gbnvals (G)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbreduce.m b/GraphBLAS/@GrB/private/gbreduce.m
index 3096ce0622..115097af3d 100644
--- a/GraphBLAS/@GrB/private/gbreduce.m
+++ b/GraphBLAS/@GrB/private/gbreduce.m
@@ -1,6 +1,6 @@
-function cout = gbreduce (cin, accum, op, A, desc)
+function cout = gbreduce (cin, accum, op, A, desc)      %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbselect.m b/GraphBLAS/@GrB/private/gbselect.m
index 2dc9565666..06fd8d2c95 100644
--- a/GraphBLAS/@GrB/private/gbselect.m
+++ b/GraphBLAS/@GrB/private/gbselect.m
@@ -1,6 +1,6 @@
-function Cout = gbselect (Cin, M, accum, op, A, b, desc)
+function Cout = gbselect (Cin, M, accum, op, A, b, desc)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbselectopinfo.m b/GraphBLAS/@GrB/private/gbselectopinfo.m
index a58d95eee8..b9a2c0cf04 100644
--- a/GraphBLAS/@GrB/private/gbselectopinfo.m
+++ b/GraphBLAS/@GrB/private/gbselectopinfo.m
@@ -1,6 +1,6 @@
-function gbselectopinfo (selectop, type)
+function gbselectopinfo (selectop, type)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbsemiringinfo.m b/GraphBLAS/@GrB/private/gbsemiringinfo.m
index 6237d55fdd..6549b68d24 100644
--- a/GraphBLAS/@GrB/private/gbsemiringinfo.m
+++ b/GraphBLAS/@GrB/private/gbsemiringinfo.m
@@ -1,6 +1,6 @@
-function gbsemiringinfo (semiring, type)
+function gbsemiringinfo (semiring, type)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbsetup.m b/GraphBLAS/@GrB/private/gbsetup.m
index d4bf9f554d..88eebe293c 100644
--- a/GraphBLAS/@GrB/private/gbsetup.m
+++ b/GraphBLAS/@GrB/private/gbsetup.m
@@ -1,6 +1,6 @@
-function gbsetup (action)
+function gbsetup
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbsize.m b/GraphBLAS/@GrB/private/gbsize.m
index 570d7bbf24..d111dc260b 100644
--- a/GraphBLAS/@GrB/private/gbsize.m
+++ b/GraphBLAS/@GrB/private/gbsize.m
@@ -1,6 +1,6 @@
-function [m, n] = gbsize (G)
+function [m, n] = gbsize (G)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbsparse.m b/GraphBLAS/@GrB/private/gbsparse.m
index 7928029ce9..520fe95781 100644
--- a/GraphBLAS/@GrB/private/gbsparse.m
+++ b/GraphBLAS/@GrB/private/gbsparse.m
@@ -1,6 +1,6 @@
-function A = gbsparse (X, type)
+function A = gbsparse (X, type)     %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbsubassign.m b/GraphBLAS/@GrB/private/gbsubassign.m
index 4d5b6d073c..70ddd58882 100644
--- a/GraphBLAS/@GrB/private/gbsubassign.m
+++ b/GraphBLAS/@GrB/private/gbsubassign.m
@@ -1,6 +1,6 @@
-function Cout = gbsubassign (Cin, M, accum, A, I, j, desc)
+function Cout = gbsubassign (Cin, M, accum, A, I, j, desc)      %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbthreads.m b/GraphBLAS/@GrB/private/gbthreads.m
index 8691c29edc..0eab336bd7 100644
--- a/GraphBLAS/@GrB/private/gbthreads.m
+++ b/GraphBLAS/@GrB/private/gbthreads.m
@@ -1,6 +1,6 @@
-function nthreads = gbthreads (nthreads)
+function nthreads = gbthreads (nthreads)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbtrans.m b/GraphBLAS/@GrB/private/gbtrans.m
index 1d02aa6638..7bbda483d2 100644
--- a/GraphBLAS/@GrB/private/gbtrans.m
+++ b/GraphBLAS/@GrB/private/gbtrans.m
@@ -1,6 +1,6 @@
-function Cout = gbtrans (Cin, M, accum, A, desc)
+function Cout = gbtrans (Cin, M, accum, A, desc)        %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbtype.m b/GraphBLAS/@GrB/private/gbtype.m
index 8dd58f761b..aaf40321f4 100644
--- a/GraphBLAS/@GrB/private/gbtype.m
+++ b/GraphBLAS/@GrB/private/gbtype.m
@@ -1,6 +1,6 @@
-function type = gbtype (X)
+function type = gbtype (X)      %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbunopinfo.m b/GraphBLAS/@GrB/private/gbunopinfo.m
index b39c4be3ef..faf06bed03 100644
--- a/GraphBLAS/@GrB/private/gbunopinfo.m
+++ b/GraphBLAS/@GrB/private/gbunopinfo.m
@@ -1,6 +1,6 @@
-function gbunopinfo (op, type)
+function gbunopinfo (op, type)      %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/gbvreduce.m b/GraphBLAS/@GrB/private/gbvreduce.m
index 03308d00e2..16ac4d3d1a 100644
--- a/GraphBLAS/@GrB/private/gbvreduce.m
+++ b/GraphBLAS/@GrB/private/gbvreduce.m
@@ -1,6 +1,6 @@
-function Cout = gbvreduce (Cin, M, accum, op, A, desc)
+function Cout = gbvreduce (Cin, M, accum, op, A, desc)      %#ok
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 error ('GrB:mex', 'mexFunction not found; use gbmake to compile GraphBLAS') ;
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbapply.c b/GraphBLAS/@GrB/private/mexfunctions/gbapply.c
index b9bf8dab94..c7e2d28ebb 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbapply.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbapply.c
@@ -2,7 +2,7 @@
 // gbapply: apply a unary operator to a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbassign.c b/GraphBLAS/@GrB/private/mexfunctions/gbassign.c
index 2497db29c6..c59760c8a2 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbassign.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbassign.c
@@ -2,7 +2,7 @@
 // gbassign: assign entries into a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbbinopinfo.c b/GraphBLAS/@GrB/private/mexfunctions/gbbinopinfo.c
index 631cf77d89..61f6d1f5ed 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbbinopinfo.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbbinopinfo.c
@@ -2,7 +2,7 @@
 // gbbinopinfo : print a GraphBLAS binary op (for illustration only)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbbuild.c b/GraphBLAS/@GrB/private/mexfunctions/gbbuild.c
index 8f06ff0168..24b7fefb27 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbbuild.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbbuild.c
@@ -2,7 +2,7 @@
 // gbbuild: build a GraphBLAS matrix or a MATLAB sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbburble.c b/GraphBLAS/@GrB/private/mexfunctions/gbburble.c
new file mode 100644
index 0000000000..46efd40164
--- /dev/null
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbburble.c
@@ -0,0 +1,62 @@
+//------------------------------------------------------------------------------
+// gbburble: get/set the burble setting for diagnostic output
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "gb_matlab.h"
+
+void mexFunction
+(
+    int nargout,                // number of outputs requested (at most 1)
+    mxArray *pargout [ ],       // pargout [0]: burble setting, a double scalar
+    int nargin,                 // number of inputs given (at most 1)
+    const mxArray *pargin [ ]   // pargin [0]: optional new burble setting
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs: at most one input and at most one output
+    //--------------------------------------------------------------------------
+
+    gb_usage (nargin <= 1 && nargout <= 1,
+        "usage: b = GrB.burble ; or GrB.burble (b)") ;
+
+    //--------------------------------------------------------------------------
+    // set the burble, if requested
+    //--------------------------------------------------------------------------
+
+    bool b ;    // set from the input (if any), then overwritten by the get
+
+    if (nargin > 0)
+    { 
+        // convert the input to a bool and make it the global burble setting
+        if (gb_mxarray_is_scalar (pargin [0]))
+        {
+            // argument is a numeric scalar; any nonzero value becomes true
+            b = (bool) mxGetScalar (pargin [0]) ;
+        }
+        else if (mxIsLogicalScalar (pargin [0]))
+        {
+            // argument is a logical scalar
+            b = (bool) mxIsLogicalScalarTrue (pargin [0]) ;
+        }
+        else
+        {
+            ERROR ("input must be a scalar") ;
+        }
+        OK (GxB_Global_Option_set (GxB_BURBLE, b)) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // return the burble: always queried, whether or not it was just set
+    //--------------------------------------------------------------------------
+
+    OK (GxB_Global_Option_get (GxB_BURBLE, &b)) ;
+    pargout [0] = mxCreateDoubleScalar (b) ;
+    GB_WRAPUP ;
+}
+
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbchunk.c b/GraphBLAS/@GrB/private/mexfunctions/gbchunk.c
index 5395e7212b..2f6354a155 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbchunk.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbchunk.c
@@ -2,7 +2,7 @@
 // gbchunk: get/set the chunk size to use in GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbdegree.c b/GraphBLAS/@GrB/private/mexfunctions/gbdegree.c
new file mode 100644
index 0000000000..47c929c6c8
--- /dev/null
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbdegree.c
@@ -0,0 +1,150 @@
+//------------------------------------------------------------------------------
+// gbdegree: number of entries in each vector of a GraphBLAS matrix struct
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// The input may be either a GraphBLAS matrix struct or a standard MATLAB
+// sparse matrix.
+
+#include "gb_matlab.h"
+
+#define USAGE "usage: degree = gbdegree (X, native)"
+
+void mexFunction
+(
+    int nargout,                // number of outputs requested (at most 1)
+    mxArray *pargout [ ],       // pargout [0]: the degree vector d
+    int nargin,                 // number of inputs given (must be 2)
+    const mxArray *pargin [ ]   // pargin [0]: X, pargin [1]: native flag
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs: exactly two inputs and at most one output
+    //--------------------------------------------------------------------------
+
+    gb_usage (nargin == 2 && nargout <= 1, USAGE) ;
+
+    //--------------------------------------------------------------------------
+    // get the inputs: X (via gb_get_shallow) and native (true if nonzero)
+    //--------------------------------------------------------------------------
+
+    GrB_Matrix X = gb_get_shallow (pargin [0]) ;
+    bool native = (mxGetScalar (pargin [1]) != 0) ;
+
+    //--------------------------------------------------------------------------
+    // get the degree of each row or column of X
+    //--------------------------------------------------------------------------
+
+    int64_t *degree = NULL ;            // set by GB_matlab_helper9
+    GrB_Index *list = NULL, nvec = 0 ;  // set by GB_matlab_helper9
+    GrB_Vector d = NULL ;               // the result, exported at the end
+
+    if (native)
+    {
+
+        //----------------------------------------------------------------------
+        // get the degree of each vector of X (X's format is not examined)
+        //----------------------------------------------------------------------
+
+        if (!GB_matlab_helper9 (X, &degree, &list, &nvec))
+        {
+            ERROR ("out of memory") ;
+        }
+        OK (GxB_Vector_import (&d, GrB_INT64, X->vdim, nvec, &list, &degree,
+            NULL)) ;
+
+    }
+    else
+    {
+
+        //----------------------------------------------------------------------
+        // non-native: row degrees if X is by column, column degrees if by row
+        //----------------------------------------------------------------------
+
+        GrB_Index nvals, nrows, ncols ;
+        OK (GrB_Matrix_nvals (&nvals, X)) ;
+        OK (GrB_Matrix_nrows (&nrows, X)) ;
+        OK (GrB_Matrix_ncols (&ncols, X)) ;
+        GxB_Format_Value fmt ;
+        OK (GxB_Matrix_Option_get (X, GxB_FORMAT, &fmt)) ;
+        GrB_Vector y = NULL ;       // workspace vector multiplied with X below
+
+        if (fmt == GxB_BY_COL)
+        {
+
+            //------------------------------------------------------------------
+            // compute the degree of each row of X, where X is held by column
+            //------------------------------------------------------------------
+
+            if (nvals < ncols / 16 && ncols > 256)
+            {
+                // X is hypersparse, or might as well be, so let y be the
+                // pattern of nonempty columns of X.
+                if (!GB_matlab_helper9 (X, &degree, &list, &nvec))
+                {
+                    ERROR ("out of memory") ;
+                }
+                OK (GxB_Vector_import (&y, GrB_INT64, ncols, nvec,
+                    &list, &degree, NULL)) ;
+            }
+            else
+            {
+                // y = dense vector of size ncols-by-1; value is not relevant
+                OK (GrB_Vector_new (&y, GrB_BOOL, ncols)) ;
+                OK (GrB_Vector_assign_BOOL (y, NULL, NULL, false, GrB_ALL,
+                    ncols, NULL)) ;
+            }
+
+            // d = X*y using the PLUS_PAIR semiring: d holds the row degrees
+            OK (GrB_Vector_new (&d, GrB_INT64, nrows)) ;
+            OK (GrB_mxv (d, NULL, NULL, GxB_PLUS_PAIR_INT64, X, y, NULL)) ;
+
+        }
+        else
+        {
+
+            //------------------------------------------------------------------
+            // compute the degree of each column of X, where X is held by row
+            //------------------------------------------------------------------
+
+            if (nvals < nrows / 16 && nrows > 256)
+            {
+                // X is hypersparse, or might as well be, so let y be the
+                // pattern of nonempty rows of X.
+                if (!GB_matlab_helper9 (X, &degree, &list, &nvec))
+                {
+                    ERROR ("out of memory") ;
+                }
+                OK (GxB_Vector_import (&y, GrB_INT64, nrows, nvec,
+                    &list, &degree, NULL)) ;
+            }
+            else
+            {
+                // y = dense vector of size nrows-by-1; value is not relevant
+                OK (GrB_Vector_new (&y, GrB_BOOL, nrows)) ;
+                OK (GrB_Vector_assign_BOOL (y, NULL, NULL, false, GrB_ALL,
+                    nrows, NULL)) ;
+            }
+
+            // d = y*X using the PLUS_PAIR semiring: d holds the column degrees
+            OK (GrB_Vector_new (&d, GrB_INT64, ncols)) ;
+            OK (GrB_vxm (d, NULL, NULL, GxB_PLUS_PAIR_INT64, y, X, NULL)) ;
+        }
+
+        OK (GrB_Vector_free (&y)) ;     // y was only workspace
+    }
+
+    //--------------------------------------------------------------------------
+    // free X and return d, exported with KIND_GRB
+    //--------------------------------------------------------------------------
+
+    OK (GrB_Matrix_free (&X)) ;
+    pargout [0] = gb_export (&d, KIND_GRB) ;
+    GB_WRAPUP ;
+}
+
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbdescriptorinfo.c b/GraphBLAS/@GrB/private/mexfunctions/gbdescriptorinfo.c
index 8517dd96e9..bfa8830d50 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbdescriptorinfo.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbdescriptorinfo.c
@@ -2,7 +2,7 @@
 // gbdescriptorinfo: print a GraphBLAS descriptor (for illustration only)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbdisp.c b/GraphBLAS/@GrB/private/mexfunctions/gbdisp.c
index bd8284b893..2c8ef2d7d6 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbdisp.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbdisp.c
@@ -2,7 +2,7 @@
 // gbdisp: display a GraphBLAS matrix struct
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbeadd.c b/GraphBLAS/@GrB/private/mexfunctions/gbeadd.c
index 6584529485..4d63ba3d75 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbeadd.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbeadd.c
@@ -2,7 +2,7 @@
 // gbeadd: sparse matrix addition
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbemult.c b/GraphBLAS/@GrB/private/mexfunctions/gbemult.c
index 9e70479fde..05553a79bc 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbemult.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbemult.c
@@ -2,7 +2,7 @@
 // gbemult: sparse matrix element-wise multiplication
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbextract.c b/GraphBLAS/@GrB/private/mexfunctions/gbextract.c
index 31afe8c0d3..337d2825fa 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbextract.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbextract.c
@@ -2,7 +2,7 @@
 // gbextract: extract entries into a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbextracttuples.c b/GraphBLAS/@GrB/private/mexfunctions/gbextracttuples.c
index 6ae8ca5338..7baad3d83a 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbextracttuples.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbextracttuples.c
@@ -2,7 +2,7 @@
 // gbextracttuples: extract all entries from a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbextractvalues.c b/GraphBLAS/@GrB/private/mexfunctions/gbextractvalues.c
index f444644977..f60ab705c8 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbextractvalues.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbextractvalues.c
@@ -2,7 +2,7 @@
 // gbextractvalues: extract all entries from a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbformat.c b/GraphBLAS/@GrB/private/mexfunctions/gbformat.c
index 3c24f92da3..8e0e04d3fb 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbformat.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbformat.c
@@ -2,7 +2,7 @@
 // gbformat: get/set the matrix format to use in GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbfull.c b/GraphBLAS/@GrB/private/mexfunctions/gbfull.c
index 0b70b9e42f..b2af610413 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbfull.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbfull.c
@@ -2,7 +2,7 @@
 // gbfull: convert a GraphBLAS matrix struct into a MATLAB dense matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbisequal.c b/GraphBLAS/@GrB/private/mexfunctions/gbisequal.c
index eec9326c67..d969f9212b 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbisequal.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbisequal.c
@@ -2,7 +2,7 @@
 // gbisequal: isequal (A,B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbkronecker.c b/GraphBLAS/@GrB/private/mexfunctions/gbkronecker.c
index f960598189..c1a30dab33 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbkronecker.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbkronecker.c
@@ -2,7 +2,7 @@
 // gbkronecker: sparse matrix Kronecker product
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gblogassign.c b/GraphBLAS/@GrB/private/mexfunctions/gblogassign.c
index c4934fd5ef..9ead80cd1a 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gblogassign.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gblogassign.c
@@ -2,7 +2,7 @@
 // gblogassign: logical assignment: C(M) = A
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gblogextract.c b/GraphBLAS/@GrB/private/mexfunctions/gblogextract.c
index 59204a5721..a250266307 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gblogextract.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gblogextract.c
@@ -2,7 +2,7 @@
 // gblogextract: logical extraction: C = A(M)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbmonoidinfo.c b/GraphBLAS/@GrB/private/mexfunctions/gbmonoidinfo.c
index bab3fe82df..f7a264aef2 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbmonoidinfo.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbmonoidinfo.c
@@ -2,7 +2,7 @@
 // gbmonoidinfo : print a GraphBLAS monoid (for illustration only)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbmxm.c b/GraphBLAS/@GrB/private/mexfunctions/gbmxm.c
index dfb5967d11..56c5667609 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbmxm.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbmxm.c
@@ -2,7 +2,7 @@
 // gbmxm: sparse matrix-matrix multiplication
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbnew.c b/GraphBLAS/@GrB/private/mexfunctions/gbnew.c
index f0a69e3630..5d31c71539 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbnew.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbnew.c
@@ -2,7 +2,7 @@
 // gbnew: create a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbnorm.c b/GraphBLAS/@GrB/private/mexfunctions/gbnorm.c
new file mode 100644
index 0000000000..d1c8a1faec
--- /dev/null
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbnorm.c
@@ -0,0 +1,78 @@
+//------------------------------------------------------------------------------
+// gbnorm: norm (A,kind)
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "gb_matlab.h"
+
+#define USAGE "usage: s = gbnorm (A, kind)"
+
+void mexFunction
+(
+    int nargout,                // number of outputs requested (at most 1)
+    mxArray *pargout [ ],       // pargout [0]: s, as a double scalar
+    int nargin,                 // number of inputs given (must be 2)
+    const mxArray *pargin [ ]   // pargin [0]: A, pargin [1]: kind of norm
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs: exactly two inputs and at most one output
+    //--------------------------------------------------------------------------
+
+    gb_usage (nargin == 2 && nargout <= 1, USAGE) ;
+
+    //--------------------------------------------------------------------------
+    // get the inputs: A, the norm kind, and the type and dimensions of A
+    //--------------------------------------------------------------------------
+
+    GrB_Matrix A = gb_get_shallow (pargin [0]) ;
+    int64_t norm_kind = gb_norm_kind (pargin [1]) ;
+
+    GrB_Type atype ;
+    OK (GxB_Matrix_type (&atype, A)) ;
+
+    GrB_Index anrows, ancols ;
+    OK (GrB_Matrix_nrows (&anrows, A)) ;
+    OK (GrB_Matrix_ncols (&ancols, A)) ;
+
+    //--------------------------------------------------------------------------
+    // s = norm (A,kind)
+    //--------------------------------------------------------------------------
+
+    double s ;
+
+    if (norm_kind == INT64_MIN && !GB_is_dense (A))
+    {
+        // INT64_MIN denotes -inf: norm (A,-inf) is zero if A is not dense
+        s = 0 ;
+    }
+    else if ((atype == GrB_FP32 || atype == GrB_FP64)
+        && (anrows == 1 || ancols == 1 || norm_kind == 0))
+    {
+        // s = norm (A,p) where A is an FP32 or FP64 vector,
+        // or when p = 0 (for Frobenius norm)
+        GrB_Index anz ;
+        OK (GrB_Matrix_nvals (&anz, A)) ;
+        s = GB_matlab_helper10 (A->x, NULL, atype, norm_kind, anz) ;
+        if (s < 0) ERROR ("unknown norm") ;     // s < 0 is treated as an error
+    }
+    else
+    {
+        // all other cases: s = norm (A, norm_kind), computed by gb_norm
+        s = gb_norm (A, norm_kind) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return s as a MATLAB double scalar
+    //--------------------------------------------------------------------------
+
+    OK (GrB_Matrix_free (&A)) ;
+    pargout [0] = mxCreateDoubleScalar (s) ;
+    GB_WRAPUP ;
+}
+
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbnormdiff.c b/GraphBLAS/@GrB/private/mexfunctions/gbnormdiff.c
new file mode 100644
index 0000000000..73ee9c515f
--- /dev/null
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbnormdiff.c
@@ -0,0 +1,102 @@
+//------------------------------------------------------------------------------
+// gbnormdiff: norm (A-B,kind)
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "gb_matlab.h"
+
+#define USAGE "usage: s = gbnormdiff (A, B, kind)"
+
+void mexFunction
+(
+    int nargout,                // number of outputs requested (at most 1)
+    mxArray *pargout [ ],       // pargout [0]: s, as a double scalar
+    int nargin,                 // number of inputs given (must be 3)
+    const mxArray *pargin [ ]   // pargin [0]: A, [1]: B, [2]: kind of norm
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs: exactly three inputs and at most one output
+    //--------------------------------------------------------------------------
+
+    gb_usage (nargin == 3 && nargout <= 1, USAGE) ;
+
+    //--------------------------------------------------------------------------
+    // get the inputs: A, B, the norm kind, and their types and dimensions
+    //--------------------------------------------------------------------------
+
+    GrB_Matrix A = gb_get_shallow (pargin [0]) ;
+    GrB_Matrix B = gb_get_shallow (pargin [1]) ;
+    int64_t norm_kind = gb_norm_kind (pargin [2]) ;
+
+    GrB_Type atype, btype ;
+    OK (GxB_Matrix_type (&atype, A)) ;
+    OK (GxB_Matrix_type (&btype, B)) ;
+
+    GrB_Index anrows, ancols, bnrows, bncols ;
+    OK (GrB_Matrix_nrows (&anrows, A)) ;
+    OK (GrB_Matrix_ncols (&ancols, A)) ;
+    OK (GrB_Matrix_nrows (&bnrows, B)) ;
+    OK (GrB_Matrix_ncols (&bncols, B)) ;
+    if (anrows != bnrows || ancols != bncols)
+    {
+        ERROR ("A and B must have the same size") ;
+    }
+
+    //--------------------------------------------------------------------------
+    // s = norm (A-B,kind)
+    //--------------------------------------------------------------------------
+
+    double s ;
+
+    if (GB_is_dense (A) && GB_is_dense (B) &&
+        (atype == GrB_FP32 || atype == GrB_FP64) && (atype == btype)
+        && (anrows == 1 || ancols == 1 || norm_kind == 0))
+    {
+        // s = norm (A-B,p) where A and B are dense FP32 or FP64 vectors,
+        // or when p = 0 (for Frobenius norm)
+        GrB_Index anz ;
+        OK (GrB_Matrix_nvals (&anz, A)) ;
+        s = GB_matlab_helper10 (A->x, B->x, atype, norm_kind, anz) ;
+        if (s < 0) ERROR ("unknown norm") ;     // s < 0 is treated as an error
+    }
+    else
+    {
+        GrB_Type xtype ;        // type of X: FP32 only if A and B are both FP32
+        GrB_BinaryOp op ;       // the MINUS operator matching xtype
+        if (atype == GrB_FP32 && atype == btype)
+        {
+            xtype = GrB_FP32 ;
+            op = GrB_MINUS_FP32 ;
+        }
+        else
+        {
+            xtype = GrB_FP64 ;
+            op = GrB_MINUS_FP64 ;
+        }
+
+        // X = A-B on the union pattern; entries only in B are copied, not negated
+        GrB_Matrix X ;
+        OK (GrB_Matrix_new (&X, xtype, anrows, ancols)) ;
+        OK (GrB_eWiseAdd_Matrix_BinaryOp (X, NULL, NULL, op, A, B, NULL)) ;
+
+        // s = norm (X, norm_kind)
+        s = gb_norm (X, norm_kind) ;
+        OK (GrB_Matrix_free (&X)) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return s as a MATLAB double scalar
+    //--------------------------------------------------------------------------
+
+    OK (GrB_Matrix_free (&A)) ;
+    OK (GrB_Matrix_free (&B)) ;
+    pargout [0] = mxCreateDoubleScalar (s) ;
+    GB_WRAPUP ;
+}
+
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbnvals.c b/GraphBLAS/@GrB/private/mexfunctions/gbnvals.c
index 709b362866..f3ef03a05d 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbnvals.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbnvals.c
@@ -2,7 +2,7 @@
 // gbnvals: number of entries in a GraphBLAS matrix struct
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbreduce.c b/GraphBLAS/@GrB/private/mexfunctions/gbreduce.c
index 0e1b5863be..55159019f2 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbreduce.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbreduce.c
@@ -2,7 +2,7 @@
 // gbreduce: reduce a sparse matrix to a scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbselect.c b/GraphBLAS/@GrB/private/mexfunctions/gbselect.c
index 2a85465b2f..ce43f42dc7 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbselect.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbselect.c
@@ -2,7 +2,7 @@
 // gbselect: select entries from a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbselectopinfo.c b/GraphBLAS/@GrB/private/mexfunctions/gbselectopinfo.c
index 10782a401d..01440f2c2f 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbselectopinfo.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbselectopinfo.c
@@ -2,7 +2,7 @@
 // gbselectopinfo : print a GraphBLAS selectop (for illustration only)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbsemiringinfo.c b/GraphBLAS/@GrB/private/mexfunctions/gbsemiringinfo.c
index dc178ab447..46acfba61a 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbsemiringinfo.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbsemiringinfo.c
@@ -2,7 +2,7 @@
 // gbsemiringinfo: print a GraphBLAS semiring (for illustration only)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbsetup.c b/GraphBLAS/@GrB/private/mexfunctions/gbsetup.c
index 00abbd7453..b1d11683ae 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbsetup.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbsetup.c
@@ -2,24 +2,19 @@
 // gbsetup: initialize or finalize GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// The gbsetup mexFunction is the only mexFunction that locks itself into
-// MATLAB working memory.  gbsetup ('start') locks this mexFunction, and then
-// initializes GraphBLAS by setting all GraphBLAS global variables and calling
-// GxB_init.  gbsetup ('finish') unlocks this mexFunction, and finalizes
-// GraphBLAS by calling GrB_finalize.
+// gbsetup initializes GraphBLAS by calling GxB_init and by setting
+// all GraphBLAS global variables to their MATLAB defaults.
 
 // Usage:
 
-// gbsetup ('start') ;
-// gbsetup ('finish') ;
+// gbsetup ;
 
 #include "gb_matlab.h"
-#include "GB_printf.h"
 
 void mexFunction
 (
@@ -39,90 +34,25 @@ void mexFunction
     #endif
 
     //--------------------------------------------------------------------------
-    // register the function to clear GraphBLAS
+    // finalize GraphBLAS, if it is already started
     //--------------------------------------------------------------------------
 
-    mexAtExit (gb_at_exit) ;
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    if (nargin != 1 || nargout != 0 || !mxIsChar (pargin [0]))
+    if (GB_Global_GrB_init_called_get ( ))
     {
-        ERROR ("usage: gbsetup (action)") ;
+        GrB_finalize ( ) ;
     }
 
     //--------------------------------------------------------------------------
-    // get the action
+    // allow GraphBLAS to be called again
     //--------------------------------------------------------------------------
 
-    #define LEN 256
-    char action [LEN+2] ;
-    gb_mxstring_to_string (action, LEN, pargin [0], "action") ;
-
-    if (MATCH (action, "start"))
-    { 
-
-        //----------------------------------------------------------------------
-        // initialize GraphBLAS
-        //----------------------------------------------------------------------
-
-        if (mexIsLocked ( ) || GB_Global_GrB_init_called_get ( ))
-        {
-            ERROR ("GrB.init already called") ;
-        }
-        mexLock ( ) ;
-
-        //----------------------------------------------------------------------
-        // set the printf function
-        //----------------------------------------------------------------------
-
-        GB_printf_function = mexPrintf ;
-
-        //----------------------------------------------------------------------
-        // initialize GraphBLAS
-        //----------------------------------------------------------------------
-
-        OK (GxB_init (GrB_NONBLOCKING, mxMalloc, mxCalloc, mxRealloc, mxFree,
-            false)) ;
-
-        //----------------------------------------------------------------------
-        // MATLAB matrices are stored by column
-        //----------------------------------------------------------------------
+    GB_Global_GrB_init_called_set (false) ;
 
-        OK (GxB_Global_Option_set (GxB_FORMAT, GxB_BY_COL)) ;
-
-        // print short format by default
-        GB_Global_print_format_set (1) ;
-
-        // print 1-based indices
-        GB_Global_print_one_based_set (true) ;
-
-        // to make the Sauna workspace persistent
-        GB_Global_persist_function_set (mexMakeMemoryPersistent) ;
-
-    }
-    else if (MATCH (action, "finish"))
-    { 
-
-        //----------------------------------------------------------------------
-        // finalize GraphBLAS
-        //----------------------------------------------------------------------
-
-        if (!mexIsLocked ( ))
-        {
-            ERROR ("GrB.finalize can only be called after GrB.init") ;
-        }
-        mexUnlock ( ) ;
-
-        gb_at_exit ( ) ;
+    //--------------------------------------------------------------------------
+    // initialize GraphBLAS
+    //--------------------------------------------------------------------------
 
-    }
-    else
-    { 
-        ERROR ("gbsetup: unknown action") ;
-    }
+    gb_usage (true, "start") ;
 
     //--------------------------------------------------------------------------
     // save test coverage
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbsize.c b/GraphBLAS/@GrB/private/mexfunctions/gbsize.c
index 265440422c..92e3503768 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbsize.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbsize.c
@@ -2,7 +2,7 @@
 // gbsize: number of rows and columns in a GraphBLAS matrix struct
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbsparse.c b/GraphBLAS/@GrB/private/mexfunctions/gbsparse.c
index f842b0f0dd..f7bda51da6 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbsparse.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbsparse.c
@@ -2,7 +2,7 @@
 // gbsparse: convert a GraphBLAS matrix struct into a MATLAB sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbsubassign.c b/GraphBLAS/@GrB/private/mexfunctions/gbsubassign.c
index 448dba1567..d96f9d7db4 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbsubassign.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbsubassign.c
@@ -2,7 +2,7 @@
 // gbsubassign: assign entries into a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbthreads.c b/GraphBLAS/@GrB/private/mexfunctions/gbthreads.c
index 0109b70e33..5d70d77e7d 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbthreads.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbthreads.c
@@ -2,7 +2,7 @@
 // gbthreads: get/set the maximum # of threads to use in GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbtrans.c b/GraphBLAS/@GrB/private/mexfunctions/gbtrans.c
index 549bb4843e..e7036a4c57 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbtrans.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbtrans.c
@@ -2,7 +2,7 @@
 // gbtrans: sparse matrix transpose
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbtype.c b/GraphBLAS/@GrB/private/mexfunctions/gbtype.c
index 31669df355..a21c0c6630 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbtype.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbtype.c
@@ -2,7 +2,7 @@
 // gbtype: type of a GraphBLAS matrix struct, or any MATLAB variable
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbunopinfo.c b/GraphBLAS/@GrB/private/mexfunctions/gbunopinfo.c
index 38ce862c03..d25e2fcf26 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbunopinfo.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbunopinfo.c
@@ -2,7 +2,7 @@
 // gbunopinfo : print a GraphBLAS unary op (for illustration only)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/mexfunctions/gbvreduce.c b/GraphBLAS/@GrB/private/mexfunctions/gbvreduce.c
index ba4f5bf670..6699ab15ac 100644
--- a/GraphBLAS/@GrB/private/mexfunctions/gbvreduce.c
+++ b/GraphBLAS/@GrB/private/mexfunctions/gbvreduce.c
@@ -2,7 +2,7 @@
 // gbvreduce: reduce a matrix to a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_assign.c b/GraphBLAS/@GrB/private/util/gb_assign.c
index 2a0e6dc401..6137555b36 100644
--- a/GraphBLAS/@GrB/private/util/gb_assign.c
+++ b/GraphBLAS/@GrB/private/util/gb_assign.c
@@ -2,7 +2,7 @@
 // gb_assign: assign entries into a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_at_exit.c b/GraphBLAS/@GrB/private/util/gb_at_exit.c
deleted file mode 100644
index 8e637514f0..0000000000
--- a/GraphBLAS/@GrB/private/util/gb_at_exit.c
+++ /dev/null
@@ -1,42 +0,0 @@
-//------------------------------------------------------------------------------
-// gb_at_exit: function to call if GraphBLAS is cleared
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Each mexFunction in the MATLAB interface to GraphBLAS registers this
-// function via mexAtExit, so that if the mexFunction is cleared, GraphBLAS is
-// finalized.  The global flag that keeps track of whether or not GrB_init has
-// been called is reset to false, to allow further use of GraphBLAS in the same
-// MATLAB session.
-
-#include "gb_matlab.h"
-
-void gb_at_exit (void)
-{
-
-    //--------------------------------------------------------------------------
-    // finalize GraphBLAS, unless it's already been done
-    //--------------------------------------------------------------------------
-
-    if (GB_Global_GrB_init_called_get ( ))
-    { 
-
-        //----------------------------------------------------------------------
-        // finish GraphBLAS
-        //----------------------------------------------------------------------
-
-        GrB_finalize ( ) ;
-
-    }
-
-    //--------------------------------------------------------------------------
-    // allow GraphBLAS to be called again
-    //--------------------------------------------------------------------------
-
-    GB_Global_GrB_init_called_set (false) ;
-}
-
diff --git a/GraphBLAS/@GrB/private/util/gb_binop_to_monoid.c b/GraphBLAS/@GrB/private/util/gb_binop_to_monoid.c
index 157e4caf32..4d9a9dcf44 100644
--- a/GraphBLAS/@GrB/private/util/gb_binop_to_monoid.c
+++ b/GraphBLAS/@GrB/private/util/gb_binop_to_monoid.c
@@ -2,7 +2,7 @@
 // gb_binop_to_monoid: convert a binary operator to the corresponding monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_by_col.c b/GraphBLAS/@GrB/private/util/gb_by_col.c
index 76fc34ce49..cf1807e346 100644
--- a/GraphBLAS/@GrB/private/util/gb_by_col.c
+++ b/GraphBLAS/@GrB/private/util/gb_by_col.c
@@ -2,7 +2,7 @@
 // gb_by_col: ensure a matrix is stored by column
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_default_format.c b/GraphBLAS/@GrB/private/util/gb_default_format.c
index 4b2fe089eb..465b8c3b6f 100644
--- a/GraphBLAS/@GrB/private/util/gb_default_format.c
+++ b/GraphBLAS/@GrB/private/util/gb_default_format.c
@@ -2,7 +2,7 @@
 // gb_default_format: determine the default format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_export.c b/GraphBLAS/@GrB/private/util/gb_export.c
index 5ad4ad7473..75029e73e7 100644
--- a/GraphBLAS/@GrB/private/util/gb_export.c
+++ b/GraphBLAS/@GrB/private/util/gb_export.c
@@ -2,7 +2,7 @@
 // gb_export: export a GrB_Matrix as a MATLAB matrix or GraphBLAS struct
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_export_to_mxfull.c b/GraphBLAS/@GrB/private/util/gb_export_to_mxfull.c
index fa8e5efbbf..4f09de2b62 100644
--- a/GraphBLAS/@GrB/private/util/gb_export_to_mxfull.c
+++ b/GraphBLAS/@GrB/private/util/gb_export_to_mxfull.c
@@ -2,7 +2,7 @@
 // gb_export_to_mxfull: export a dense array to a MATLAB dense matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_export_to_mxsparse.c b/GraphBLAS/@GrB/private/util/gb_export_to_mxsparse.c
index ebd1d7ea94..2723c81d30 100644
--- a/GraphBLAS/@GrB/private/util/gb_export_to_mxsparse.c
+++ b/GraphBLAS/@GrB/private/util/gb_export_to_mxsparse.c
@@ -2,7 +2,7 @@
 // gb_export_to_mxsparse: export a GrB_Matrix to a MATLAB sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_export_to_mxstruct.c b/GraphBLAS/@GrB/private/util/gb_export_to_mxstruct.c
index f5d835d5d6..20dacea840 100644
--- a/GraphBLAS/@GrB/private/util/gb_export_to_mxstruct.c
+++ b/GraphBLAS/@GrB/private/util/gb_export_to_mxstruct.c
@@ -2,7 +2,7 @@
 // gb_export_to_mxstruct: export a GrB_Matrix to a MATLAB struct
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_find_dot.c b/GraphBLAS/@GrB/private/util/gb_find_dot.c
index d96ebb791d..f1e40ccf54 100644
--- a/GraphBLAS/@GrB/private/util/gb_find_dot.c
+++ b/GraphBLAS/@GrB/private/util/gb_find_dot.c
@@ -2,7 +2,7 @@
 // gb_find_dot:  find the first two occurences of '.' in a string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_first_binop.c b/GraphBLAS/@GrB/private/util/gb_first_binop.c
index 8b213253b8..67695d2a44 100644
--- a/GraphBLAS/@GrB/private/util/gb_first_binop.c
+++ b/GraphBLAS/@GrB/private/util/gb_first_binop.c
@@ -2,7 +2,7 @@
 // gb_first_binop: return the GrB_FIRST operator for a given type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_get_deep.c b/GraphBLAS/@GrB/private/util/gb_get_deep.c
index 4d9bcb5ead..0df7c8b26b 100644
--- a/GraphBLAS/@GrB/private/util/gb_get_deep.c
+++ b/GraphBLAS/@GrB/private/util/gb_get_deep.c
@@ -2,7 +2,7 @@
 // gb_get_deep: create a deep GrB_Matrix copy of a MATLAB X
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_get_format.c b/GraphBLAS/@GrB/private/util/gb_get_format.c
index 50743adf58..052c52be69 100644
--- a/GraphBLAS/@GrB/private/util/gb_get_format.c
+++ b/GraphBLAS/@GrB/private/util/gb_get_format.c
@@ -2,7 +2,7 @@
 // gb_get_format: determine the format of a matrix result 
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_get_mxargs.c b/GraphBLAS/@GrB/private/util/gb_get_mxargs.c
index d75eb06ee6..ce33317300 100644
--- a/GraphBLAS/@GrB/private/util/gb_get_mxargs.c
+++ b/GraphBLAS/@GrB/private/util/gb_get_mxargs.c
@@ -2,7 +2,7 @@
 // gb_get_mxargs: get input arguments to a GraphBLAS mexFunction 
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_get_shallow.c b/GraphBLAS/@GrB/private/util/gb_get_shallow.c
index 3611096e63..65fa04e947 100644
--- a/GraphBLAS/@GrB/private/util/gb_get_shallow.c
+++ b/GraphBLAS/@GrB/private/util/gb_get_shallow.c
@@ -2,7 +2,7 @@
 // gb_get_shallow: create a shallow copy of a MATLAB sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_is_all.c b/GraphBLAS/@GrB/private/util/gb_is_all.c
index 7984b969a1..898999c4ce 100644
--- a/GraphBLAS/@GrB/private/util/gb_is_all.c
+++ b/GraphBLAS/@GrB/private/util/gb_is_all.c
@@ -2,7 +2,7 @@
 // gb_is_all: check two matrices
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_is_equal.c b/GraphBLAS/@GrB/private/util/gb_is_equal.c
index e0e687bb19..2964ba14f5 100644
--- a/GraphBLAS/@GrB/private/util/gb_is_equal.c
+++ b/GraphBLAS/@GrB/private/util/gb_is_equal.c
@@ -2,7 +2,7 @@
 // gb_is_equal: check two matrices for exact equality
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_is_shallow.c b/GraphBLAS/@GrB/private/util/gb_is_shallow.c
index 9daa805f29..5fd03a38eb 100644
--- a/GraphBLAS/@GrB/private/util/gb_is_shallow.c
+++ b/GraphBLAS/@GrB/private/util/gb_is_shallow.c
@@ -2,7 +2,7 @@
 // gb_is_shallow: determine if a GrB_matrix has any shallow components
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_is_vector.c b/GraphBLAS/@GrB/private/util/gb_is_vector.c
index a3f179c880..ee7b5eba00 100644
--- a/GraphBLAS/@GrB/private/util/gb_is_vector.c
+++ b/GraphBLAS/@GrB/private/util/gb_is_vector.c
@@ -2,7 +2,7 @@
 // gb_is_vector: determine if a GrB_matrix is a row or column vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_matlab.h b/GraphBLAS/@GrB/private/util/gb_matlab.h
index 4a2d359235..0b5119fd03 100644
--- a/GraphBLAS/@GrB/private/util/gb_matlab.h
+++ b/GraphBLAS/@GrB/private/util/gb_matlab.h
@@ -2,7 +2,7 @@
 // gb_matlab.h: definitions for MATLAB interface for SuiteSparse:GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -306,8 +306,6 @@ void gb_mxfree              // mxFree wrapper
     void **p_handle         // handle to pointer to be freed
 ) ;
 
-void gb_at_exit (void)  ;   // called when GraphBLAS is cleared by MATLAB
-
 int64_t *gb_mxarray_to_list     // return List of integers
 (
     const mxArray *mxList,      // list to extract
@@ -448,5 +446,13 @@ void gb_get_mxargs
     GxB_Format_Value *fmt       // desc.format
 ) ;
 
+int64_t gb_norm_kind (const mxArray *arg) ;
+
+double gb_norm              // compute norm (A,kind)
+(
+    GrB_Matrix A,
+    int64_t norm_kind       // 0, 1, 2, INT64_MAX, or INT64_MIN
+) ;
+
 #endif
 
diff --git a/GraphBLAS/@GrB/private/util/gb_matrix_assign_scalar.c b/GraphBLAS/@GrB/private/util/gb_matrix_assign_scalar.c
index 12ef9a5674..5948932bae 100644
--- a/GraphBLAS/@GrB/private/util/gb_matrix_assign_scalar.c
+++ b/GraphBLAS/@GrB/private/util/gb_matrix_assign_scalar.c
@@ -2,7 +2,7 @@
 // gb_matrix_assign_scalar: assign scalar into a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxarray_is_empty.c b/GraphBLAS/@GrB/private/util/gb_mxarray_is_empty.c
index 9d2589efb1..e4d8a1c505 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxarray_is_empty.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxarray_is_empty.c
@@ -2,7 +2,7 @@
 // gb_mxarray_is_empty: check if a MATLAB mxArray is empty
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxarray_is_scalar.c b/GraphBLAS/@GrB/private/util/gb_mxarray_is_scalar.c
index cbe13762cd..d128204b3b 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxarray_is_scalar.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxarray_is_scalar.c
@@ -2,7 +2,7 @@
 // gb_mxarray_is_scalar: check if MATLAB mxArray is a non-sparse numeric scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxarray_to_descriptor.c b/GraphBLAS/@GrB/private/util/gb_mxarray_to_descriptor.c
index 2253517910..148dd9091f 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxarray_to_descriptor.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxarray_to_descriptor.c
@@ -2,7 +2,7 @@
 // gb_mxarray_to_descriptor: get the contents of a GraphBLAS Descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -66,9 +66,17 @@ static void get_descriptor
             { 
                 OK (GxB_Desc_set (D, field, GrB_TRAN)) ;
             }
-            else if (MATCH (s, "complement"))
+            else if (MATCH (s, "complement") || MATCH (s, "comp"))
             { 
-                OK (GxB_Desc_set (D, field, GrB_SCMP)) ;
+                OK (GxB_Desc_set (D, field, GrB_COMP)) ;
+            }
+            else if (MATCH (s, "structure") || MATCH (s, "structural"))
+            { 
+                OK (GxB_Desc_set (D, field, GrB_STRUCTURE)) ;
+            }
+            else if (MATCH (s, "structural complement"))
+            { 
+                OK (GxB_Desc_set (D, field, GrB_COMP + GrB_STRUCTURE)) ;
             }
             else if (MATCH (s, "replace"))
             { 
@@ -82,10 +90,18 @@ static void get_descriptor
             { 
                 OK (GxB_Desc_set (D, field, GxB_AxB_DOT)) ;
             }
+            else if (MATCH (s, "saxpy"))
+            { 
+                OK (GxB_Desc_set (D, field, GxB_AxB_SAXPY)) ;
+            }
             else if (MATCH (s, "heap"))
             { 
                 OK (GxB_Desc_set (D, field, GxB_AxB_HEAP)) ;
             }
+            else if (MATCH (s, "hash"))
+            { 
+                OK (GxB_Desc_set (D, field, GxB_AxB_HASH)) ;
+            }
             else
             { 
                 // the string must be one of the strings listed above
diff --git a/GraphBLAS/@GrB/private/util/gb_mxarray_to_list.c b/GraphBLAS/@GrB/private/util/gb_mxarray_to_list.c
index d52295fb5d..e13a2d2289 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxarray_to_list.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxarray_to_list.c
@@ -2,7 +2,7 @@
 // gb_mxarray_to_list: convert a MATLAB array to a list of integers
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxarray_type.c b/GraphBLAS/@GrB/private/util/gb_mxarray_type.c
index d41ae109ff..6fce69dcc7 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxarray_type.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxarray_type.c
@@ -2,7 +2,7 @@
 // gb_mxarray_type: return the GraphBLAS type of a MATLAB matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxcell_to_index.c b/GraphBLAS/@GrB/private/util/gb_mxcell_to_index.c
index 6cb8802d22..74b959b5fb 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxcell_to_index.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxcell_to_index.c
@@ -2,7 +2,7 @@
 // gb_mxcell_to_index: convert cell array to index list I or colon expression
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxfree.c b/GraphBLAS/@GrB/private/util/gb_mxfree.c
index 776ce390a4..583791b7c0 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxfree.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxfree.c
@@ -2,7 +2,7 @@
 // gb_mxfree: mxFree wrapper
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_binop.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_binop.c
index ada1110408..30dd70d204 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_binop.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_binop.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_binop: get a GraphBLAS operator from a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_format.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_format.c
index a63f9a81ec..f79fcd08cc 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_format.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_format.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_format: get the format from a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_monoid.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_monoid.c
index f0139100ad..b2ffa86596 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_monoid.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_monoid.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_monoid: get a GraphBLAS monoid from a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_selectop.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_selectop.c
index 9dbb2502e1..6c0f8d53d8 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_selectop.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_selectop.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_selectop: get a GraphBLAS select operator from a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_semiring.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_semiring.c
index a27f0ab7ed..778be54c97 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_semiring.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_semiring.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_semiring: get a GraphBLAS semiring from a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_string.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_string.c
index 27c57cea25..c413da33c0 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_string.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_string.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_string: copy a MATLAB string into a C string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_type.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_type.c
index 03952011e5..d6e7da5e7d 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_type.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_type.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_type: return the GraphBLAS type from a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_mxstring_to_unop.c b/GraphBLAS/@GrB/private/util/gb_mxstring_to_unop.c
index c8297021de..b4e36cdf7f 100644
--- a/GraphBLAS/@GrB/private/util/gb_mxstring_to_unop.c
+++ b/GraphBLAS/@GrB/private/util/gb_mxstring_to_unop.c
@@ -2,7 +2,7 @@
 // gb_mxstring_to_unop: get a GraphBLAS unary operator from a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_norm.c b/GraphBLAS/@GrB/private/util/gb_norm.c
new file mode 100644
index 0000000000..b3077c7e39
--- /dev/null
+++ b/GraphBLAS/@GrB/private/util/gb_norm.c
@@ -0,0 +1,183 @@
+//------------------------------------------------------------------------------
+// gb_norm: compute the norm of a GraphBLAS matrix
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "gb_matlab.h"
+
+double gb_norm              // return norm (A,kind), or 0 if A has no entries
+(
+    GrB_Matrix A,           // matrix or vector whose norm is computed
+    int64_t norm_kind       // 0:fro, 1, 2, INT64_MAX:inf, or INT64_MIN:-inf
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get input matrix, select types and operators, and allocate X
+    //--------------------------------------------------------------------------
+
+    GrB_Index nrows, ncols, nvals ;
+    OK (GrB_Matrix_nvals (&nvals, A)) ;
+    if (nvals == 0) return ((double) 0) ;   // nothing allocated yet
+
+    GrB_Type atype, xtype ;
+    OK (GrB_Matrix_nrows (&nrows, A)) ;
+    OK (GrB_Matrix_ncols (&ncols, A)) ;
+    OK (GxB_Matrix_type (&atype, A)) ;
+
+    GrB_UnaryOp absop ;
+    GrB_BinaryOp timesop ;
+    GrB_Monoid sumop, maxop, minop ;
+    GrB_Vector t = NULL ;       // row/column sums; used by matrix norms only
+    GrB_Matrix X = NULL ;       // workspace: abs (A) or A.*A, of type xtype
+
+    if (atype == GrB_FP32)
+    {
+        // if A is FP32, use the FP32 type and operators
+        xtype = GrB_FP32 ;
+        absop = GxB_ABS_FP32 ;
+        sumop = GxB_PLUS_FP32_MONOID ;
+        timesop = GrB_TIMES_FP32 ;
+        maxop = GxB_MAX_FP32_MONOID ;
+        minop = GxB_MIN_FP32_MONOID ;
+    }
+    else
+    {
+        // otherwise, use FP64 type and operators; this will typecast the
+        // input matrix to FP64 if A is not of that type.
+        xtype = GrB_FP64 ;
+        absop = GxB_ABS_FP64 ;
+        sumop = GxB_PLUS_FP64_MONOID ;
+        timesop = GrB_TIMES_FP64 ;
+        maxop = GxB_MAX_FP64_MONOID ;
+        minop = GxB_MIN_FP64_MONOID ;
+    }
+
+    OK (GrB_Matrix_new (&X, xtype, nrows, ncols)) ;
+
+    //--------------------------------------------------------------------------
+    // compute the norm
+    //--------------------------------------------------------------------------
+
+    double s = 0 ;      // result; stays 0 if a branch below computes nothing
+
+    if (nrows == 1 || ncols == 1 || norm_kind == 0)
+    {
+
+        //----------------------------------------------------------------------
+        // vector or Frobenius norm
+        //----------------------------------------------------------------------
+
+        switch (norm_kind)
+        {
+
+            case 0:     // Frobenius norm
+            case 2:     // 2-norm
+
+                // X = A .* A
+                OK (GrB_eWiseMult_Matrix_BinaryOp (X, NULL, NULL, timesop,
+                    A, A, NULL)) ;
+                // s = sum (X)
+                OK (GrB_Matrix_reduce_FP64 (&s, NULL, sumop, X, NULL)) ;
+                s = sqrt (s) ;
+                break ;
+
+            case 1:     // 1-norm
+
+                // X = abs (A)
+                OK (GrB_Matrix_apply (X, NULL, NULL, absop, A, NULL)) ;
+                // s = sum (X)
+                OK (GrB_Matrix_reduce_FP64 (&s, NULL, sumop, X, NULL)) ;
+                break ;
+
+            case INT64_MAX:     // inf-norm
+
+                // X = abs (A)
+                OK (GrB_Matrix_apply (X, NULL, NULL, absop, A, NULL)) ;
+                // s = max (X)
+                OK (GrB_Matrix_reduce_FP64 (&s, NULL, maxop, X, NULL)) ;
+                break ;
+
+            case INT64_MIN:     // (-inf)-norm
+                // any missing entry gives min 0, so reduce only a dense A
+                if (GB_is_dense (A))    // assumes dense means no missing entry
+                {
+                    // X = abs (A)
+                    OK (GrB_Matrix_apply (X, NULL, NULL, absop, A, NULL)) ;
+                    // s = min (X)
+                    OK (GrB_Matrix_reduce_FP64 (&s, NULL, minop, X, NULL)) ;
+                }
+                break ;
+
+            default:
+
+                ERROR ("unknown norm") ;
+                break ;
+        }
+
+    }
+    else
+    {
+
+        //----------------------------------------------------------------------
+        // matrix norm
+        //----------------------------------------------------------------------
+
+        switch (norm_kind)
+        {
+
+            case 2:     // 2-norm
+
+                ERROR ("2-norm not available for GrB matrices") ;
+                break ;
+
+            case 1:     // 1-norm:  max sum of columns of abs (A)
+
+                // X = abs (A)
+                OK (GrB_Matrix_apply (X, NULL, NULL, absop, A, NULL)) ;
+                // t = zeros (ncols,1)
+                OK (GrB_Vector_new (&t, xtype, ncols)) ;
+                // t(j) = sum of the jth column, X(:,j)
+                OK (GrB_Matrix_reduce_Monoid (t, NULL, NULL, sumop, X,
+                    GrB_DESC_T0)) ;
+                // s = max (t)
+                OK (GrB_Vector_reduce_FP64 (&s, NULL, maxop, t, NULL)) ;
+                break ;
+
+            case INT64_MAX:     // inf-norm:  max sum of rows of abs (A)
+
+                // X = abs (A)
+                OK (GrB_Matrix_apply (X, NULL, NULL, absop, A, NULL)) ;
+                // t = zeros (nrows,1)
+                OK (GrB_Vector_new (&t, xtype, nrows)) ;
+                // t(i) = sum of the ith row, X(i,:)
+                OK (GrB_Matrix_reduce_Monoid (t, NULL, NULL, sumop, X, NULL)) ;
+                // s = max (t)
+                OK (GrB_Vector_reduce_FP64 (&s, NULL, maxop, t, NULL)) ;
+                break ;
+
+            case INT64_MIN:
+
+                ERROR ("(-inf)-norm not available for GrB matrices") ;
+                break ;
+
+            default:
+
+                ERROR ("unknown norm") ;
+                break ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    OK (GrB_Matrix_free (&X)) ;
+    OK (GrB_Vector_free (&t)) ;
+    return (s) ;
+}
+
diff --git a/GraphBLAS/@GrB/private/util/gb_norm_kind.c b/GraphBLAS/@GrB/private/util/gb_norm_kind.c
new file mode 100644
index 0000000000..90f0d37647
--- /dev/null
+++ b/GraphBLAS/@GrB/private/util/gb_norm_kind.c
@@ -0,0 +1,60 @@
+//------------------------------------------------------------------------------
+// gb_norm_kind: determine the kind of norm to compute
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "gb_matlab.h"
+
+// 'fro':       Frobenius norm
+// 1:           1-norm
+// 2:           2-norm
+// INFINITY:    inf-norm
+// -INFINITY:   (-inf)-norm
+
+int64_t gb_norm_kind (const mxArray *arg)
+{
+    // Map the MATLAB 'kind' argument to the integer code used by gb_norm:
+    // 'fro' -> 0, 1 -> 1, 2 -> 2, inf -> INT64_MAX, -inf -> INT64_MIN.
+    // Recognized kinds return directly; anything else falls through to the
+    // single error report at the end of the function.
+
+    if (mxIsChar (arg))
+    {
+        // a string: only 'fro' (the Frobenius norm) is accepted
+        char kind [65] ;
+        gb_mxstring_to_string (kind, 64, arg, "kind") ;
+
+        if (MATCH (kind, "fro"))
+        {
+            return (0) ;
+        }
+    }
+    else if (mxIsScalar (arg))
+    {
+        // a scalar: only 1, 2, inf, and -inf are accepted
+        double x = mxGetScalar (arg) ;
+
+        if (x == INFINITY)
+        {
+            return (INT64_MAX) ;
+        }
+        if (x == -INFINITY)
+        {
+            return (INT64_MIN) ;
+        }
+        if (x == 1 || x == 2)
+        {
+            // x is exactly 1 or 2 here, so the integer cast is exact
+            return ((int64_t) x) ;
+        }
+    }
+
+    // reached for an unknown string, an unsupported scalar value, or an
+    // argument that is neither a string nor a scalar
+    ERROR ("unknown norm") ;
+}
+
diff --git a/GraphBLAS/@GrB/private/util/gb_semiring.c b/GraphBLAS/@GrB/private/util/gb_semiring.c
index 0041568a7f..efd942f137 100644
--- a/GraphBLAS/@GrB/private/util/gb_semiring.c
+++ b/GraphBLAS/@GrB/private/util/gb_semiring.c
@@ -2,7 +2,7 @@
 // gb_semiring: get a built-in semiring from an add and multiply operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,34 +13,34 @@
 // built-in semirings
 //------------------------------------------------------------------------------
 
-// Using built-in types and operators, 1040 unique semirings can be built.  This
+// Using built-in types and operators, 1355 unique semirings can be built.  This
 // count excludes redundant Boolean operators (for example GxB_TIMES_BOOL and
 // GxB_LAND_BOOL are different operators but they are redundant since they
 // always return the same result):
 
-// 760 semirings with a multiply operator TxT -> T where T is non-Boolean, from
+// 1000 semirings with a multiply operator TxT -> T where T is non-Boolean, from
 // the complete cross product of:
 
-//      4 add monoids (MIN, MAX, PLUS, TIMES)
-//      19 multiply operators:
-//          (FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
-//           ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
-//           LOR, LAND, LXOR)
+//      5 add monoids (MIN, MAX, PLUS, TIMES, ANY)
+//      20 multiply operators:
+//         FIRST, SECOND, PAIR, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
+//         ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
+//         LOR, LAND, LXOR
 //      10 non-Boolean types, T
 
-// 240 semirings with a comparison operator TxT -> bool, where T is
+// 300 semirings with a comparison operator TxT -> bool, where T is
 // non-Boolean, from the complete cross product of:
 
-//      4 Boolean add monoids: (LAND, LOR, LXOR, EQ)
+//      5 Boolean add monoids: (LAND, LOR, LXOR, EQ, ANY)
 //      6 multiply operators: (EQ, NE, GT, LT, GE, LE)
 //      10 non-Boolean types, T
 
-// 40 semirings with purely Boolean types, bool x bool -> bool, from the
+// 55 semirings with purely Boolean types, bool x bool -> bool, from the
 // complete cross product of:
 
-//      4 Boolean add monoids (LAND, LOR, LXOR, EQ)
-//      10 multiply operators:
-//          (FIRST, SECOND, LOR, LAND, LXOR, EQ, GT, LT, GE, LE)
+//      5 Boolean add monoids (LAND, LOR, LXOR, EQ, ANY)
+//      11 multiply operators:
+//          FIRST, SECOND, PAIR, LOR, LAND, LXOR, EQ, GT, LT, GE, LE
 
 // In the names below, each semiring has a name of the form GxB_add_mult_T
 // where add is the additive monoid, mult is the multiply operator, and T is
@@ -64,29 +64,36 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
     // check inputs
     //--------------------------------------------------------------------------
 
-    CHECK_ERROR (add == NULL || mult == NULL, "invalid semiring") ;
+    CHECK_ERROR (add == NULL || mult == NULL,
+        "invalid semiring (add or mult missing)") ;
 
     GB_Opcode add_opcode  = add->opcode ;       // add opcode
     GB_Opcode mult_opcode = mult->opcode ;      // multiply opcode
 
     // add must be a monoid
-    CHECK_ERROR (add->xtype != add->ztype, "invalid semiring") ;
-    CHECK_ERROR (add->ytype != add->ztype, "invalid semiring") ;
+    CHECK_ERROR (add->xtype != add->ztype,
+        "invalid semiring (add operator not a monoid)") ;
+    CHECK_ERROR (add->ytype != add->ztype,
+        "invalid semiring (add operator not a monoid)") ;
 
     // the type of add must match the mult->ztype
-    CHECK_ERROR (add->ztype != mult->ztype, "invalid semiring") ;
+    CHECK_ERROR (add->ztype != mult->ztype, 
+        "invalid semiring (add opeartor not a monoid)") ;
 
     // The conditions above are true for any semiring and any A and B, whether
     // or not this function handles the semiring as hard-coded.  Now return for
     // cases this function does not handle.  This function handles only
-    // built-in operators or compile-time user-defined operators.
+    // built-in operators.
 
-    CHECK_ERROR (add_opcode  >= GB_USER_R_opcode, "invalid semiring") ;
-    CHECK_ERROR (mult_opcode >= GB_USER_R_opcode, "invalid semiring") ;
+    CHECK_ERROR (add_opcode  >= GB_USER_opcode,
+        "invalid semiring (add operator not built-in)") ;
+    CHECK_ERROR (mult_opcode >= GB_USER_opcode,
+        "invalid semiring (multiply operator not built-in)") ;
 
     // this condition is true for all built-in operators, but not required for
     // user-defined operators.  FUTURE: likely true for complex semirings too.
-    CHECK_ERROR (mult->xtype != mult->ytype, "invalid semiring") ;
+    CHECK_ERROR (mult->xtype != mult->ytype,
+        "invalid semiring (x and y types differ)") ;
 
     //--------------------------------------------------------------------------
     // rename redundant Boolean multiply operators
@@ -95,9 +102,10 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
     GB_Type_code xycode = mult->xtype->code ;
     GB_Type_code zcode  = mult->ztype->code ;
 
-    // FUTURE: xycode and zcode may be GB_UCT_code with gb_complex_type
-    CHECK_ERROR (xycode >= GB_UDT_code, "invalid semiring") ;
-    CHECK_ERROR (zcode  >= GB_UDT_code, "invalid semiring") ;
+    CHECK_ERROR (xycode >= GB_UDT_code,
+        "invalid semiring (x and y type not built-in)") ;
+    CHECK_ERROR (zcode  >= GB_UDT_code,
+        "invalid semiring (z type not built-in)") ;
 
     if (xycode == GB_BOOL_code)
     { 
@@ -133,7 +141,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
     {
 
         //----------------------------------------------------------------------
-        // 640 semirings with TxT->T multiply operators
+        // 1000 semirings with TxT->T multiply operators
         //----------------------------------------------------------------------
 
         // x,y,z are all the same non-Boolean type
@@ -141,7 +149,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
         switch (mult_opcode)
         {
 
-            case GB_FIRST_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_FIRST_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -150,16 +158,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_FIRST_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_FIRST_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MIN_FIRST_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MIN_FIRST_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MIN_FIRST_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MIN_FIRST_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MIN_FIRST_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MIN_FIRST_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MIN_FIRST_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MIN_FIRST_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MIN_FIRST_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MIN_FIRST_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MIN_FIRST_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MIN_FIRST_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MIN_FIRST_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MIN_FIRST_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MIN_FIRST_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MIN_FIRST_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MIN_FIRST_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MIN_FIRST_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -168,16 +176,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_FIRST_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_FIRST_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MAX_FIRST_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MAX_FIRST_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MAX_FIRST_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MAX_FIRST_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MAX_FIRST_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MAX_FIRST_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MAX_FIRST_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MAX_FIRST_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MAX_FIRST_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MAX_FIRST_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MAX_FIRST_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MAX_FIRST_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MAX_FIRST_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MAX_FIRST_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MAX_FIRST_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MAX_FIRST_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MAX_FIRST_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MAX_FIRST_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -186,16 +194,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_FIRST_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_FIRST_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_FIRST_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_FIRST_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_FIRST_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_FIRST_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_FIRST_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_FIRST_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_FIRST_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_FIRST_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_FIRST_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_FIRST_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_PLUS_FIRST_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_FIRST_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_PLUS_FIRST_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_FIRST_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_PLUS_FIRST_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_FIRST_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_FIRST_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_FIRST_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -204,16 +212,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_FIRST_INT8  ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_FIRST_UINT8 ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_FIRST_INT16 ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_FIRST_UINT16) ;
-                            case GB_INT32_code  : return (GxB_TIMES_FIRST_INT32 ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_FIRST_UINT32) ;
-                            case GB_INT64_code  : return (GxB_TIMES_FIRST_INT64 ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_FIRST_UINT64) ;
-                            case GB_FP32_code   : return (GxB_TIMES_FIRST_FP32  ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_FIRST_FP64  ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_FIRST_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_FIRST_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_TIMES_FIRST_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_FIRST_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_TIMES_FIRST_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_FIRST_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_TIMES_FIRST_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_FIRST_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_FIRST_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_FIRST_FP64   ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_FIRST_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_ANY_FIRST_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_ANY_FIRST_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_ANY_FIRST_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_ANY_FIRST_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_ANY_FIRST_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_ANY_FIRST_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_ANY_FIRST_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_ANY_FIRST_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_ANY_FIRST_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -221,7 +247,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_SECOND_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_SECOND_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -230,16 +256,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_SECOND_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_SECOND_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_MIN_SECOND_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_MIN_SECOND_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_MIN_SECOND_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_MIN_SECOND_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_MIN_SECOND_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_MIN_SECOND_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_MIN_SECOND_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_MIN_SECOND_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_MIN_SECOND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_MIN_SECOND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_MIN_SECOND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_MIN_SECOND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_MIN_SECOND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_MIN_SECOND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_MIN_SECOND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_MIN_SECOND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_MIN_SECOND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_MIN_SECOND_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -248,16 +274,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_SECOND_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_SECOND_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_MAX_SECOND_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_MAX_SECOND_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_MAX_SECOND_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_MAX_SECOND_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_MAX_SECOND_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_MAX_SECOND_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_MAX_SECOND_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_MAX_SECOND_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_MAX_SECOND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_MAX_SECOND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_MAX_SECOND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_MAX_SECOND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_MAX_SECOND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_MAX_SECOND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_MAX_SECOND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_MAX_SECOND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_MAX_SECOND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_MAX_SECOND_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -266,16 +292,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_SECOND_INT8  ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_SECOND_UINT8 ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_SECOND_INT16 ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_SECOND_UINT16) ;
-                            case GB_INT32_code  : return (GxB_PLUS_SECOND_INT32 ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_SECOND_UINT32) ;
-                            case GB_INT64_code  : return (GxB_PLUS_SECOND_INT64 ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_SECOND_UINT64) ;
-                            case GB_FP32_code   : return (GxB_PLUS_SECOND_FP32  ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_SECOND_FP64  ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_SECOND_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_SECOND_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_PLUS_SECOND_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_SECOND_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_PLUS_SECOND_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_SECOND_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_PLUS_SECOND_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_SECOND_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_SECOND_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_SECOND_FP64   ) ;
                             default : ;
                         }
                         break ;
@@ -284,16 +310,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_SECOND_INT8 ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_SECOND_UINT8) ;
-                            case GB_INT16_code  : return (GxB_TIMES_SECOND_INT16) ;
-                            case GB_UINT16_code : return (GxB_TIMES_SECOND_UINT16) ;
-                            case GB_INT32_code  : return (GxB_TIMES_SECOND_INT32) ;
-                            case GB_UINT32_code : return (GxB_TIMES_SECOND_UINT32) ;
-                            case GB_INT64_code  : return (GxB_TIMES_SECOND_INT64) ;
-                            case GB_UINT64_code : return (GxB_TIMES_SECOND_UINT64) ;
-                            case GB_FP32_code   : return (GxB_TIMES_SECOND_FP32 ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_SECOND_FP64 ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_SECOND_INT8  ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_SECOND_UINT8 ) ;
+                            case GB_INT16_code : return (GxB_TIMES_SECOND_INT16 ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_SECOND_UINT16) ;
+                            case GB_INT32_code : return (GxB_TIMES_SECOND_INT32 ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_SECOND_UINT32) ;
+                            case GB_INT64_code : return (GxB_TIMES_SECOND_INT64 ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_SECOND_UINT64) ;
+                            case GB_FP32_code  : return (GxB_TIMES_SECOND_FP32  ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_SECOND_FP64  ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_SECOND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_ANY_SECOND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_ANY_SECOND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_ANY_SECOND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_ANY_SECOND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_ANY_SECOND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_ANY_SECOND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_ANY_SECOND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_ANY_SECOND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_ANY_SECOND_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -301,7 +345,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_MIN_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_PAIR_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -310,16 +354,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_MIN_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_MIN_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MIN_MIN_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MIN_MIN_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MIN_MIN_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MIN_MIN_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MIN_MIN_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MIN_MIN_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MIN_MIN_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MIN_MIN_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MIN_PAIR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_PAIR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_PAIR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_PAIR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_PAIR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_PAIR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_PAIR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_PAIR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_PAIR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_PAIR_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -328,16 +372,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_MIN_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_MIN_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MAX_MIN_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MAX_MIN_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MAX_MIN_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MAX_MIN_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MAX_MIN_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MAX_MIN_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MAX_MIN_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MAX_MIN_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MAX_PAIR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_PAIR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_PAIR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_PAIR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_PAIR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_PAIR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_PAIR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_PAIR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_PAIR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_PAIR_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -346,16 +390,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_MIN_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_MIN_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_MIN_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_MIN_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_MIN_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_MIN_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_MIN_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_MIN_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_MIN_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_MIN_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_PAIR_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_PAIR_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_PAIR_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_PAIR_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_PAIR_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_PAIR_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_PAIR_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_PAIR_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_PAIR_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_PAIR_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -364,16 +408,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_MIN_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_MIN_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_MIN_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_MIN_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_MIN_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_MIN_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_MIN_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_MIN_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_MIN_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_MIN_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_PAIR_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_PAIR_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_PAIR_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_PAIR_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_PAIR_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_PAIR_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_PAIR_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_PAIR_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_PAIR_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_PAIR_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_PAIR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_PAIR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_PAIR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_PAIR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_PAIR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_PAIR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_PAIR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_PAIR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_PAIR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_PAIR_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -381,7 +443,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_MAX_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_MIN_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -390,16 +452,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_MAX_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_MAX_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MIN_MAX_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MIN_MAX_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MIN_MAX_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MIN_MAX_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MIN_MAX_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MIN_MAX_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MIN_MAX_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MIN_MAX_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MIN_MIN_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_MIN_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_MIN_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_MIN_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_MIN_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_MIN_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_MIN_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_MIN_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_MIN_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_MIN_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -408,16 +470,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_MAX_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_MAX_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MAX_MAX_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MAX_MAX_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MAX_MAX_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MAX_MAX_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MAX_MAX_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MAX_MAX_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MAX_MAX_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MAX_MAX_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MAX_MIN_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_MIN_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_MIN_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_MIN_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_MIN_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_MIN_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_MIN_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_MIN_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_MIN_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_MIN_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -426,16 +488,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_MAX_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_MAX_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_MAX_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_MAX_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_MAX_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_MAX_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_MAX_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_MAX_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_MAX_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_MAX_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_MIN_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_MIN_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_MIN_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_MIN_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_MIN_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_MIN_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_MIN_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_MIN_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_MIN_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_MIN_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -444,16 +506,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_MAX_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_MAX_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_MAX_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_MAX_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_MAX_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_MAX_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_MAX_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_MAX_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_MAX_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_MAX_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_MIN_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_MIN_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_MIN_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_MIN_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_MIN_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_MIN_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_MIN_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_MIN_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_MIN_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_MIN_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_MIN_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_MIN_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_MIN_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_MIN_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_MIN_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_MIN_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_MIN_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_MIN_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_MIN_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_MIN_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -461,7 +541,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_PLUS_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_MAX_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -470,16 +550,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_PLUS_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_PLUS_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_PLUS_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_PLUS_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_PLUS_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_PLUS_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_PLUS_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_PLUS_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_PLUS_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_PLUS_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_MAX_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_MAX_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_MAX_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_MAX_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_MAX_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_MAX_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_MAX_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_MAX_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_MAX_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_MAX_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -488,16 +568,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_PLUS_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_PLUS_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_PLUS_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_PLUS_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_PLUS_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_PLUS_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_PLUS_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_PLUS_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_PLUS_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_PLUS_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_MAX_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_MAX_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_MAX_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_MAX_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_MAX_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_MAX_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_MAX_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_MAX_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_MAX_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_MAX_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -506,16 +586,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_PLUS_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_PLUS_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_PLUS_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_PLUS_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_PLUS_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_PLUS_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_PLUS_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_PLUS_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_PLUS_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_PLUS_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_MAX_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_MAX_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_MAX_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_MAX_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_MAX_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_MAX_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_MAX_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_MAX_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_MAX_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_MAX_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -524,16 +604,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_PLUS_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_PLUS_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_PLUS_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_PLUS_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_PLUS_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_PLUS_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_PLUS_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_PLUS_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_PLUS_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_PLUS_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_MAX_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_MAX_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_MAX_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_MAX_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_MAX_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_MAX_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_MAX_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_MAX_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_MAX_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_MAX_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_MAX_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_MAX_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_MAX_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_MAX_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_MAX_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_MAX_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_MAX_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_MAX_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_MAX_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_MAX_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -541,7 +639,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_MINUS_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_PLUS_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -550,16 +648,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_MINUS_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_MINUS_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MIN_MINUS_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MIN_MINUS_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MIN_MINUS_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MIN_MINUS_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MIN_MINUS_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MIN_MINUS_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MIN_MINUS_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MIN_MINUS_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MIN_PLUS_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_PLUS_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_PLUS_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_PLUS_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_PLUS_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_PLUS_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_PLUS_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_PLUS_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_PLUS_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_PLUS_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -568,16 +666,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_MINUS_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_MINUS_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MAX_MINUS_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MAX_MINUS_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MAX_MINUS_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MAX_MINUS_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MAX_MINUS_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MAX_MINUS_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MAX_MINUS_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MAX_MINUS_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MAX_PLUS_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_PLUS_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_PLUS_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_PLUS_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_PLUS_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_PLUS_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_PLUS_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_PLUS_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_PLUS_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_PLUS_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -586,16 +684,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_MINUS_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_MINUS_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_MINUS_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_MINUS_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_MINUS_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_MINUS_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_MINUS_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_MINUS_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_MINUS_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_MINUS_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_PLUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_PLUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_PLUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_PLUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_PLUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_PLUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_PLUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_PLUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_PLUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_PLUS_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -604,16 +702,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_MINUS_INT8  ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_MINUS_UINT8 ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_MINUS_INT16 ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_MINUS_UINT16) ;
-                            case GB_INT32_code  : return (GxB_TIMES_MINUS_INT32 ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_MINUS_UINT32) ;
-                            case GB_INT64_code  : return (GxB_TIMES_MINUS_INT64 ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_MINUS_UINT64) ;
-                            case GB_FP32_code   : return (GxB_TIMES_MINUS_FP32  ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_MINUS_FP64  ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_PLUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_PLUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_PLUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_PLUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_PLUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_PLUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_PLUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_PLUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_PLUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_PLUS_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_PLUS_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_PLUS_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_PLUS_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_PLUS_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_PLUS_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_PLUS_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_PLUS_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_PLUS_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_PLUS_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_PLUS_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -621,7 +737,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_RMINUS_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_MINUS_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -630,16 +746,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_RMINUS_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_RMINUS_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MIN_RMINUS_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MIN_RMINUS_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MIN_RMINUS_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MIN_RMINUS_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MIN_RMINUS_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MIN_RMINUS_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MIN_RMINUS_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MIN_RMINUS_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MIN_MINUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MIN_MINUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MIN_MINUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MIN_MINUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MIN_MINUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MIN_MINUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MIN_MINUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MIN_MINUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MIN_MINUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MIN_MINUS_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -648,16 +764,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_RMINUS_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_RMINUS_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MAX_RMINUS_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MAX_RMINUS_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MAX_RMINUS_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MAX_RMINUS_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MAX_RMINUS_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MAX_RMINUS_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MAX_RMINUS_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MAX_RMINUS_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MAX_MINUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MAX_MINUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MAX_MINUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MAX_MINUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MAX_MINUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MAX_MINUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MAX_MINUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MAX_MINUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MAX_MINUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MAX_MINUS_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -666,16 +782,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_RMINUS_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_RMINUS_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_RMINUS_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_RMINUS_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_RMINUS_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_RMINUS_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_RMINUS_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_RMINUS_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_RMINUS_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_RMINUS_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_MINUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_MINUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_PLUS_MINUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_MINUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_PLUS_MINUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_MINUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_PLUS_MINUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_MINUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_MINUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_MINUS_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -684,16 +800,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_RMINUS_INT8  ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_RMINUS_UINT8 ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_RMINUS_INT16 ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_RMINUS_UINT16) ;
-                            case GB_INT32_code  : return (GxB_TIMES_RMINUS_INT32 ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_RMINUS_UINT32) ;
-                            case GB_INT64_code  : return (GxB_TIMES_RMINUS_INT64 ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_RMINUS_UINT64) ;
-                            case GB_FP32_code   : return (GxB_TIMES_RMINUS_FP32  ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_RMINUS_FP64  ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_MINUS_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_MINUS_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_TIMES_MINUS_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_MINUS_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_TIMES_MINUS_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_MINUS_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_TIMES_MINUS_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_MINUS_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_MINUS_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_MINUS_FP64   ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_MINUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_ANY_MINUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_ANY_MINUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_ANY_MINUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_ANY_MINUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_ANY_MINUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_ANY_MINUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_ANY_MINUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_ANY_MINUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_ANY_MINUS_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -701,7 +835,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_TIMES_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_RMINUS_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -710,16 +844,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_TIMES_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_TIMES_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MIN_TIMES_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MIN_TIMES_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MIN_TIMES_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MIN_TIMES_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MIN_TIMES_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MIN_TIMES_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MIN_TIMES_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MIN_TIMES_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MIN_RMINUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_MIN_RMINUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_MIN_RMINUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_MIN_RMINUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_MIN_RMINUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_MIN_RMINUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_MIN_RMINUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_MIN_RMINUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_MIN_RMINUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_MIN_RMINUS_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -728,16 +862,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_TIMES_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_TIMES_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_MAX_TIMES_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_MAX_TIMES_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_MAX_TIMES_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_MAX_TIMES_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_MAX_TIMES_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_MAX_TIMES_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_MAX_TIMES_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_MAX_TIMES_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_MAX_RMINUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_MAX_RMINUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_MAX_RMINUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_MAX_RMINUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_MAX_RMINUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_MAX_RMINUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_MAX_RMINUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_MAX_RMINUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_MAX_RMINUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_MAX_RMINUS_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -746,16 +880,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_TIMES_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_TIMES_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_TIMES_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_TIMES_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_TIMES_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_TIMES_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_TIMES_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_TIMES_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_TIMES_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_TIMES_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_RMINUS_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_RMINUS_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_PLUS_RMINUS_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_RMINUS_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_PLUS_RMINUS_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_RMINUS_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_PLUS_RMINUS_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_RMINUS_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_RMINUS_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_RMINUS_FP64   ) ;
                             default : ;
                         }
                         break ;
@@ -764,16 +898,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_TIMES_INT8  ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_TIMES_UINT8 ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_TIMES_INT16 ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_TIMES_UINT16) ;
-                            case GB_INT32_code  : return (GxB_TIMES_TIMES_INT32 ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_TIMES_UINT32) ;
-                            case GB_INT64_code  : return (GxB_TIMES_TIMES_INT64 ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_TIMES_UINT64) ;
-                            case GB_FP32_code   : return (GxB_TIMES_TIMES_FP32  ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_TIMES_FP64  ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_RMINUS_INT8  ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_RMINUS_UINT8 ) ;
+                            case GB_INT16_code : return (GxB_TIMES_RMINUS_INT16 ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_RMINUS_UINT16) ;
+                            case GB_INT32_code : return (GxB_TIMES_RMINUS_INT32 ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_RMINUS_UINT32) ;
+                            case GB_INT64_code : return (GxB_TIMES_RMINUS_INT64 ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_RMINUS_UINT64) ;
+                            case GB_FP32_code  : return (GxB_TIMES_RMINUS_FP32  ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_RMINUS_FP64  ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_RMINUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_ANY_RMINUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_ANY_RMINUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_ANY_RMINUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_ANY_RMINUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_ANY_RMINUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_ANY_RMINUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_ANY_RMINUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_ANY_RMINUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_ANY_RMINUS_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -781,7 +933,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_DIV_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_TIMES_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -790,16 +942,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_DIV_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_DIV_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MIN_DIV_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MIN_DIV_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MIN_DIV_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MIN_DIV_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MIN_DIV_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MIN_DIV_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MIN_DIV_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MIN_DIV_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MIN_TIMES_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MIN_TIMES_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MIN_TIMES_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MIN_TIMES_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MIN_TIMES_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MIN_TIMES_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MIN_TIMES_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MIN_TIMES_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MIN_TIMES_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MIN_TIMES_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -808,16 +960,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_DIV_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_DIV_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MAX_DIV_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MAX_DIV_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MAX_DIV_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MAX_DIV_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MAX_DIV_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MAX_DIV_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MAX_DIV_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MAX_DIV_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MAX_TIMES_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MAX_TIMES_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MAX_TIMES_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MAX_TIMES_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MAX_TIMES_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MAX_TIMES_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MAX_TIMES_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MAX_TIMES_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MAX_TIMES_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MAX_TIMES_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -826,16 +978,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_DIV_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_DIV_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_DIV_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_DIV_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_DIV_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_DIV_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_DIV_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_DIV_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_DIV_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_DIV_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_TIMES_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_TIMES_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_PLUS_TIMES_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_TIMES_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_PLUS_TIMES_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_TIMES_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_PLUS_TIMES_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_TIMES_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_TIMES_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_TIMES_FP64    ) ;
                             default : ;
                         }
                         break ;
@@ -844,16 +996,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_DIV_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_DIV_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_DIV_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_DIV_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_DIV_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_DIV_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_DIV_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_DIV_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_DIV_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_DIV_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_TIMES_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_TIMES_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_TIMES_TIMES_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_TIMES_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_TIMES_TIMES_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_TIMES_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_TIMES_TIMES_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_TIMES_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_TIMES_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_TIMES_FP64   ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_TIMES_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_ANY_TIMES_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_ANY_TIMES_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_ANY_TIMES_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_ANY_TIMES_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_ANY_TIMES_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_ANY_TIMES_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_ANY_TIMES_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_ANY_TIMES_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_ANY_TIMES_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -861,7 +1031,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_RDIV_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_DIV_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -870,16 +1040,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_RDIV_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_RDIV_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MIN_RDIV_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MIN_RDIV_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MIN_RDIV_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MIN_RDIV_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MIN_RDIV_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MIN_RDIV_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MIN_RDIV_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MIN_RDIV_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MIN_DIV_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_DIV_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_DIV_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_DIV_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_DIV_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_DIV_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_DIV_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_DIV_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_DIV_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_DIV_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -888,16 +1058,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_RDIV_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_RDIV_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MAX_RDIV_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MAX_RDIV_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MAX_RDIV_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MAX_RDIV_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MAX_RDIV_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MAX_RDIV_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MAX_RDIV_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MAX_RDIV_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MAX_DIV_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_DIV_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_DIV_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_DIV_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_DIV_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_DIV_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_DIV_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_DIV_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_DIV_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_DIV_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -906,16 +1076,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_RDIV_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_RDIV_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_RDIV_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_RDIV_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_RDIV_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_RDIV_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_RDIV_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_RDIV_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_RDIV_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_RDIV_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_DIV_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_DIV_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_DIV_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_DIV_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_DIV_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_DIV_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_DIV_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_DIV_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_DIV_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_DIV_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -924,16 +1094,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_RDIV_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_RDIV_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_RDIV_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_RDIV_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_RDIV_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_RDIV_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_RDIV_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_RDIV_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_RDIV_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_RDIV_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_DIV_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_DIV_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_DIV_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_DIV_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_DIV_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_DIV_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_DIV_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_DIV_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_DIV_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_DIV_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_DIV_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_DIV_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_DIV_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_DIV_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_DIV_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_DIV_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_DIV_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_DIV_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_DIV_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_DIV_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -941,7 +1129,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_ISEQ_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_RDIV_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -950,16 +1138,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_ISEQ_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_ISEQ_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_ISEQ_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_ISEQ_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_ISEQ_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_ISEQ_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_ISEQ_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_ISEQ_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_ISEQ_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_ISEQ_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_RDIV_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_RDIV_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_RDIV_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_RDIV_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_RDIV_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_RDIV_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_RDIV_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_RDIV_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_RDIV_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_RDIV_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -968,16 +1156,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_ISEQ_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_ISEQ_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_ISEQ_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_ISEQ_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_ISEQ_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_ISEQ_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_ISEQ_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_ISEQ_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_ISEQ_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_ISEQ_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_RDIV_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_RDIV_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_RDIV_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_RDIV_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_RDIV_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_RDIV_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_RDIV_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_RDIV_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_RDIV_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_RDIV_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -986,16 +1174,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_ISEQ_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_ISEQ_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_ISEQ_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_ISEQ_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_ISEQ_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_ISEQ_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_ISEQ_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_ISEQ_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_ISEQ_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_ISEQ_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_RDIV_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_RDIV_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_RDIV_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_RDIV_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_RDIV_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_RDIV_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_RDIV_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_RDIV_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_RDIV_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_RDIV_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1004,16 +1192,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_ISEQ_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_ISEQ_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_ISEQ_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_ISEQ_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_ISEQ_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_ISEQ_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_ISEQ_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_ISEQ_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_ISEQ_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_ISEQ_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_RDIV_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_RDIV_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_RDIV_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_RDIV_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_RDIV_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_RDIV_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_RDIV_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_RDIV_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_RDIV_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_RDIV_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_RDIV_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_RDIV_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_RDIV_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_RDIV_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_RDIV_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_RDIV_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_RDIV_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_RDIV_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_RDIV_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_RDIV_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1021,7 +1227,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_ISNE_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISEQ_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1030,16 +1236,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_ISNE_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_ISNE_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_ISNE_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_ISNE_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_ISNE_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_ISNE_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_ISNE_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_ISNE_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_ISNE_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_ISNE_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_ISEQ_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISEQ_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISEQ_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISEQ_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISEQ_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISEQ_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISEQ_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISEQ_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISEQ_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISEQ_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1048,16 +1254,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_ISNE_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_ISNE_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_ISNE_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_ISNE_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_ISNE_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_ISNE_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_ISNE_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_ISNE_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_ISNE_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_ISNE_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_ISEQ_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISEQ_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISEQ_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISEQ_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISEQ_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISEQ_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISEQ_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISEQ_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISEQ_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISEQ_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1066,16 +1272,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_ISNE_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_ISNE_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_ISNE_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_ISNE_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_ISNE_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_ISNE_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_ISNE_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_ISNE_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_ISNE_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_ISNE_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_ISEQ_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISEQ_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISEQ_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISEQ_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISEQ_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISEQ_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISEQ_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISEQ_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISEQ_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISEQ_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1084,16 +1290,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_ISNE_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_ISNE_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_ISNE_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_ISNE_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_ISNE_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_ISNE_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_ISNE_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_ISNE_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_ISNE_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_ISNE_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_ISEQ_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISEQ_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISEQ_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISEQ_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISEQ_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISEQ_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISEQ_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISEQ_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISEQ_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISEQ_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISEQ_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISEQ_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISEQ_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISEQ_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISEQ_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISEQ_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISEQ_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISEQ_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISEQ_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISEQ_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1101,7 +1325,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_ISGT_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISNE_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1110,16 +1334,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_ISGT_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_ISGT_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_ISGT_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_ISGT_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_ISGT_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_ISGT_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_ISGT_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_ISGT_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_ISGT_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_ISGT_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_ISNE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISNE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISNE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISNE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISNE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISNE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISNE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISNE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISNE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISNE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1128,16 +1352,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_ISGT_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_ISGT_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_ISGT_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_ISGT_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_ISGT_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_ISGT_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_ISGT_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_ISGT_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_ISGT_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_ISGT_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_ISNE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISNE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISNE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISNE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISNE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISNE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISNE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISNE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISNE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISNE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1146,16 +1370,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_ISGT_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_ISGT_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_ISGT_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_ISGT_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_ISGT_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_ISGT_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_ISGT_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_ISGT_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_ISGT_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_ISGT_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_ISNE_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISNE_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISNE_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISNE_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISNE_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISNE_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISNE_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISNE_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISNE_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISNE_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1164,16 +1388,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_ISGT_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_ISGT_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_ISGT_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_ISGT_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_ISGT_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_ISGT_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_ISGT_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_ISGT_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_ISGT_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_ISGT_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_ISNE_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISNE_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISNE_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISNE_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISNE_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISNE_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISNE_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISNE_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISNE_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISNE_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISNE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISNE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISNE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISNE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISNE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISNE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISNE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISNE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISNE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISNE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1181,7 +1423,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_ISLT_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISGT_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1190,16 +1432,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_ISLT_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_ISLT_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_ISLT_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_ISLT_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_ISLT_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_ISLT_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_ISLT_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_ISLT_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_ISLT_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_ISLT_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_ISGT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISGT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISGT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISGT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISGT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISGT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISGT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISGT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISGT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISGT_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1208,16 +1450,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_ISLT_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_ISLT_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_ISLT_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_ISLT_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_ISLT_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_ISLT_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_ISLT_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_ISLT_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_ISLT_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_ISLT_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_ISGT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISGT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISGT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISGT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISGT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISGT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISGT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISGT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISGT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISGT_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1226,16 +1468,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_ISLT_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_ISLT_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_ISLT_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_ISLT_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_ISLT_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_ISLT_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_ISLT_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_ISLT_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_ISLT_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_ISLT_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_ISGT_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISGT_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISGT_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISGT_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISGT_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISGT_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISGT_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISGT_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISGT_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISGT_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1244,16 +1486,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_ISLT_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_ISLT_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_ISLT_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_ISLT_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_ISLT_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_ISLT_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_ISLT_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_ISLT_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_ISLT_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_ISLT_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_ISGT_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISGT_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISGT_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISGT_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISGT_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISGT_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISGT_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISGT_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISGT_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISGT_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISGT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISGT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISGT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISGT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISGT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISGT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISGT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISGT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISGT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISGT_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1261,7 +1521,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_ISGE_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISLT_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1270,16 +1530,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_ISGE_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_ISGE_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_ISGE_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_ISGE_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_ISGE_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_ISGE_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_ISGE_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_ISGE_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_ISGE_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_ISGE_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_ISLT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISLT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISLT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISLT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISLT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISLT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISLT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISLT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISLT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISLT_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1288,16 +1548,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_ISGE_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_ISGE_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_ISGE_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_ISGE_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_ISGE_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_ISGE_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_ISGE_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_ISGE_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_ISGE_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_ISGE_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_ISLT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISLT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISLT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISLT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISLT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISLT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISLT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISLT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISLT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISLT_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1306,16 +1566,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_ISGE_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_ISGE_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_ISGE_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_ISGE_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_ISGE_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_ISGE_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_ISGE_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_ISGE_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_ISGE_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_ISGE_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_ISLT_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISLT_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISLT_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISLT_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISLT_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISLT_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISLT_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISLT_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISLT_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISLT_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1324,16 +1584,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_ISGE_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_ISGE_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_ISGE_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_ISGE_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_ISGE_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_ISGE_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_ISGE_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_ISGE_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_ISGE_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_ISGE_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_ISLT_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISLT_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISLT_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISLT_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISLT_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISLT_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISLT_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISLT_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISLT_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISLT_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISLT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISLT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISLT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISLT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISLT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISLT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISLT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISLT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISLT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISLT_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1341,7 +1619,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_ISLE_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISGE_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1350,16 +1628,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_ISLE_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_ISLE_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_ISLE_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_ISLE_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_ISLE_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_ISLE_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_ISLE_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_ISLE_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_ISLE_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_ISLE_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_ISGE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISGE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISGE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISGE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISGE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISGE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISGE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISGE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISGE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISGE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1368,16 +1646,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_ISLE_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_ISLE_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_ISLE_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_ISLE_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_ISLE_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_ISLE_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_ISLE_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_ISLE_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_ISLE_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_ISLE_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_ISGE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISGE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISGE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISGE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISGE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISGE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISGE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISGE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISGE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISGE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1386,16 +1664,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_ISLE_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_ISLE_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_ISLE_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_ISLE_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_ISLE_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_ISLE_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_ISLE_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_ISLE_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_ISLE_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_ISLE_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_ISGE_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISGE_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISGE_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISGE_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISGE_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISGE_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISGE_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISGE_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISGE_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISGE_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1404,16 +1682,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_ISLE_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_ISLE_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_ISLE_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_ISLE_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_ISLE_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_ISLE_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_ISLE_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_ISLE_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_ISLE_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_ISLE_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_ISGE_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISGE_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISGE_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISGE_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISGE_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISGE_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISGE_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISGE_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISGE_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISGE_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISGE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISGE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISGE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISGE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISGE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISGE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISGE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISGE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISGE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISGE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1421,7 +1717,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_LOR_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISLE_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1430,16 +1726,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_LOR_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_LOR_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MIN_LOR_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MIN_LOR_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MIN_LOR_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MIN_LOR_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MIN_LOR_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MIN_LOR_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MIN_LOR_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MIN_LOR_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MIN_ISLE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISLE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISLE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISLE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISLE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISLE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISLE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISLE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISLE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISLE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1448,16 +1744,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_LOR_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_LOR_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_MAX_LOR_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_MAX_LOR_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_MAX_LOR_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_MAX_LOR_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_MAX_LOR_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_MAX_LOR_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_MAX_LOR_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_MAX_LOR_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_MAX_ISLE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISLE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISLE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISLE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISLE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISLE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISLE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISLE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISLE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISLE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1466,16 +1762,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_LOR_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_LOR_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_LOR_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_LOR_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_LOR_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_LOR_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_LOR_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_LOR_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_LOR_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_LOR_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_ISLE_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISLE_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISLE_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISLE_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISLE_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISLE_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISLE_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISLE_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISLE_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISLE_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1484,16 +1780,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_LOR_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_LOR_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_LOR_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_LOR_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_LOR_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_LOR_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_LOR_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_LOR_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_LOR_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_LOR_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_ISLE_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISLE_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISLE_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISLE_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISLE_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISLE_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISLE_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISLE_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISLE_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISLE_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISLE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISLE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISLE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISLE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISLE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISLE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISLE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISLE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISLE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISLE_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1501,7 +1815,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_LAND_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_LOR_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1510,16 +1824,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_LAND_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_LAND_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_LAND_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_LAND_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_LAND_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_LAND_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_LAND_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_LAND_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_LAND_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_LAND_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_LOR_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_LOR_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_LOR_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_LOR_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_LOR_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_LOR_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_LOR_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_LOR_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_LOR_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_LOR_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1528,16 +1842,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_LAND_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_LAND_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_LAND_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_LAND_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_LAND_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_LAND_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_LAND_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_LAND_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_LAND_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_LAND_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_LOR_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_LOR_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_LOR_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_LOR_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_LOR_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_LOR_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_LOR_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_LOR_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_LOR_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_LOR_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1546,16 +1860,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_LAND_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_LAND_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_LAND_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_LAND_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_LAND_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_LAND_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_LAND_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_LAND_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_LAND_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_LAND_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_LOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_LOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_LOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_LOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_LOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_LOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_LOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_LOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_LOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_LOR_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1564,16 +1878,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_LAND_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_LAND_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_LAND_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_LAND_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_LAND_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_LAND_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_LAND_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_LAND_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_LAND_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_LAND_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_LOR_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_LOR_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_LOR_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_LOR_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_LOR_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_LOR_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_LOR_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_LOR_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_LOR_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_LOR_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LOR_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LOR_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_LOR_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LOR_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_LOR_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LOR_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_LOR_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LOR_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LOR_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LOR_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1581,7 +1913,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_LXOR_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_LAND_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1590,16 +1922,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MIN_LXOR_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MIN_LXOR_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MIN_LXOR_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MIN_LXOR_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MIN_LXOR_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MIN_LXOR_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MIN_LXOR_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MIN_LXOR_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MIN_LXOR_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MIN_LXOR_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MIN_LAND_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_LAND_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_LAND_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_LAND_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_LAND_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_LAND_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_LAND_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_LAND_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_LAND_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_LAND_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1608,16 +1940,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_MAX_LXOR_INT8     ) ;
-                            case GB_UINT8_code  : return (GxB_MAX_LXOR_UINT8    ) ;
-                            case GB_INT16_code  : return (GxB_MAX_LXOR_INT16    ) ;
-                            case GB_UINT16_code : return (GxB_MAX_LXOR_UINT16   ) ;
-                            case GB_INT32_code  : return (GxB_MAX_LXOR_INT32    ) ;
-                            case GB_UINT32_code : return (GxB_MAX_LXOR_UINT32   ) ;
-                            case GB_INT64_code  : return (GxB_MAX_LXOR_INT64    ) ;
-                            case GB_UINT64_code : return (GxB_MAX_LXOR_UINT64   ) ;
-                            case GB_FP32_code   : return (GxB_MAX_LXOR_FP32     ) ;
-                            case GB_FP64_code   : return (GxB_MAX_LXOR_FP64     ) ;
+                            case GB_INT8_code  : return (GxB_MAX_LAND_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_LAND_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_LAND_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_LAND_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_LAND_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_LAND_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_LAND_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_LAND_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_LAND_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_LAND_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1626,16 +1958,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_PLUS_LXOR_INT8    ) ;
-                            case GB_UINT8_code  : return (GxB_PLUS_LXOR_UINT8   ) ;
-                            case GB_INT16_code  : return (GxB_PLUS_LXOR_INT16   ) ;
-                            case GB_UINT16_code : return (GxB_PLUS_LXOR_UINT16  ) ;
-                            case GB_INT32_code  : return (GxB_PLUS_LXOR_INT32   ) ;
-                            case GB_UINT32_code : return (GxB_PLUS_LXOR_UINT32  ) ;
-                            case GB_INT64_code  : return (GxB_PLUS_LXOR_INT64   ) ;
-                            case GB_UINT64_code : return (GxB_PLUS_LXOR_UINT64  ) ;
-                            case GB_FP32_code   : return (GxB_PLUS_LXOR_FP32    ) ;
-                            case GB_FP64_code   : return (GxB_PLUS_LXOR_FP64    ) ;
+                            case GB_INT8_code  : return (GxB_PLUS_LAND_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_LAND_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_LAND_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_LAND_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_LAND_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_LAND_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_LAND_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_LAND_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_LAND_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_LAND_FP64     ) ;
                             default : ;
                         }
                         break ;
@@ -1644,16 +1976,132 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code   : return (GxB_TIMES_LXOR_INT8   ) ;
-                            case GB_UINT8_code  : return (GxB_TIMES_LXOR_UINT8  ) ;
-                            case GB_INT16_code  : return (GxB_TIMES_LXOR_INT16  ) ;
-                            case GB_UINT16_code : return (GxB_TIMES_LXOR_UINT16 ) ;
-                            case GB_INT32_code  : return (GxB_TIMES_LXOR_INT32  ) ;
-                            case GB_UINT32_code : return (GxB_TIMES_LXOR_UINT32 ) ;
-                            case GB_INT64_code  : return (GxB_TIMES_LXOR_INT64  ) ;
-                            case GB_UINT64_code : return (GxB_TIMES_LXOR_UINT64 ) ;
-                            case GB_FP32_code   : return (GxB_TIMES_LXOR_FP32   ) ;
-                            case GB_FP64_code   : return (GxB_TIMES_LXOR_FP64   ) ;
+                            case GB_INT8_code  : return (GxB_TIMES_LAND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_LAND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_LAND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_LAND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_LAND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_LAND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_LAND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_LAND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_LAND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_LAND_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LAND_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LAND_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_LAND_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LAND_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_LAND_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LAND_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_LAND_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LAND_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LAND_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LAND_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    default : ;
+                }
+
+            case GB_LXOR_opcode : // with (5 monoids) x (10 nonboolean types)
+
+                switch (add_opcode)
+                {
+
+                    case GB_MIN_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_MIN_LXOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_LXOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_LXOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_LXOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_LXOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_LXOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_LXOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_LXOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_LXOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_LXOR_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_MAX_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_MAX_LXOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_LXOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_LXOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_LXOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_LXOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_LXOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_LXOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_LXOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_LXOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_LXOR_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_PLUS_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_PLUS_LXOR_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_LXOR_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_LXOR_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_LXOR_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_LXOR_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_LXOR_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_LXOR_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_LXOR_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_LXOR_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_LXOR_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_TIMES_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_TIMES_LXOR_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_LXOR_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_LXOR_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_LXOR_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_LXOR_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_LXOR_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_LXOR_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_LXOR_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_LXOR_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_LXOR_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LXOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LXOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_LXOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LXOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_LXOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LXOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_LXOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LXOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LXOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LXOR_FP64      ) ;
                             default : ;
                         }
                         break ;
@@ -1668,7 +2116,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
     {
 
         //----------------------------------------------------------------------
-        // 240 semirings with TxT->bool multiply operators
+        // 300 semirings with TxT->bool multiply operators
         //----------------------------------------------------------------------
 
         // x,y are one of the 10 non-Boolean types, z is Boolean
@@ -1676,7 +2124,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
         switch (mult_opcode)
         {
 
-            case GB_EQ_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_EQ_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1685,16 +2133,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LOR_EQ_INT8       ) ;
-                            case GB_UINT8_code  : return (GxB_LOR_EQ_UINT8      ) ;
-                            case GB_INT16_code  : return (GxB_LOR_EQ_INT16      ) ;
-                            case GB_UINT16_code : return (GxB_LOR_EQ_UINT16     ) ;
-                            case GB_INT32_code  : return (GxB_LOR_EQ_INT32      ) ;
-                            case GB_UINT32_code : return (GxB_LOR_EQ_UINT32     ) ;
-                            case GB_INT64_code  : return (GxB_LOR_EQ_INT64      ) ;
-                            case GB_UINT64_code : return (GxB_LOR_EQ_UINT64     ) ;
-                            case GB_FP32_code   : return (GxB_LOR_EQ_FP32       ) ;
-                            case GB_FP64_code   : return (GxB_LOR_EQ_FP64       ) ;
+                            case GB_INT8_code  : return (GxB_LOR_EQ_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_EQ_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_EQ_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_EQ_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_EQ_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_EQ_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_EQ_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_EQ_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_EQ_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_EQ_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1703,16 +2151,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LAND_EQ_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LAND_EQ_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LAND_EQ_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LAND_EQ_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LAND_EQ_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LAND_EQ_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LAND_EQ_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LAND_EQ_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LAND_EQ_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LAND_EQ_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LAND_EQ_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_EQ_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_EQ_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_EQ_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_EQ_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_EQ_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_EQ_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_EQ_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_EQ_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_EQ_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1721,16 +2169,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LXOR_EQ_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LXOR_EQ_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LXOR_EQ_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LXOR_EQ_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LXOR_EQ_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LXOR_EQ_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LXOR_EQ_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LXOR_EQ_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LXOR_EQ_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LXOR_EQ_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LXOR_EQ_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_EQ_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_EQ_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_EQ_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_EQ_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_EQ_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_EQ_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_EQ_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_EQ_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_EQ_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1739,16 +2187,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_EQ_EQ_INT8        ) ;
-                            case GB_UINT8_code  : return (GxB_EQ_EQ_UINT8       ) ;
-                            case GB_INT16_code  : return (GxB_EQ_EQ_INT16       ) ;
-                            case GB_UINT16_code : return (GxB_EQ_EQ_UINT16      ) ;
-                            case GB_INT32_code  : return (GxB_EQ_EQ_INT32       ) ;
-                            case GB_UINT32_code : return (GxB_EQ_EQ_UINT32      ) ;
-                            case GB_INT64_code  : return (GxB_EQ_EQ_INT64       ) ;
-                            case GB_UINT64_code : return (GxB_EQ_EQ_UINT64      ) ;
-                            case GB_FP32_code   : return (GxB_EQ_EQ_FP32        ) ;
-                            case GB_FP64_code   : return (GxB_EQ_EQ_FP64        ) ;
+                            case GB_INT8_code  : return (GxB_EQ_EQ_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_EQ_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_EQ_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_EQ_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_EQ_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_EQ_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_EQ_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_EQ_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_EQ_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_EQ_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_EQ_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_EQ_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_EQ_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_EQ_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_EQ_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_EQ_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_EQ_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_EQ_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_EQ_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_EQ_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1756,7 +2222,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_NE_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_NE_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1765,16 +2231,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LOR_NE_INT8       ) ;
-                            case GB_UINT8_code  : return (GxB_LOR_NE_UINT8      ) ;
-                            case GB_INT16_code  : return (GxB_LOR_NE_INT16      ) ;
-                            case GB_UINT16_code : return (GxB_LOR_NE_UINT16     ) ;
-                            case GB_INT32_code  : return (GxB_LOR_NE_INT32      ) ;
-                            case GB_UINT32_code : return (GxB_LOR_NE_UINT32     ) ;
-                            case GB_INT64_code  : return (GxB_LOR_NE_INT64      ) ;
-                            case GB_UINT64_code : return (GxB_LOR_NE_UINT64     ) ;
-                            case GB_FP32_code   : return (GxB_LOR_NE_FP32       ) ;
-                            case GB_FP64_code   : return (GxB_LOR_NE_FP64       ) ;
+                            case GB_INT8_code  : return (GxB_LOR_NE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_NE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_NE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_NE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_NE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_NE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_NE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_NE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_NE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_NE_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1783,16 +2249,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LAND_NE_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LAND_NE_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LAND_NE_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LAND_NE_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LAND_NE_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LAND_NE_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LAND_NE_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LAND_NE_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LAND_NE_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LAND_NE_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LAND_NE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_NE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_NE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_NE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_NE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_NE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_NE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_NE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_NE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_NE_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1801,16 +2267,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LXOR_NE_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LXOR_NE_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LXOR_NE_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LXOR_NE_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LXOR_NE_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LXOR_NE_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LXOR_NE_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LXOR_NE_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LXOR_NE_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LXOR_NE_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LXOR_NE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_NE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_NE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_NE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_NE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_NE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_NE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_NE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_NE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_NE_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1819,16 +2285,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_EQ_NE_INT8        ) ;
-                            case GB_UINT8_code  : return (GxB_EQ_NE_UINT8       ) ;
-                            case GB_INT16_code  : return (GxB_EQ_NE_INT16       ) ;
-                            case GB_UINT16_code : return (GxB_EQ_NE_UINT16      ) ;
-                            case GB_INT32_code  : return (GxB_EQ_NE_INT32       ) ;
-                            case GB_UINT32_code : return (GxB_EQ_NE_UINT32      ) ;
-                            case GB_INT64_code  : return (GxB_EQ_NE_INT64       ) ;
-                            case GB_UINT64_code : return (GxB_EQ_NE_UINT64      ) ;
-                            case GB_FP32_code   : return (GxB_EQ_NE_FP32        ) ;
-                            case GB_FP64_code   : return (GxB_EQ_NE_FP64        ) ;
+                            case GB_INT8_code  : return (GxB_EQ_NE_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_NE_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_NE_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_NE_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_NE_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_NE_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_NE_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_NE_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_NE_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_NE_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_NE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_NE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_NE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_NE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_NE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_NE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_NE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_NE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_NE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_NE_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1836,7 +2320,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_GT_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_GT_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1845,16 +2329,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LOR_GT_INT8       ) ;
-                            case GB_UINT8_code  : return (GxB_LOR_GT_UINT8      ) ;
-                            case GB_INT16_code  : return (GxB_LOR_GT_INT16      ) ;
-                            case GB_UINT16_code : return (GxB_LOR_GT_UINT16     ) ;
-                            case GB_INT32_code  : return (GxB_LOR_GT_INT32      ) ;
-                            case GB_UINT32_code : return (GxB_LOR_GT_UINT32     ) ;
-                            case GB_INT64_code  : return (GxB_LOR_GT_INT64      ) ;
-                            case GB_UINT64_code : return (GxB_LOR_GT_UINT64     ) ;
-                            case GB_FP32_code   : return (GxB_LOR_GT_FP32       ) ;
-                            case GB_FP64_code   : return (GxB_LOR_GT_FP64       ) ;
+                            case GB_INT8_code  : return (GxB_LOR_GT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_GT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_GT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_GT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_GT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_GT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_GT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_GT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_GT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_GT_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1863,16 +2347,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LAND_GT_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LAND_GT_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LAND_GT_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LAND_GT_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LAND_GT_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LAND_GT_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LAND_GT_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LAND_GT_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LAND_GT_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LAND_GT_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LAND_GT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_GT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_GT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_GT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_GT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_GT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_GT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_GT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_GT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_GT_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1881,16 +2365,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LXOR_GT_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LXOR_GT_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LXOR_GT_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LXOR_GT_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LXOR_GT_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LXOR_GT_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LXOR_GT_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LXOR_GT_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LXOR_GT_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LXOR_GT_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LXOR_GT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_GT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_GT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_GT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_GT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_GT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_GT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_GT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_GT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_GT_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1899,16 +2383,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_EQ_GT_INT8        ) ;
-                            case GB_UINT8_code  : return (GxB_EQ_GT_UINT8       ) ;
-                            case GB_INT16_code  : return (GxB_EQ_GT_INT16       ) ;
-                            case GB_UINT16_code : return (GxB_EQ_GT_UINT16      ) ;
-                            case GB_INT32_code  : return (GxB_EQ_GT_INT32       ) ;
-                            case GB_UINT32_code : return (GxB_EQ_GT_UINT32      ) ;
-                            case GB_INT64_code  : return (GxB_EQ_GT_INT64       ) ;
-                            case GB_UINT64_code : return (GxB_EQ_GT_UINT64      ) ;
-                            case GB_FP32_code   : return (GxB_EQ_GT_FP32        ) ;
-                            case GB_FP64_code   : return (GxB_EQ_GT_FP64        ) ;
+                            case GB_INT8_code  : return (GxB_EQ_GT_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_GT_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_GT_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_GT_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_GT_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_GT_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_GT_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_GT_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_GT_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_GT_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_GT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_GT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_GT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_GT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_GT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_GT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_GT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_GT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_GT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_GT_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1916,7 +2418,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_LT_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_LT_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1925,16 +2427,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LOR_LT_INT8       ) ;
-                            case GB_UINT8_code  : return (GxB_LOR_LT_UINT8      ) ;
-                            case GB_INT16_code  : return (GxB_LOR_LT_INT16      ) ;
-                            case GB_UINT16_code : return (GxB_LOR_LT_UINT16     ) ;
-                            case GB_INT32_code  : return (GxB_LOR_LT_INT32      ) ;
-                            case GB_UINT32_code : return (GxB_LOR_LT_UINT32     ) ;
-                            case GB_INT64_code  : return (GxB_LOR_LT_INT64      ) ;
-                            case GB_UINT64_code : return (GxB_LOR_LT_UINT64     ) ;
-                            case GB_FP32_code   : return (GxB_LOR_LT_FP32       ) ;
-                            case GB_FP64_code   : return (GxB_LOR_LT_FP64       ) ;
+                            case GB_INT8_code  : return (GxB_LOR_LT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_LT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_LT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_LT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_LT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_LT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_LT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_LT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_LT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_LT_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1943,16 +2445,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LAND_LT_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LAND_LT_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LAND_LT_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LAND_LT_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LAND_LT_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LAND_LT_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LAND_LT_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LAND_LT_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LAND_LT_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LAND_LT_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LAND_LT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_LT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_LT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_LT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_LT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_LT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_LT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_LT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_LT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_LT_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1961,16 +2463,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LXOR_LT_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LXOR_LT_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LXOR_LT_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LXOR_LT_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LXOR_LT_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LXOR_LT_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LXOR_LT_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LXOR_LT_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LXOR_LT_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LXOR_LT_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LXOR_LT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_LT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_LT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_LT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_LT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_LT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_LT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_LT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_LT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_LT_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -1979,16 +2481,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_EQ_LT_INT8        ) ;
-                            case GB_UINT8_code  : return (GxB_EQ_LT_UINT8       ) ;
-                            case GB_INT16_code  : return (GxB_EQ_LT_INT16       ) ;
-                            case GB_UINT16_code : return (GxB_EQ_LT_UINT16      ) ;
-                            case GB_INT32_code  : return (GxB_EQ_LT_INT32       ) ;
-                            case GB_UINT32_code : return (GxB_EQ_LT_UINT32      ) ;
-                            case GB_INT64_code  : return (GxB_EQ_LT_INT64       ) ;
-                            case GB_UINT64_code : return (GxB_EQ_LT_UINT64      ) ;
-                            case GB_FP32_code   : return (GxB_EQ_LT_FP32        ) ;
-                            case GB_FP64_code   : return (GxB_EQ_LT_FP64        ) ;
+                            case GB_INT8_code  : return (GxB_EQ_LT_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_LT_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_LT_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_LT_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_LT_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_LT_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_LT_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_LT_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_LT_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_LT_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_LT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_LT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_LT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LT_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -1996,7 +2516,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_GE_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_GE_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -2005,16 +2525,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LOR_GE_INT8       ) ;
-                            case GB_UINT8_code  : return (GxB_LOR_GE_UINT8      ) ;
-                            case GB_INT16_code  : return (GxB_LOR_GE_INT16      ) ;
-                            case GB_UINT16_code : return (GxB_LOR_GE_UINT16     ) ;
-                            case GB_INT32_code  : return (GxB_LOR_GE_INT32      ) ;
-                            case GB_UINT32_code : return (GxB_LOR_GE_UINT32     ) ;
-                            case GB_INT64_code  : return (GxB_LOR_GE_INT64      ) ;
-                            case GB_UINT64_code : return (GxB_LOR_GE_UINT64     ) ;
-                            case GB_FP32_code   : return (GxB_LOR_GE_FP32       ) ;
-                            case GB_FP64_code   : return (GxB_LOR_GE_FP64       ) ;
+                            case GB_INT8_code  : return (GxB_LOR_GE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_GE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_GE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_GE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_GE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_GE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_GE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_GE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_GE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_GE_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -2023,16 +2543,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LAND_GE_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LAND_GE_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LAND_GE_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LAND_GE_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LAND_GE_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LAND_GE_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LAND_GE_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LAND_GE_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LAND_GE_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LAND_GE_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LAND_GE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_GE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_GE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_GE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_GE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_GE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_GE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_GE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_GE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_GE_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -2041,16 +2561,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LXOR_GE_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LXOR_GE_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LXOR_GE_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LXOR_GE_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LXOR_GE_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LXOR_GE_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LXOR_GE_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LXOR_GE_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LXOR_GE_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LXOR_GE_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LXOR_GE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_GE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_GE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_GE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_GE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_GE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_GE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_GE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_GE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_GE_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -2059,16 +2579,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_EQ_GE_INT8        ) ;
-                            case GB_UINT8_code  : return (GxB_EQ_GE_UINT8       ) ;
-                            case GB_INT16_code  : return (GxB_EQ_GE_INT16       ) ;
-                            case GB_UINT16_code : return (GxB_EQ_GE_UINT16      ) ;
-                            case GB_INT32_code  : return (GxB_EQ_GE_INT32       ) ;
-                            case GB_UINT32_code : return (GxB_EQ_GE_UINT32      ) ;
-                            case GB_INT64_code  : return (GxB_EQ_GE_INT64       ) ;
-                            case GB_UINT64_code : return (GxB_EQ_GE_UINT64      ) ;
-                            case GB_FP32_code   : return (GxB_EQ_GE_FP32        ) ;
-                            case GB_FP64_code   : return (GxB_EQ_GE_FP64        ) ;
+                            case GB_INT8_code  : return (GxB_EQ_GE_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_GE_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_GE_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_GE_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_GE_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_GE_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_GE_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_GE_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_GE_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_GE_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_GE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_GE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_GE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_GE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_GE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_GE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_GE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_GE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_GE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_GE_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -2076,7 +2614,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
                     default : ;
                 }
 
-            case GB_LE_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_LE_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -2085,16 +2623,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LOR_LE_INT8       ) ;
-                            case GB_UINT8_code  : return (GxB_LOR_LE_UINT8      ) ;
-                            case GB_INT16_code  : return (GxB_LOR_LE_INT16      ) ;
-                            case GB_UINT16_code : return (GxB_LOR_LE_UINT16     ) ;
-                            case GB_INT32_code  : return (GxB_LOR_LE_INT32      ) ;
-                            case GB_UINT32_code : return (GxB_LOR_LE_UINT32     ) ;
-                            case GB_INT64_code  : return (GxB_LOR_LE_INT64      ) ;
-                            case GB_UINT64_code : return (GxB_LOR_LE_UINT64     ) ;
-                            case GB_FP32_code   : return (GxB_LOR_LE_FP32       ) ;
-                            case GB_FP64_code   : return (GxB_LOR_LE_FP64       ) ;
+                            case GB_INT8_code  : return (GxB_LOR_LE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_LE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_LE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_LE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_LE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_LE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_LE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_LE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_LE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_LE_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -2103,16 +2641,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LAND_LE_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LAND_LE_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LAND_LE_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LAND_LE_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LAND_LE_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LAND_LE_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LAND_LE_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LAND_LE_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LAND_LE_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LAND_LE_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LAND_LE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_LE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_LE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_LE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_LE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_LE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_LE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_LE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_LE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_LE_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -2121,16 +2659,16 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_LXOR_LE_INT8      ) ;
-                            case GB_UINT8_code  : return (GxB_LXOR_LE_UINT8     ) ;
-                            case GB_INT16_code  : return (GxB_LXOR_LE_INT16     ) ;
-                            case GB_UINT16_code : return (GxB_LXOR_LE_UINT16    ) ;
-                            case GB_INT32_code  : return (GxB_LXOR_LE_INT32     ) ;
-                            case GB_UINT32_code : return (GxB_LXOR_LE_UINT32    ) ;
-                            case GB_INT64_code  : return (GxB_LXOR_LE_INT64     ) ;
-                            case GB_UINT64_code : return (GxB_LXOR_LE_UINT64    ) ;
-                            case GB_FP32_code   : return (GxB_LXOR_LE_FP32      ) ;
-                            case GB_FP64_code   : return (GxB_LXOR_LE_FP64      ) ;
+                            case GB_INT8_code  : return (GxB_LXOR_LE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_LE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_LE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_LE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_LE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_LE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_LE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_LE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_LE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_LE_FP64       ) ;
                             default : ;
                         }
                         break ;
@@ -2139,16 +2677,34 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code   : return (GxB_EQ_LE_INT8        ) ;
-                            case GB_UINT8_code  : return (GxB_EQ_LE_UINT8       ) ;
-                            case GB_INT16_code  : return (GxB_EQ_LE_INT16       ) ;
-                            case GB_UINT16_code : return (GxB_EQ_LE_UINT16      ) ;
-                            case GB_INT32_code  : return (GxB_EQ_LE_INT32       ) ;
-                            case GB_UINT32_code : return (GxB_EQ_LE_UINT32      ) ;
-                            case GB_INT64_code  : return (GxB_EQ_LE_INT64       ) ;
-                            case GB_UINT64_code : return (GxB_EQ_LE_UINT64      ) ;
-                            case GB_FP32_code   : return (GxB_EQ_LE_FP32        ) ;
-                            case GB_FP64_code   : return (GxB_EQ_LE_FP64        ) ;
+                            case GB_INT8_code  : return (GxB_EQ_LE_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_LE_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_LE_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_LE_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_LE_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_LE_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_LE_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_LE_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_LE_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_LE_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_LE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_LE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_LE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LE_FP64        ) ;
                             default : ;
                         }
                         break ;
@@ -2163,7 +2719,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
     {
 
         //----------------------------------------------------------------------
-        // 40 purely Boolean semirings
+        // 55 purely Boolean semirings
         //----------------------------------------------------------------------
 
         // x,y,z are all Boolean, and all operators are Boolean
@@ -2175,121 +2731,133 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_FIRST_BOOL    ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_FIRST_BOOL   ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_FIRST_BOOL   ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_FIRST_BOOL     ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_FIRST_BOOL     ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_FIRST_BOOL    ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_FIRST_BOOL    ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_FIRST_BOOL      ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_FIRST_BOOL     ) ;
                     default : ;
                 }
-                break ;
 
             case GB_SECOND_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_SECOND_BOOL   ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_SECOND_BOOL  ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_SECOND_BOOL  ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_SECOND_BOOL    ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_SECOND_BOOL    ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_SECOND_BOOL   ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_SECOND_BOOL   ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_SECOND_BOOL     ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_SECOND_BOOL    ) ;
+                    default : ;
+                }
+
+            case GB_PAIR_opcode :
+
+                switch (add_opcode)
+                {
+                    case GB_LOR_opcode        : return (GxB_LOR_PAIR_BOOL      ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_PAIR_BOOL     ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_PAIR_BOOL     ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_PAIR_BOOL       ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_PAIR_BOOL      ) ;
                     default : ;
                 }
-                break ;
 
             case GB_LOR_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_LOR_BOOL      ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_LOR_BOOL     ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_LOR_BOOL     ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_LOR_BOOL       ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LOR_BOOL       ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LOR_BOOL      ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LOR_BOOL      ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LOR_BOOL        ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LOR_BOOL       ) ;
                     default : ;
                 }
-                break ;
 
             case GB_LAND_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_LAND_BOOL     ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_LAND_BOOL    ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_LAND_BOOL    ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_LAND_BOOL      ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LAND_BOOL      ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LAND_BOOL     ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LAND_BOOL     ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LAND_BOOL       ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LAND_BOOL      ) ;
                     default : ;
                 }
-                break ;
 
             case GB_LXOR_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_LXOR_BOOL     ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_LXOR_BOOL    ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_LXOR_BOOL    ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_LXOR_BOOL      ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LXOR_BOOL      ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LXOR_BOOL     ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LXOR_BOOL     ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LXOR_BOOL       ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LXOR_BOOL      ) ;
                     default : ;
                 }
-                break ;
 
             case GB_EQ_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_EQ_BOOL       ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_EQ_BOOL      ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_EQ_BOOL      ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_EQ_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_EQ_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_EQ_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_EQ_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_EQ_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_EQ_BOOL        ) ;
                     default : ;
                 }
-                break ;
 
             case GB_GT_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_GT_BOOL       ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_GT_BOOL      ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_GT_BOOL      ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_GT_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_GT_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_GT_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_GT_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_GT_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_GT_BOOL        ) ;
                     default : ;
                 }
-                break ;
 
             case GB_LT_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_LT_BOOL       ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_LT_BOOL      ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_LT_BOOL      ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_LT_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LT_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LT_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LT_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LT_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LT_BOOL        ) ;
                     default : ;
                 }
-                break ;
 
             case GB_GE_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_GE_BOOL       ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_GE_BOOL      ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_GE_BOOL      ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_GE_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_GE_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_GE_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_GE_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_GE_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_GE_BOOL        ) ;
                     default : ;
                 }
-                break ;
 
             case GB_LE_opcode :
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode         : return (GxB_LOR_LE_BOOL       ) ;
-                    case GB_LAND_opcode        : return (GxB_LAND_LE_BOOL      ) ;
-                    case GB_LXOR_opcode        : return (GxB_LXOR_LE_BOOL      ) ;
-                    case GB_EQ_opcode          : return (GxB_EQ_LE_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LE_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LE_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LE_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LE_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LE_BOOL        ) ;
                     default : ;
                 }
-                break ;
 
             default : ;
         }
@@ -2299,7 +2867,7 @@ GrB_Semiring gb_semiring            // built-in semiring, or NULL if error
     // not a built-in semiring; FUTURE: add complex semirings
     //--------------------------------------------------------------------------
 
-    ERROR ("invalid semiring")
+    ERROR ("invalid semiring (not found)")
     return (NULL) ;
 }
 
diff --git a/GraphBLAS/@GrB/private/util/gb_string_and_type_to_binop.c b/GraphBLAS/@GrB/private/util/gb_string_and_type_to_binop.c
index 07d5a16bd5..25156a971e 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_and_type_to_binop.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_and_type_to_binop.c
@@ -2,7 +2,7 @@
 // gb_string_and_type_to_binop: get a GraphBLAS operator from a string and type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -10,7 +10,7 @@
 #include "gb_matlab.h"
 
-// op_name: a MATLAB string defining the operator name (25 kinds):
+// op_name: a MATLAB string defining the operator name (26 kinds):
-// 10: 1st, 2nd, min, max, +, -, rminus, *, /, \
+// 11: 1st, 2nd, pair, min, max, +, -, rminus, *, /, \
 //  6: iseq, isne, isgt, islt, isge, isle,
 //  6: ==, ~=, >, <, >=, <=,
 //  3: ||, &&, xor
@@ -19,6 +19,7 @@
 //
 //      1st   first
 //      2nd   second
+//      pair
 //      +     plus
 //      -     minus
 //      *     times
@@ -34,7 +35,7 @@
 //      &&    &     and land
 //      xor   lxor
 
-// Total # of ops: 25*11 = 275, not including GrB_LOR, GrB_LAND, GrB_XOR,
+// Total # of ops: 26*11 = 286, not including GrB_LOR, GrB_LAND, GrB_XOR,
 // which are equivalent to the GxB_*_BOOL versions.
 
 // FUTURE: add complex operators
@@ -85,6 +86,44 @@ GrB_BinaryOp gb_string_and_type_to_binop    // return op from string and type
         if (type == gb_complex_type) return (...) ;
         #endif
 
+    }
+    else if (MATCH (op_name, "pair"))
+    { 
+
+        if (type == GrB_BOOL  ) return (GxB_PAIR_BOOL  ) ;
+        if (type == GrB_INT8  ) return (GxB_PAIR_INT8  ) ;
+        if (type == GrB_INT16 ) return (GxB_PAIR_INT16 ) ;
+        if (type == GrB_INT32 ) return (GxB_PAIR_INT32 ) ;
+        if (type == GrB_INT64 ) return (GxB_PAIR_INT64 ) ;
+        if (type == GrB_UINT8 ) return (GxB_PAIR_UINT8 ) ;
+        if (type == GrB_UINT16) return (GxB_PAIR_UINT16) ;
+        if (type == GrB_UINT32) return (GxB_PAIR_UINT32) ;
+        if (type == GrB_UINT64) return (GxB_PAIR_UINT64) ;
+        if (type == GrB_FP32  ) return (GxB_PAIR_FP32  ) ;
+        if (type == GrB_FP64  ) return (GxB_PAIR_FP64  ) ;
+        #ifdef GB_COMPLEX_TYPE
+        if (type == gb_complex_type) return (... ) ;
+        #endif
+
+    }
+    else if (MATCH (op_name, "any"))
+    { 
+
+        if (type == GrB_BOOL  ) return (GxB_ANY_BOOL  ) ;
+        if (type == GrB_INT8  ) return (GxB_ANY_INT8  ) ;
+        if (type == GrB_INT16 ) return (GxB_ANY_INT16 ) ;
+        if (type == GrB_INT32 ) return (GxB_ANY_INT32 ) ;
+        if (type == GrB_INT64 ) return (GxB_ANY_INT64 ) ;
+        if (type == GrB_UINT8 ) return (GxB_ANY_UINT8 ) ;
+        if (type == GrB_UINT16) return (GxB_ANY_UINT16) ;
+        if (type == GrB_UINT32) return (GxB_ANY_UINT32) ;
+        if (type == GrB_UINT64) return (GxB_ANY_UINT64) ;
+        if (type == GrB_FP32  ) return (GxB_ANY_FP32  ) ;
+        if (type == GrB_FP64  ) return (GxB_ANY_FP64  ) ;
+        #ifdef GB_COMPLEX_TYPE
+        if (type == gb_complex_type) return (... ) ;
+        #endif
+
     }
     else if (MATCH (op_name, "min"))
     { 
diff --git a/GraphBLAS/@GrB/private/util/gb_string_and_type_to_unop.c b/GraphBLAS/@GrB/private/util/gb_string_and_type_to_unop.c
index e302d7eceb..c2859b0e89 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_and_type_to_unop.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_and_type_to_unop.c
@@ -2,7 +2,7 @@
 // gb_string_and_type_to_unop: get a GraphBLAS operator from a string and type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_string_to_binop.c b/GraphBLAS/@GrB/private/util/gb_string_to_binop.c
index d45d6fbe9a..b23ad81d80 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_to_binop.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_to_binop.c
@@ -2,7 +2,7 @@
 // gb_string_to_binop: get a GraphBLAS operator from a string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_string_to_monoid.c b/GraphBLAS/@GrB/private/util/gb_string_to_monoid.c
index b77cb9430a..c199b07475 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_to_monoid.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_to_monoid.c
@@ -2,7 +2,7 @@
 // gb_string_to_monoid: get a GraphBLAS monoid from a string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_string_to_selectop.c b/GraphBLAS/@GrB/private/util/gb_string_to_selectop.c
index 0f9b3bf869..a5f7f2e80f 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_to_selectop.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_to_selectop.c
@@ -2,7 +2,7 @@
 // gb_string_to_selectop: get a GraphBLAS select operator from a string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_string_to_semiring.c b/GraphBLAS/@GrB/private/util/gb_string_to_semiring.c
index 00bd5cfb42..c7f923e830 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_to_semiring.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_to_semiring.c
@@ -2,7 +2,7 @@
 // gb_string_to_semiring: convert a string to a GraphBLAS semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -60,7 +60,7 @@ GrB_Semiring gb_string_to_semiring      // return a semiring from a string
     }
 
     GrB_BinaryOp mult = gb_string_and_type_to_binop (mult_name, mult_type) ;
-    CHECK_ERROR (mult == NULL, "invalid semiring") ;
+    CHECK_ERROR (mult == NULL, "invalid semiring (unknown multiply operator)") ;
 
     //--------------------------------------------------------------------------
     // get the add operator
@@ -69,7 +69,7 @@ GrB_Semiring gb_string_to_semiring      // return a semiring from a string
     GrB_Type add_type = mult->ztype ;
 
     GrB_BinaryOp add = gb_string_and_type_to_binop (add_name, add_type) ;
-    CHECK_ERROR (add == NULL, "invalid semiring") ;
+    CHECK_ERROR (add == NULL, "invalid semiring (unknown add operator)") ;
 
     //--------------------------------------------------------------------------
     // convert the add and mult operators to a semiring
diff --git a/GraphBLAS/@GrB/private/util/gb_string_to_type.c b/GraphBLAS/@GrB/private/util/gb_string_to_type.c
index 84cd40dcc5..dc751b8d19 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_to_type.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_to_type.c
@@ -2,7 +2,7 @@
 // gb_string_to_type: return the GraphBLAS type from a string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_string_to_unop.c b/GraphBLAS/@GrB/private/util/gb_string_to_unop.c
index 0f1919a85f..605129d2aa 100644
--- a/GraphBLAS/@GrB/private/util/gb_string_to_unop.c
+++ b/GraphBLAS/@GrB/private/util/gb_string_to_unop.c
@@ -2,7 +2,7 @@
 // gb_string_to_unop: get a GraphBLAS unary operator from a string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_type_to_mxstring.c b/GraphBLAS/@GrB/private/util/gb_type_to_mxstring.c
index cd27abdcee..cbab529a13 100644
--- a/GraphBLAS/@GrB/private/util/gb_type_to_mxstring.c
+++ b/GraphBLAS/@GrB/private/util/gb_type_to_mxstring.c
@@ -2,7 +2,7 @@
 // gb_type_to_mxstring: create a MATLAB string from a GraphBLAS type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_typecast.c b/GraphBLAS/@GrB/private/util/gb_typecast.c
index cedc6fd25d..9fa6f051f5 100644
--- a/GraphBLAS/@GrB/private/util/gb_typecast.c
+++ b/GraphBLAS/@GrB/private/util/gb_typecast.c
@@ -2,7 +2,7 @@
 // gb_typecast: typecast a GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/private/util/gb_usage.c b/GraphBLAS/@GrB/private/util/gb_usage.c
index 9ce96d4975..64ec69c921 100644
--- a/GraphBLAS/@GrB/private/util/gb_usage.c
+++ b/GraphBLAS/@GrB/private/util/gb_usage.c
@@ -2,12 +2,13 @@
 // gb_usage: check usage and make sure GrB.init has been called
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 #include "gb_matlab.h"
+#include "GB_printf.h"
 
 void gb_usage       // check usage and make sure GrB.init has been called
 (
@@ -22,7 +23,24 @@ void gb_usage       // check usage and make sure GrB.init has been called
 
     if (!GB_Global_GrB_init_called_get ( ))
     {
-        ERROR ("GrB.init must called before using any GraphBLAS function") ;
+
+        //----------------------------------------------------------------------
+        // initialize GraphBLAS
+        //----------------------------------------------------------------------
+
+        GB_printf_function = mexPrintf ;
+
+        OK (GxB_init (GrB_NONBLOCKING, mxMalloc, mxCalloc, mxRealloc, mxFree,
+            false)) ;
+
+        // MATLAB matrices are stored by column
+        OK (GxB_Global_Option_set (GxB_FORMAT, GxB_BY_COL)) ;
+
+        // print short format by default
+        GB_Global_print_format_set (1) ;
+
+        // print 1-based indices
+        GB_Global_print_one_based_set (true) ;
     }
 
     //--------------------------------------------------------------------------
diff --git a/GraphBLAS/@GrB/prod.m b/GraphBLAS/@GrB/prod.m
index 6b5dd2a68e..b4db25a60b 100644
--- a/GraphBLAS/@GrB/prod.m
+++ b/GraphBLAS/@GrB/prod.m
@@ -19,8 +19,10 @@
 % products to be computed, and the NaN behavior can be specified.  The
 % GraphBLAS prod (G,...) uses only a type of 'native', and a nanflag of
 % 'includenan'.  See 'help prod' for more details.
+%
+% See also GrB/max, GrB/min, GrB/sum.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/prune.m b/GraphBLAS/@GrB/prune.m
index af8a293557..8b97f42a05 100644
--- a/GraphBLAS/@GrB/prune.m
+++ b/GraphBLAS/@GrB/prune.m
@@ -5,7 +5,7 @@
 %
 % See also GrB/full, GrB.select, GrB.prune.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 1)
diff --git a/GraphBLAS/@GrB/random.m b/GraphBLAS/@GrB/random.m
index f48e5b5c41..bc1f488dfa 100644
--- a/GraphBLAS/@GrB/random.m
+++ b/GraphBLAS/@GrB/random.m
@@ -88,7 +88,7 @@
 %
 % See also sprand, sprandn, sprandsym, GrB/sprand, GrB/sprandn, GrB/sprandsym.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % defaults
@@ -125,7 +125,7 @@
     % C = GrB.random (A, ...) ;
     A = varargin {1} ;
     [m, n] = size (A) ;
-    if (symmetric & (m ~= n))
+    if (symmetric && (m ~= n))
         gb_error ('input matrix must be square') ;
     end
     [I, J] = GrB.extracttuples (A, desc) ;
diff --git a/GraphBLAS/@GrB/rdivide.m b/GraphBLAS/@GrB/rdivide.m
index 5ff7415098..79fc41fb82 100644
--- a/GraphBLAS/@GrB/rdivide.m
+++ b/GraphBLAS/@GrB/rdivide.m
@@ -12,7 +12,7 @@
 %
 % See also rdivide, GrB.emult, GrB.eadd.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
@@ -49,5 +49,5 @@
     end
 end
 
-C = GrB.eadd (A, '/', B) ;
+C = GrB.emult (A, '/', B) ;
 
diff --git a/GraphBLAS/@GrB/real.m b/GraphBLAS/@GrB/real.m
index 6a7ebbb8ed..6f57beef32 100644
--- a/GraphBLAS/@GrB/real.m
+++ b/GraphBLAS/@GrB/real.m
@@ -3,8 +3,10 @@
 % C = real (G) returns the real part of the GraphBLAS matrix G.  Since
 % all GraphBLAS matrices are currently real, real (G) is just G.  Complex
 % support will be added in the future.
+%
+% See also GrB/conj.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = G ;
diff --git a/GraphBLAS/@GrB/reduce.m b/GraphBLAS/@GrB/reduce.m
index 5ffa3f72b2..6e31e54347 100644
--- a/GraphBLAS/@GrB/reduce.m
+++ b/GraphBLAS/@GrB/reduce.m
@@ -31,7 +31,7 @@
 
 % FUTURE: add complex monoids.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/repmat.m b/GraphBLAS/@GrB/repmat.m
index 4680578e7f..95412405d2 100644
--- a/GraphBLAS/@GrB/repmat.m
+++ b/GraphBLAS/@GrB/repmat.m
@@ -6,7 +6,7 @@
 %
 % See also kron, GrB.kronecker.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 3)
diff --git a/GraphBLAS/@GrB/reshape.m b/GraphBLAS/@GrB/reshape.m
index c6d1bb7402..b30151a85e 100644
--- a/GraphBLAS/@GrB/reshape.m
+++ b/GraphBLAS/@GrB/reshape.m
@@ -3,8 +3,10 @@
 % C = reshape (G, m, n) or C = reshape (G, [m n]) returns the m-by-n
 % matrix whose elements are taken columnwise from G.  The matrix G must
 % have numel (G) == m*n.  That is numel (G) == numel (C) must be true.
+%
+% See also numel, squeeze.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [mold, nold] = size (G) ;
diff --git a/GraphBLAS/@GrB/round.m b/GraphBLAS/@GrB/round.m
index fd444783f1..cf60cd104a 100644
--- a/GraphBLAS/@GrB/round.m
+++ b/GraphBLAS/@GrB/round.m
@@ -7,7 +7,7 @@
 
 % FUTURE: this will be much faster as a mexFunction.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isfloat (G) && GrB.entries (G) > 0)
diff --git a/GraphBLAS/@GrB/select.m b/GraphBLAS/@GrB/select.m
index 59c851e0d3..70a72bcdb3 100644
--- a/GraphBLAS/@GrB/select.m
+++ b/GraphBLAS/@GrB/select.m
@@ -86,7 +86,7 @@
 %
 % See also tril, triu, diag.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/selectopinfo.m b/GraphBLAS/@GrB/selectopinfo.m
index 803ff44236..87d0daf8f7 100644
--- a/GraphBLAS/@GrB/selectopinfo.m
+++ b/GraphBLAS/@GrB/selectopinfo.m
@@ -38,7 +38,7 @@ function selectopinfo (op)
 % See also GrB.binopinfo, GrB.descriptorinfo, GrB.monoidinfo,
 % GrB.semiringinfo, GrB.unopinfo.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
diff --git a/GraphBLAS/@GrB/semiringinfo.m b/GraphBLAS/@GrB/semiringinfo.m
index 4f2884538b..71cf48a7e0 100644
--- a/GraphBLAS/@GrB/semiringinfo.m
+++ b/GraphBLAS/@GrB/semiringinfo.m
@@ -40,7 +40,7 @@ function semiringinfo (s, type)
 
 % FUTURE: add complex semirings
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
diff --git a/GraphBLAS/@GrB/sign.m b/GraphBLAS/@GrB/sign.m
index 43b73af32c..a825cb353b 100644
--- a/GraphBLAS/@GrB/sign.m
+++ b/GraphBLAS/@GrB/sign.m
@@ -7,7 +7,7 @@
 %
 % See also abs.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = spones (GrB.select (G, '>0')) - spones (GrB.select (G, '<0')) ;
diff --git a/GraphBLAS/@GrB/single.m b/GraphBLAS/@GrB/single.m
index 835607577f..8e2f4b4f14 100644
--- a/GraphBLAS/@GrB/single.m
+++ b/GraphBLAS/@GrB/single.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, logical, int8, int16, int32, int64,
 % uint8, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'single') ;
diff --git a/GraphBLAS/@GrB/size.m b/GraphBLAS/@GrB/size.m
index 0b53906aa6..0fea3ad028 100644
--- a/GraphBLAS/@GrB/size.m
+++ b/GraphBLAS/@GrB/size.m
@@ -5,7 +5,7 @@
 %
 % See also GrB/length, GrB/numel.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargout <= 1)
diff --git a/GraphBLAS/@GrB/sparse.m b/GraphBLAS/@GrB/sparse.m
index b16c7e7c2a..01c189bcd4 100644
--- a/GraphBLAS/@GrB/sparse.m
+++ b/GraphBLAS/@GrB/sparse.m
@@ -5,7 +5,7 @@
 %
 % See also GrB/issparse, GrB/full, GrB.type, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = G ;
diff --git a/GraphBLAS/@GrB/speye.m b/GraphBLAS/@GrB/speye.m
index c6d055ffe9..bf767b1b0c 100644
--- a/GraphBLAS/@GrB/speye.m
+++ b/GraphBLAS/@GrB/speye.m
@@ -5,7 +5,7 @@
 %
 % See also GrB.eye.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.eye (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/spfun.m b/GraphBLAS/@GrB/spfun.m
index b31be38975..c451918175 100644
--- a/GraphBLAS/@GrB/spfun.m
+++ b/GraphBLAS/@GrB/spfun.m
@@ -7,7 +7,7 @@
 % FUTURE: this would be much faster as a mexFunction, but calling feval
 % from inside a mexFunction would not be trivial (perhaps not possible).
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m, n] = size (G) ;
diff --git a/GraphBLAS/@GrB/spones.m b/GraphBLAS/@GrB/spones.m
index eaba7f509b..09b9f9b286 100644
--- a/GraphBLAS/@GrB/spones.m
+++ b/GraphBLAS/@GrB/spones.m
@@ -14,7 +14,7 @@
 %
 % See also spfun, GrB.apply.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 1)
diff --git a/GraphBLAS/@GrB/sprandsym.m b/GraphBLAS/@GrB/sprandsym.m
index 996d01fe66..f233c3d022 100644
--- a/GraphBLAS/@GrB/sprandsym.m
+++ b/GraphBLAS/@GrB/sprandsym.m
@@ -31,7 +31,7 @@
 %
 % See also sprand, sprandn, GrB/sprand, GrB.random.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % make the default 'normal' instead of 'uniform'
diff --git a/GraphBLAS/@GrB/sprintf.m b/GraphBLAS/@GrB/sprintf.m
index e73ea48bf6..a1b06e73cc 100644
--- a/GraphBLAS/@GrB/sprintf.m
+++ b/GraphBLAS/@GrB/sprintf.m
@@ -6,7 +6,7 @@
 %
 % See also fprintf, sprintf, GrB/fprintf.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = gb_printf_helper ('sprintf', varargin {:}) ;
diff --git a/GraphBLAS/@GrB/sqrt.m b/GraphBLAS/@GrB/sqrt.m
index abad7207da..6e4fd5255c 100644
--- a/GraphBLAS/@GrB/sqrt.m
+++ b/GraphBLAS/@GrB/sqrt.m
@@ -6,7 +6,7 @@
 %
 % See also GrB.apply.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = G.^(.5) ;
diff --git a/GraphBLAS/@GrB/subassign.m b/GraphBLAS/@GrB/subassign.m
index a3d9b4c483..5abc53d149 100644
--- a/GraphBLAS/@GrB/subassign.m
+++ b/GraphBLAS/@GrB/subassign.m
@@ -52,7 +52,7 @@
 %
 % See also GrB.assign, subsasgn.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/subsasgn.m b/GraphBLAS/@GrB/subsasgn.m
index e886516468..859eac276f 100644
--- a/GraphBLAS/@GrB/subsasgn.m
+++ b/GraphBLAS/@GrB/subsasgn.m
@@ -34,7 +34,7 @@
 
 % FUTURE: add linear indexing, and allow the matrix to grow in size.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (~isequal (S.type, '()'))
diff --git a/GraphBLAS/@GrB/subsref.m b/GraphBLAS/@GrB/subsref.m
index 02e3e473a8..7264c53ad5 100644
--- a/GraphBLAS/@GrB/subsref.m
+++ b/GraphBLAS/@GrB/subsref.m
@@ -32,7 +32,7 @@
 
 % FUTURE: add linear indexing.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (length (S) > 1)
diff --git a/GraphBLAS/@GrB/sum.m b/GraphBLAS/@GrB/sum.m
index 5b15c481e1..0db0523522 100644
--- a/GraphBLAS/@GrB/sum.m
+++ b/GraphBLAS/@GrB/sum.m
@@ -18,8 +18,10 @@
 % sums to be computed, and the NaN behavior can be specified.  The
 % GraphBLAS sum (G,...) uses only a type of 'native', and a nanflag of
 % 'includenan'.  See 'help sum' for more details.
+%
+% See also GrB/prod, GrB/max, GrB/min.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isequal (GrB.type (G), 'logical'))
diff --git a/GraphBLAS/@GrB/symamd.m b/GraphBLAS/@GrB/symamd.m
index 27e5524c82..75b4cc2e7a 100644
--- a/GraphBLAS/@GrB/symamd.m
+++ b/GraphBLAS/@GrB/symamd.m
@@ -4,7 +4,7 @@
 %
 % See also symamd, GrB/amd, GrB/colamd, GrB/symrcm.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [p, varargout{1:nargout-1}] = symamd (double (G), varargin {:}) ;
diff --git a/GraphBLAS/@GrB/symrcm.m b/GraphBLAS/@GrB/symrcm.m
index 794b9d6cac..04140c662c 100644
--- a/GraphBLAS/@GrB/symrcm.m
+++ b/GraphBLAS/@GrB/symrcm.m
@@ -4,7 +4,7 @@
 %
 % See also symrcm, GrB/amd, GrB/colamd, GrB/symamd.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 p = builtin ('symrcm', logical (G)) ;
diff --git a/GraphBLAS/@GrB/threads.m b/GraphBLAS/@GrB/threads.m
index f7c40cf924..5769dc1fdd 100644
--- a/GraphBLAS/@GrB/threads.m
+++ b/GraphBLAS/@GrB/threads.m
@@ -31,7 +31,7 @@
 %
 % See also feature, maxNumCompThreads, GrB.chunk.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 nthreads = gbthreads (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/times.m b/GraphBLAS/@GrB/times.m
index bdf82ed2dc..c46df016a2 100644
--- a/GraphBLAS/@GrB/times.m
+++ b/GraphBLAS/@GrB/times.m
@@ -10,7 +10,7 @@
 %
 % See also GrB/mtimes, GrB.emult, GrB.mxm.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/trans.m b/GraphBLAS/@GrB/trans.m
index 3f5eda386a..da3df21a50 100644
--- a/GraphBLAS/@GrB/trans.m
+++ b/GraphBLAS/@GrB/trans.m
@@ -18,7 +18,7 @@
 %
 % See also transpose, ctranspose.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/transpose.m b/GraphBLAS/@GrB/transpose.m
index e63850114c..2e92c855b4 100644
--- a/GraphBLAS/@GrB/transpose.m
+++ b/GraphBLAS/@GrB/transpose.m
@@ -3,7 +3,7 @@
 %
 % See also GrB.trans, ctranspose.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.trans (G) ;
diff --git a/GraphBLAS/@GrB/tricount.m b/GraphBLAS/@GrB/tricount.m
index 0c0f4d5bb0..702e2bf6a9 100644
--- a/GraphBLAS/@GrB/tricount.m
+++ b/GraphBLAS/@GrB/tricount.m
@@ -4,8 +4,8 @@
 % spones (A) must be symmetric; results are undefined if spones (A) is
 % unsymmetric.  Diagonal entries are ignored.
 %
-% To check the input matrix A, use GrB.tricount (A, 'check').  This check takes
-% additional time so by default the input is not checked.
+% To check the input matrix A, use GrB.tricount (A, 'check').  This check
+% takes additional time so by default the input is not checked.
 %
 % See also GrB.ktruss.
 
@@ -19,29 +19,30 @@
     check = isequal (check, 'check') ;
 end
 
-int_type = 'int64' ;
-if (n < intmax ('int32'))
-    int_type = 'int32' ;
+if (check && ~issymmetric (spones (A)))
+    gb_error ('pattern of A must be symmetric') ;
 end
-A = spones (A, int_type) ;
 
-if (check && ~issymmetric (A))
-    gb_error ('spones (A) must be symmetric') ;
-end
-
-C = GrB (n, n, int_type, GrB.format (A)) ;
+% C, L, and U will have the same format as A
+C = GrB (n, n, 'int64', GrB.format (A)) ;
 L = tril (A, -1) ;
 U = triu (A, 1) ;
 
 % Inside GraphBLAS, the methods below are identical.  For example, L stored by
-% row is the same data structure as U stored by column.
+% row is the same data structure as U stored by column.  Both use the
+% SandiaDot2 method as defined in LAGraph (case 6), which is typically the
+% fastest of the methods in LAGraph_tricount.
+
+desc.mask = 'structural' ;
 
 if (GrB.isbyrow (A))
-    % C<L> = L*U'
-    C = GrB.mxm (C, L, '+.*', L, U, struct ('in1', 'transpose')) ;
+    % C<U> = U*L': SandiaDot2 method
+    desc.in1 = 'transpose' ;
+    C = GrB.mxm (C, U, '+.pair.int64', U, L, desc) ;
 else
-    % C<U> = L'*U
-    C = GrB.mxm (C, U, '+.*', L, U, struct ('in0', 'transpose')) ;
+    % C<U> = L'*U: SandiaDot2 method
+    desc.in0 = 'transpose' ;
+    C = GrB.mxm (C, U, '+.pair.int64', L, U, desc) ;
 end
 
 s = full (double (GrB.reduce ('+.int64', C))) ;
diff --git a/GraphBLAS/@GrB/tril.m b/GraphBLAS/@GrB/tril.m
index f8f7e799b5..94ab552239 100644
--- a/GraphBLAS/@GrB/tril.m
+++ b/GraphBLAS/@GrB/tril.m
@@ -7,7 +7,7 @@
 %
 % See also GrB/triu.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 2)
diff --git a/GraphBLAS/@GrB/triu.m b/GraphBLAS/@GrB/triu.m
index 7d235f4dd9..ee71989fea 100644
--- a/GraphBLAS/@GrB/triu.m
+++ b/GraphBLAS/@GrB/triu.m
@@ -7,7 +7,7 @@
 %
 % See also GrB/tril.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 2)
diff --git a/GraphBLAS/@GrB/true.m b/GraphBLAS/@GrB/true.m
index d3aa3bae07..bfd42187ff 100644
--- a/GraphBLAS/@GrB/true.m
+++ b/GraphBLAS/@GrB/true.m
@@ -5,7 +5,7 @@
 %
 % See also zeros, false, true.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.subassign (false (varargin {:}), true) ;
diff --git a/GraphBLAS/@GrB/type.m b/GraphBLAS/@GrB/type.m
index 9b0b2d51e0..0a3a54972d 100644
--- a/GraphBLAS/@GrB/type.m
+++ b/GraphBLAS/@GrB/type.m
@@ -13,7 +13,7 @@
 %
 % See also class, GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isa (X, 'GrB'))
diff --git a/GraphBLAS/@GrB/uint16.m b/GraphBLAS/@GrB/uint16.m
index 17efbf3d5b..0808b0768a 100644
--- a/GraphBLAS/@GrB/uint16.m
+++ b/GraphBLAS/@GrB/uint16.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int16, int32,
 % int64, uint8, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'uint16') ;
diff --git a/GraphBLAS/@GrB/uint32.m b/GraphBLAS/@GrB/uint32.m
index 564c08757f..ce91d05ebd 100644
--- a/GraphBLAS/@GrB/uint32.m
+++ b/GraphBLAS/@GrB/uint32.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int16, int32,
 % int64, uint8, uint16, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'uint32') ;
diff --git a/GraphBLAS/@GrB/uint64.m b/GraphBLAS/@GrB/uint64.m
index 922c8c3c12..ced49a8e95 100644
--- a/GraphBLAS/@GrB/uint64.m
+++ b/GraphBLAS/@GrB/uint64.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int16, int32,
 % int64, uint8, uint16, and uint32.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'uint64') ;
diff --git a/GraphBLAS/@GrB/uint8.m b/GraphBLAS/@GrB/uint8.m
index c3e3d8ea28..2a1a8ee31d 100644
--- a/GraphBLAS/@GrB/uint8.m
+++ b/GraphBLAS/@GrB/uint8.m
@@ -10,7 +10,7 @@
 % See also GrB, double, complex, single, logical, int8, int16, int32,
 % int64, uint16, uint32, and uint64.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = gbfull (G.opaque, 'uint8') ;
diff --git a/GraphBLAS/@GrB/uminus.m b/GraphBLAS/@GrB/uminus.m
index d94415bf05..5ee2bd7791 100644
--- a/GraphBLAS/@GrB/uminus.m
+++ b/GraphBLAS/@GrB/uminus.m
@@ -1,8 +1,10 @@
 function C = uminus (G)
 %UMINUS negate a GraphBLAS sparse matrix.
 % C = -G negates the entries of a GraphBLAS matrix.
+%
+% See also GrB.apply, GrB/minus, GrB/uplus.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = GrB.apply ('-', G) ;
diff --git a/GraphBLAS/@GrB/unopinfo.m b/GraphBLAS/@GrB/unopinfo.m
index 9c2f7a0fe9..acd4b2a3b3 100644
--- a/GraphBLAS/@GrB/unopinfo.m
+++ b/GraphBLAS/@GrB/unopinfo.m
@@ -48,7 +48,7 @@ function unopinfo (op, type)
 
 % FUTURE: add complex unary operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
diff --git a/GraphBLAS/@GrB/uplus.m b/GraphBLAS/@GrB/uplus.m
index 6f5f3ed96d..b9cedbc971 100644
--- a/GraphBLAS/@GrB/uplus.m
+++ b/GraphBLAS/@GrB/uplus.m
@@ -2,8 +2,10 @@
 %UPLUS C = +G.
 % C = +G is the unary plus operator.  It does nothing to a GraphBLAS
 % matrix, so C is just equal to G.
+%
+% See also GrB/uminus.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 C = G ;
diff --git a/GraphBLAS/@GrB/vertcat.m b/GraphBLAS/@GrB/vertcat.m
index 6cc7009670..3ca622788f 100644
--- a/GraphBLAS/@GrB/vertcat.m
+++ b/GraphBLAS/@GrB/vertcat.m
@@ -9,7 +9,7 @@
 % FUTURE: this will be much faster when it is a mexFunction.
 % The version below requires a sort in GrB.build.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % determine the size of each matrix and the size of the result
diff --git a/GraphBLAS/@GrB/vreduce.m b/GraphBLAS/@GrB/vreduce.m
index b75c83ff76..05297f6a2f 100644
--- a/GraphBLAS/@GrB/vreduce.m
+++ b/GraphBLAS/@GrB/vreduce.m
@@ -30,7 +30,7 @@
 %
 % See also GrB.reduce, sum, prod, max, min.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [args, is_gb] = gb_get_args (varargin {:}) ;
diff --git a/GraphBLAS/@GrB/xor.m b/GraphBLAS/@GrB/xor.m
index 9682dcea48..28a1153456 100644
--- a/GraphBLAS/@GrB/xor.m
+++ b/GraphBLAS/@GrB/xor.m
@@ -6,7 +6,7 @@
 %
 % See also GrB/and, GrB/or.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isscalar (A))
diff --git a/GraphBLAS/@GrB/zeros.m b/GraphBLAS/@GrB/zeros.m
index 501232db5f..32fdf1ff75 100644
--- a/GraphBLAS/@GrB/zeros.m
+++ b/GraphBLAS/@GrB/zeros.m
@@ -5,7 +5,7 @@
 %
 % See also GrB/ones, GrB/false, GrB/true.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 G = varargin {end} ;
diff --git a/GraphBLAS/Contents.m b/GraphBLAS/Contents.m
index afb8f8f559..337bd3a690 100644
--- a/GraphBLAS/Contents.m
+++ b/GraphBLAS/Contents.m
@@ -61,7 +61,7 @@
 % Static Methods: used as GrB.method; inputs can be any GraphBLAS or
 % MATLAB matrix, in any combination.
 %
-%   init            finalize
+%   init            finalize        burble
 %   apply           emult           kronecker       select          
 %   assign          entries         ktruss          selectopinfo    
 %   bfs             expand          laplacian       semiringinfo    
@@ -73,7 +73,7 @@
 %   descriptorinfo  isbycol         pagerank        type            
 %   dnn             isbyrow         prune           unopinfo        
 %   eadd            isfull          random          vreduce         
-%   empty           issigned        reduce          
+%   empty           issigned        reduce 
 %
 % Tim Davis, Texas A&M University, http://faculty.cse.tamu.edu/davis/GraphBLAS
 
diff --git a/GraphBLAS/GAP/Contents.m b/GraphBLAS/GAP/Contents.m
new file mode 100644
index 0000000000..ff0021a91e
--- /dev/null
+++ b/GraphBLAS/GAP/Contents.m
@@ -0,0 +1,13 @@
+% GAP: GAP benchmark methods in MATLAB (in progress)
+%
+% Files
+%   gap          - run GAP benchmarks
+%   gap_bfs      - run bfs for the GAP benchmark
+%   gap_pagerank - PageRank of a graph (GAP benchmark algorithm)
+%   gap_pr       - run pagerank for the GAP benchmark
+%   gap_tc       - run tricount for the GAP benchmark
+%   tric         - triangle counting tests
+%   ttest        - run triangle counting tests
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
diff --git a/GraphBLAS/GAP/gap.m b/GraphBLAS/GAP/gap.m
new file mode 100644
index 0000000000..d01cc5d6c1
--- /dev/null
+++ b/GraphBLAS/GAP/gap.m
@@ -0,0 +1,129 @@
+%GAP run GAP benchmarks
+% Runs GrB.bfs, GrB.pagerank and GrB.tricount on each matrix, printing timings.
+clear all
+rng ('default') ;
+
+matrices = {
+    'GAP/GAP-kron'
+    'GAP/GAP-urand'
+    'GAP/GAP-twitter'
+    'GAP/GAP-web'
+    'GAP/GAP-road' } ;
+% smaller test matrices; the last assignment below is the one that is used
+matrices = { 'HB/west0067' } ;
+matrices = { 'HB/west0067', 'LAW/indochina-2004' } ;
+
+for k = 1:length(matrices)
+
+    %---------------------------------------------------------------------------
+    % get the GAP problem
+    %---------------------------------------------------------------------------
+
+    Prob = ssget (matrices {k}) ;
+    A_col = GrB (Prob.A, 'by col', 'int32') ;
+    A_row = GrB (Prob.A, 'by row', 'int32') ;
+    n = size (Prob.A,1) ;
+    try
+        % transposed so that "for s = sources" below visits one node per pass
+        sources = Prob.aux.sources' ;
+    catch
+        % no source list in Prob.aux: pick 64 random nodes instead
+        sources = randperm (n, 64) ;
+    end
+    fprintf ('\n%s: nodes: %g million  nvals: %g million\n', ...
+        Prob.name, n / 1e6, nnz (Prob.A) / 1e6) ;
+
+    % fix west0067
+    if (n == 67)
+        A_col = spones (A_col) ;
+        A_row = spones (A_row) ;
+    end
+
+    clear Prob
+
+    %---------------------------------------------------------------------------
+    % BFS
+    %---------------------------------------------------------------------------
+
+    fprintf ('\nBFS tests:\n') ;
+    ntrials = length (sources) ;
+    tot = 0 ;
+    for s = sources
+        tic
+        [v, parent] = GrB.bfs (A_row, s) ;
+        t = toc ;
+        tot = tot + t ;
+        fprintf ('source: %8d  #visited %8d  levels: %8d time: %g\n', ...
+            s, nnz (v), max (v), t) ;
+    end
+    fprintf ('average bfs time: %g (%d trials)\n', tot / ntrials, ntrials) ;
+    if (n < 1000)
+        parent (s) = 0 ;    % mark the last source as the root for treeplot
+        treeplot (double (parent))
+    end
+
+    %---------------------------------------------------------------------------
+    % PageRank
+    %---------------------------------------------------------------------------
+
+    % Note that the GrB.pagerank is slightly different than the GAP pagerank.
+    % The GAP benchmark ignores nodes with zero out-degree.  The GrB.pagerank
+    % matches the MATLAB @graph/centrality (A, 'pagerank') method, which
+    % handles such nodes properly.
+
+    fprintf ('\nPageRank tests:\n') ;
+    % the number of PageRank trials is set below, independently of the sources
+    opts.type = 'single' ;
+    ntrials = 1 ; % 16 ;
+    tot = 0 ;
+    for trial = 1:ntrials
+        tic
+        r = GrB.pagerank (A_col, opts) ;
+        t = toc ;
+        tot = tot + t ;
+        fprintf ('pagerank time: %g\n', t) ;
+    end
+    fprintf ('average pagerank time: %g (%d trials)\n', tot/ntrials, ntrials) ;
+
+    if (n < 8*1e6)
+        fprintf ('\nCompare with built-in MATLAB pagerank:\n') ;
+        G = digraph (double (A_col)) ;
+        tic
+        rmatlab = centrality (G, 'pagerank') ;
+        t = toc ;
+        fprintf ('MATLAB time: %g sec (one trial)\n', t) ;
+
+        [r1, i1] = sort (full (double (r))) ;
+        [r2, i2] = sort (full (double (rmatlab))) ;
+
+        for p = 1:10
+            fprintf ('rank: %2d GrB: node %5d (%10.4e)', p, i1 (p), r1 (p)) ;
+            fprintf (' MATLAB: node %5d (%10.4e)\n', i2 (p), r2 (p)) ;
+        end
+    end
+
+    clear r rmatlab
+
+    %---------------------------------------------------------------------------
+    % triangle count
+    %---------------------------------------------------------------------------
+
+    fprintf ('\nTriangle Count tests:\n') ;
+
+    % matrix must be symmetric
+    S = GrB (A_row + A_row', 'logical') ;
+
+    ntrials = 1 ; % 3 ;
+    tot = 0 ;
+    for trials = 1:ntrials
+        tic
+        c = GrB.tricount (S) ;
+        t = toc ;
+        tot = tot + t ;
+        fprintf ('# of triangles: %8d time: %g\n', c, t) ;
+    end
+    fprintf ('average tricount time: %g (%d trials)\n', ...
+        tot / ntrials, ntrials) ;
+
+    clear S
+
+end
+
diff --git a/GraphBLAS/GAP/gap_bfs.m b/GraphBLAS/GAP/gap_bfs.m
new file mode 100644
index 0000000000..b6e8bfb6b6
--- /dev/null
+++ b/GraphBLAS/GAP/gap_bfs.m
@@ -0,0 +1,114 @@
+function gap_bfs
+%GAP_BFS run bfs for the GAP benchmark
+% Times GrB.bfs, then MATLAB bfsearch, from each source node of each matrix.
+rng ('default') ;
+
+% warmup, to make sure GrB library is loaded
+C = GrB (1) * GrB (1) + 1 ;
+clear C
+
+% smaller test matrices (overridden by the GAP list assigned below):
+% matrices = { 'HB/west0067', 'LAW/indochina-2004' } ;
+matrices = { 'HB/west0067' } ;
+matrices = { 'LAW/indochina-2004' } ;
+
+% the GAP test matrices:
+matrices = {
+    'GAP/GAP-kron'
+    'GAP/GAP-urand'
+    'GAP/GAP-twitter'
+    'GAP/GAP-web'
+    'GAP/GAP-road'
+    } ;
+% pick the thread count by host name; strncmp is safe for short host names
+[status, result] = system ('hostname') ;
+clear status
+if (strncmp (result, 'hyper', 5))
+    fprintf ('hypersparse: %d threads\n', GrB.threads (40)) ;
+elseif (strncmp (result, 'slash', 5))
+    fprintf ('slash: %d threads\n', GrB.threads (8)) ;
+else
+    fprintf ('default: %d threads\n', GrB.threads) ;
+end
+clear result
+
+for k = 1:length(matrices)
+
+    %---------------------------------------------------------------------------
+    % get the GAP problem
+    %---------------------------------------------------------------------------
+
+    t1 = tic ;
+    clear A Prob d
+    Prob = ssget (matrices {k}) ;
+    A = GrB (Prob.A, 'by row', 'logical') ;
+    n = size (Prob.A,1) ;
+    try
+        sources = Prob.aux.sources ;
+    catch
+        % no source list in Prob.aux: pick 64 random nodes instead
+        sources = randperm (n, 64) ;
+    end
+    fprintf ('\n%s: nodes: %g million  nvals: %g million\n', ...
+        Prob.name, n / 1e6, nnz (Prob.A) / 1e6) ;
+    clear Prob
+    t1 = toc (t1) ;
+    fprintf ('load time: %g sec\n', t1) ;
+
+    ntrials = length (sources) ;
+
+    %---------------------------------------------------------------------------
+    % BFS with GrB.bfs
+    %---------------------------------------------------------------------------
+
+    fprintf ('\nGrB.bfs  tests:\n') ;
+
+    tot = 0 ;
+    for trial = 1:ntrials
+        s = sources (trial) ;
+        tstart = tic ;
+        [v, parent] = GrB.bfs (A, s) ;
+        % v = GrB.bfs (A, s) ;
+        t = toc (tstart) ;
+        tot = tot + t ;
+        fprintf ('trial: %2d source: %8d GrB.bfs  time: %8.3f ', trial, s, t) ;
+        fprintf ('visited: %8d depth: %8d\n', nnz (v), max (v)) ;
+        % pause
+    end
+    fprintf ('avg GrB.bfs  time:  %g (%d trials)\n', tot/ntrials, ntrials) ;
+
+    %---------------------------------------------------------------------------
+    % BFS with MATLAB
+    %---------------------------------------------------------------------------
+
+    % if (n < 24*1e6)
+    try
+        fprintf ('\nCompare with built-in MATLAB bfs:\n') ;
+        A = GrB (A, 'by col') ;
+        A = double (A) ;
+        G = digraph (A) ;
+        clear A
+
+        tot = 0 ;
+        for trial = 1:ntrials
+            s = sources (trial) ;
+            tstart = tic ;
+            [table, edgetonew] = bfsearch (G, s, 'edgetonew') ;
+            % [nodes] = bfsearch (G, s) ;
+            t = toc (tstart) ;
+            tot = tot + t ;
+            fprintf ('trial: %2d source: %8d bfsearch time: %8.3f ', ...
+                trial, s, t) ;
+            fprintf ('visited: %8d\n', 1 + size (table, 1)) ;
+        end
+        fprintf ('avg bfsearch time:  %g (%d trials)\n', tot/ntrials, ntrials) ;
+
+        clear G
+
+    catch me
+        me
+        fprintf ('MATLAB failed\n') ;
+    end
+
+    % clear G table parent v nodes edgetonew
+end
+
diff --git a/GraphBLAS/GAP/gap_pagerank.m b/GraphBLAS/GAP/gap_pagerank.m
new file mode 100644
index 0000000000..44e70233d6
--- /dev/null
+++ b/GraphBLAS/GAP/gap_pagerank.m
@@ -0,0 +1,70 @@
+function [r, niter] = gap_pagerank (A, d)
+%GAP_PAGERANK PageRank of a graph (GAP benchmark algorithm)
+% [r, niter] = gap_pagerank (A, d) computes the PageRank r of a graph with
+% adjacency matrix A, using the same algorithm as the GAP pagerank.  d on input
+% is the vector of out degrees, where d(i) = nnz(A(i,:)).  Sinks are ignored in
+% the GAP benchmark, so d(i) should be set to 1 if nnz(A(i,:)) is zero.
+% If d is omitted, it is computed from A.  niter is the number of iterations
+% taken, which is 100 if the tolerance of 1e-4 was not reached by then.
+%
+% A can be a GraphBLAS or MATLAB matrix, and must be stored by column.
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+% set options
+tol = 1e-4 ;
+maxit = 100 ;
+damp = 0.85 ;
+type = 'single' ;
+
+n = size (A, 1) ;
+
+% native, if A is already stored by column
+native = GrB.isbycol (A) ;
+if (~native)
+    error ('wrong matrix') ;
+end
+
+if (nargin < 2)
+    td = tic ;
+    d = GrB.entries (A, 'row', 'degree') ;
+    sinks = find (d == 0) ;
+    if (length (sinks) > 0)
+        d (sinks) = 1 ;
+    end
+    d = GrB (d, 'single') ;
+    t = toc (td) ;
+    fprintf ('degree time: %g\n', t) ;
+end
+
+% teleport factor
+tfactor = cast ((1 - damp) / n, type) ;
+
+% use A' in GrB.mxm
+desc.in0 = 'transpose' ;
+
+% initial PageRank: all nodes have rank 1/n
+r = GrB (ones (n, 1, type) / n) ;
+
+% prescale d with damp so it doesn't have to be done in each iteration
+d = d / damp ;
+
+% compute the PageRank
+for iter = 1:maxit
+    prior = r ;
+    % r(:) = tfactor
+    r = GrB.expand (tfactor, r) ;
+    % t = prior ./ d
+    t = prior ./ d ;
+    % r = r + A' * (prior./d)
+    r = GrB.mxm (r, '+', A, '+.2nd.single', t, desc) ;
+    % e = norm (r-prior,1)
+    e = GrB.normdiff (r, prior, 1) ;
+    % set niter on every pass so it is defined even if tol is never reached
+    niter = iter ;
+    if (e < tol)
+        break ;
+    end
+end
+
diff --git a/GraphBLAS/GAP/gap_pr.m b/GraphBLAS/GAP/gap_pr.m
new file mode 100644
index 0000000000..90ffd5ad0a
--- /dev/null
+++ b/GraphBLAS/GAP/gap_pr.m
@@ -0,0 +1,136 @@
+function gap_pr
+%GAP_PR run pagerank for the GAP benchmark
+
+rng ('default') ;
+
+% warmup, to make sure GrB library is loaded
+C = GrB (1) * GrB (1) + 1 ;
+clear C
+
+% smaller test matrices (overridden by the GAP list just below):
+matrices = { 'HB/west0067', 'LAW/indochina-2004' } ;
+
+% the GAP test matrices:
+matrices = {
+    'GAP/GAP-kron'
+    'GAP/GAP-urand'
+    'GAP/GAP-twitter'
+    'GAP/GAP-web'
+    'GAP/GAP-road'
+    } ;
+
+% pick the # of threads by hostname; strncmp is safe if the name is short
+[~, result] = system ('hostname') ;
+if (strncmp (result, 'hyper', 5))
+    fprintf ('hypersparse: %d threads\n', GrB.threads (40)) ;
+elseif (strncmp (result, 'slash', 5))
+    fprintf ('slash: %d threads\n', GrB.threads (8)) ;
+else
+    fprintf ('default: %d threads\n', GrB.threads) ;
+end
+clear result
+
+for k = 1:length(matrices)
+
+    %---------------------------------------------------------------------------
+    % get the GAP problem
+    %---------------------------------------------------------------------------
+
+    t1 = tic ;
+    clear A Prob d
+    Prob = ssget (matrices {k}) ;
+    A = GrB (Prob.A, 'by col', 'logical') ;
+    n = size (Prob.A,1) ;
+    fprintf ('\n%s: nodes: %g million  nvals: %g million\n', ...
+        Prob.name, n / 1e6, nnz (Prob.A) / 1e6) ;
+    clear Prob
+    t1 = toc (t1) ;
+    fprintf ('load time: %g sec\n', t1) ;
+
+    t1 = tic ;
+    d = GrB.entries (A, 'row', 'degree') ;
+    sinks = find (d == 0) ;
+    if (~isempty (sinks))
+        d (sinks) = 1 ;
+    end
+    clear sinks
+    d = GrB (d, 'single') ;
+    t1 = toc (t1) ;
+    fprintf ('degree time: %g sec\n', t1) ;
+
+    ntrials = 16 ;
+    % ntrials = 1 ;
+
+    %---------------------------------------------------------------------------
+    % PageRank with gap_pagerank
+    %---------------------------------------------------------------------------
+
+    fprintf ('\nGAP PageRank tests:\n') ;
+    tot = 0 ;
+    for trial = 1:ntrials
+        tstart = tic ;
+        [g, iter] = gap_pagerank (A, d) ;
+        t = toc (tstart) ;
+        tot = tot + t ;
+        fprintf ('trial: %2d GAP pagerank time: %g iter: %d\n', trial, t, iter);
+    end
+    fprintf ('avg gap_pagerank time:  %g (%d trials)\n', tot/ntrials, ntrials) ;
+
+    clear d
+
+    %---------------------------------------------------------------------------
+    % PageRank with GrB.pagerank
+    %---------------------------------------------------------------------------
+
+    % Note that GrB.pagerank is slightly different than the GAP pagerank.
+    % The GAP benchmark ignores nodes with zero out-degree.  The GrB.pagerank
+    % matches the MATLAB @graph/centrality (A, 'pagerank') method, which
+    % handles such nodes properly.
+
+    fprintf ('\nGrB PageRank tests:\n') ;
+    opts.type = 'single' ;
+
+    tot = 0 ;
+    for trial = 1:ntrials
+        tstart = tic ;
+        [r, stats] = GrB.pagerank (A, opts) ;
+        t = toc (tstart) ;
+        tot = tot + t ;
+        fprintf ('trial: %2d GrB.pagerank time: %g = (%g + %g) iter: %d\n', ...
+            trial, t, stats.tinit, stats.trank, stats.iter) ;
+    end
+    fprintf ('avg GrB.pagerank time:  %g (%d trials)\n', tot/ntrials, ntrials) ;
+
+    %---------------------------------------------------------------------------
+    % PageRank with MATLAB
+    %---------------------------------------------------------------------------
+
+    % if (n < 24*1e6)
+    try
+        fprintf ('\nCompare with built-in MATLAB pagerank:\n') ;
+        A = double (A) ;
+        G = digraph (A) ;
+        clear A
+        tic
+        rmatlab = centrality (G, 'pagerank') ;
+        t = toc ;
+        fprintf ('MATLAB time: %g sec (one trial)\n', t) ;
+        clear G
+        % sort is ascending, so the nodes listed below have the lowest ranks
+        [r1, i1] = sort (full (double (r))) ;
+        [r2, i2] = sort (full (double (rmatlab))) ;
+        [r3, i3] = sort (full (double (g))) ;
+
+        for kk = 1:min (10, n)
+            fprintf ('rank: %2d GrB: node %8d (%10.4e)', kk, i1 (kk), r1 (kk)) ;
+            fprintf (' MATLAB: node %8d (%10.4e)', i2 (kk), r2 (kk)) ;
+            fprintf (' GAP: node %8d (%10.4e)\n', i3 (kk), r3 (kk)) ;
+        end
+    catch me
+        me
+        fprintf ('MATLAB failed\n') ;
+    end
+
+    clear G r g rmatlab d A r1 r2 r3 i1 i2 i3
+end
+
diff --git a/GraphBLAS/GAP/gap_tc.m b/GraphBLAS/GAP/gap_tc.m
new file mode 100644
index 0000000000..96d8a712ba
--- /dev/null
+++ b/GraphBLAS/GAP/gap_tc.m
@@ -0,0 +1,124 @@
+function gap_tc
+%GAP_TC run tricount for the GAP benchmark
+
+diary on
+rng ('default') ;
+
+% warmup, to make sure GrB library is loaded
+C = GrB (1) * GrB (1) + 1 ;
+clear C
+
+% the GAP test matrices (overridden below):
+matrices = {
+    'GAP/GAP-road'
+    'GAP/GAP-web'
+    'GAP/GAP-urand'
+    'GAP/GAP-twitter'
+    'GAP/GAP-kron'
+    } ;
+
+% smaller test matrices (overridden below):
+matrices = { 'HB/west0067', 'SNAP/roadNet-CA', ...
+    'GAP/GAP-road', ...
+    'GAP/GAP-web', ...
+    'GAP/GAP-urand', ...
+    'GAP/GAP-twitter', ...
+    'GAP/GAP-kron' } ;
+
+matrices = { 'HB/west0067', 'SNAP/roadNet-CA' , ...
+    'SNAP/com-Orkut', 'LAW/indochina-2004' } ;
+% matrices used: ids of all real square ones with nnz > 5e6, sorted by nnz
+index = ssget ;
+f = find (index.nrows == index.ncols & index.nnz > 5e6 & index.isReal) ;
+[~,i] = sort (index.nnz (f)) ;
+matrices = f (i) ;
+
+% pick the # of threads by hostname; strncmp is safe if the name is short
+[~, result] = system ('hostname') ;
+if (strncmp (result, 'hyper', 5))
+    fprintf ('hypersparse: %d threads\n', GrB.threads (40)) ;
+elseif (strncmp (result, 'slash', 5))
+    fprintf ('slash: %d threads\n', GrB.threads (8)) ;
+else
+    fprintf ('default: %d threads\n', GrB.threads) ;
+end
+clear result
+
+winners = zeros (16,1) ;    % # of matrices for which each tric method won
+total   = zeros (16,1) ;    % total time of each tric method, over all matrices
+tbest   = 0 ;               % total time of the best method for each matrix
+% NOTE(review): the loop starts at matrix 152, presumably to resume a run
+for k = 152:length(matrices)
+
+    %---------------------------------------------------------------------------
+    % get the GAP problem
+    %---------------------------------------------------------------------------
+
+try
+
+    id = matrices (k) ;
+    GrB.burble (0) ;
+    t1 = tic ;
+    clear A Prob
+    Prob = ssget (id, index) ;
+    A = GrB (Prob.A, 'by row', 'logical') ;
+    name = Prob.name ;
+    clear Prob
+    A = spones (A) ;
+    A = A|A' ;
+    n = size (A,1) ;
+    fprintf ('\n%s: nodes: %g million  nvals: %g million\n', ...
+        name, n / 1e6, nnz (A) / 1e6) ;
+    t1 = toc (t1) ;
+    fprintf ('load time: %g sec\n', t1) ;
+
+    ntrials = 1 ;
+
+    %---------------------------------------------------------------------------
+    % triangle count
+    %---------------------------------------------------------------------------
+
+    fprintf ('\nGrB.tricount  tests:\n') ;
+
+    tot = 0 ;
+    for trial = 1:ntrials
+        tstart = tic ;
+        s = GrB.tricount (A) ;
+        t = toc (tstart) ;
+        tot = tot + t ;
+        fprintf ('trial: %2d GrB.tricount  time: %8.3f\n', trial, t) ;
+    end
+    fprintf ('avg GrB.tricount time:  %10.3f (%d trials)\n', ...
+        tot/ntrials, ntrials) ;
+    fprintf ('triangles: %d\n', full (s)) ;
+
+    %---------------------------------------------------------------------------
+    % triangle count with permutations
+    %---------------------------------------------------------------------------
+
+    [c times best] = tric (A, s) ;
+    clear A
+
+    all_times = sum (times, 2) ;
+    total = total + all_times ;
+    winners (best) = winners (best) + 1 ;
+    tbest = tbest + all_times (best) ;
+
+    % use a separate index here, so k remains the matrix index saved below
+    for method = 1:16
+        if (total (method) < inf)
+            fprintf ('%2d   %10.3f : %d\n', method, total (method), ...
+                winners (method)) ;
+        end
+    end
+    fprintf ('best %10.3f\n', tbest) ;
+    save gap_tc_results winners total tbest k
+    diary off
+    diary on
+catch me
+    k
+    disp (me.message)
+end
+
+end
+
+
diff --git a/GraphBLAS/GAP/gap_tc_results.mat b/GraphBLAS/GAP/gap_tc_results.mat
new file mode 100644
index 0000000000..c5fae6572a
Binary files /dev/null and b/GraphBLAS/GAP/gap_tc_results.mat differ
diff --git a/GraphBLAS/GAP/tric.m b/GraphBLAS/GAP/tric.m
new file mode 100644
index 0000000000..3ab599a9a6
--- /dev/null
+++ b/GraphBLAS/GAP/tric.m
@@ -0,0 +1,258 @@
+function [c times best] = tric (A, cgood)
+%TRIC triangle counting tests
+% A must be logical, symmetric, and stored by row.  cgood is the expected
+% triangle count; it is computed with GrB.tricount if not given.  On output,
+% c is the count from the last trial (-1 if it failed), times (k,:) is the
+% [prep, run] time of trial k (inf if not run or failed), best is the fastest.
+
+assert (GrB.isbyrow (A)) ;
+assert (isequal (GrB.type (A), 'logical')) ;
+
+rng ('default') ;
+
+desc_s.mask = 'structural' ;
+
+desc_st.mask = 'structural' ;
+desc_st.in1 = 'transpose' ;
+
+semiring = '+.pair.int64' ;
+monoid = '+.int64' ;
+
+n = size (A,1) ;
+Z = GrB (n, n, 'int64', 'by row') ;     % first argument of each GrB.mxm below
+
+if (nargin < 2)
+    tstart = tic ;
+    cgood = GrB.tricount (A) ;
+    tgood = toc (tstart) ;
+    fprintf ('tricount time: %g   triangles %d\n', tgood, cgood) ;
+end
+
+degree = full (double (GrB.entries (A, 'row', 'degree'))) ;
+fprintf ('degree: min: %d max: %d mean: %g std: %g\n', ...
+    min (degree), max (degree), mean (degree), std (degree)) ;
+
+times = inf (16, 2) ;       % times (k,:) = [prep, run] time of trial k
+
+dot = [3 4 7 8 15 16] ; % 13] ;
+trials = dot ;              % only these trials are run
+
+for trial = trials
+
+    tstart = tic ;
+    c = -1 ;                % remains -1 if the trial fails
+    tprep = inf ;
+
+    try
+
+        if (trial == 1)
+
+            % Sandia method: C<L>=L*L with saxpy method
+            L = tril (A, -1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, L, desc_s) ;
+
+        elseif (trial == 2)
+
+            % Sandia2 method: C<U>=U*U with saxpy method
+            U = triu (A, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, U, semiring, U, U, desc_s) ;
+
+        elseif (trial == 3)
+
+            % SandiaDot: C<L>=L*U': dot method
+            L = tril (A, -1) ;
+            U = triu (A, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, U, desc_st) ;
+
+        elseif (trial == 4)
+
+            % SandiaDot2: C<U>=U*L': dot method
+            L = tril (A, -1) ;
+            U = triu (A, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, U, semiring, U, L, desc_st) ;
+
+
+        elseif (trial == 5)
+
+            % sort degree, low to hi: saxpy method (Sandia)
+            [~,p] = sort (degree, 'ascend') ;
+            L = tril (A (p,p), -1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, L, desc_s) ;
+
+        elseif (trial == 6)
+
+            % sort degree, hi to low: saxpy method (Sandia)
+            [~,p] = sort (degree, 'descend') ;
+            L = tril (A (p,p), -1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, L, desc_s) ;
+
+        elseif (trial == 7)
+
+            % sort degree, low to hi: dot method (SandiaDot)
+            [~,p] = sort (degree, 'ascend') ;
+            S = A (p,p) ;
+            L = tril (S, -1) ;
+            U = triu (S, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, U, desc_st) ;
+
+        elseif (trial == 8)
+
+            % sort degree, hi to low: dot method (SandiaDot)
+            [~,p] = sort (degree, 'descend') ;
+            S = A (p,p) ;
+            L = tril (S, -1) ;
+            U = triu (S, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, U, desc_st) ;
+
+
+        elseif (trial == 9)
+
+            % sort degree, low to hi: saxpy method
+            [~,p] = sort (degree, 'ascend') ;
+            [i j ~] = find (A) ;
+            % if p = 1:n, the rule is i > j, which is tril (A)
+            % NOTE(review): tril (A (p,p)) would need the inverse of p; confirm.
+            keep = p(i) > p(j) ;
+            i = i (keep) ;
+            j = j (keep) ;
+            S = GrB.build (i, j, 1, n, n, '|', 'logical') ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, S, semiring, S, S, desc_s) ;
+
+        elseif (trial == 10)
+
+            % sort degree, hi to low: saxpy method
+            [~,p] = sort (degree, 'descend') ;
+            [i j ~] = find (A) ;
+            % if p = 1:n, the rule is i > j, which is tril (A)
+            keep = p(i) > p(j) ;
+            i = i (keep) ;
+            j = j (keep) ;
+            S = GrB.build (i, j, 1, n, n, '|', 'logical') ;
+            clear keep i j
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, S, semiring, S, S, desc_s) ;
+
+        elseif (trial == 11)
+
+            % sort degree, low to hi: dot method
+            [~,p] = sort (degree, 'ascend') ;
+            [i j ~] = find (A) ;
+            % if p = 1:n, the rule is i > j, which is tril (A)
+            keep = p(i) > p(j) ;
+            ilo = i (keep) ;
+            jlo = j (keep) ;
+            L = GrB.build (ilo, jlo, 1, n, n, '|', 'logical') ;
+            keep = p(i) < p(j) ;
+            ihi = i (keep) ;
+            jhi = j (keep) ;
+            U = GrB.build (ihi, jhi, 1, n, n, '|', 'logical') ;
+            clear keep i j ilo jlo ihi jhi
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, U, desc_st) ;
+
+        elseif (trial == 12)
+
+            % sort degree, hi to low: dot method
+            [~,p] = sort (degree, 'descend') ;
+            [i j ~] = find (A) ;
+            % if p = 1:n, the rule is i > j, which is tril (A)
+            keep = p(i) > p(j) ;
+            ilo = i (keep) ;
+            jlo = j (keep) ;
+            L = GrB.build (ilo, jlo, 1, n, n, '|', 'logical') ;
+            keep = p(i) < p(j) ;
+            ihi = i (keep) ;
+            jhi = j (keep) ;
+            U = GrB.build (ihi, jhi, 1, n, n, '|', 'logical') ;
+            clear keep i j ilo jlo ihi jhi
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, U, desc_st) ;
+
+        elseif (trial == 13)
+
+            % sort via symrcm: dot method (SandiaDot)
+            p = symrcm (A) ;
+            S = A (p,p) ;
+            L = tril (S, -1) ;
+            U = triu (S, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, U, desc_st) ;
+
+        elseif (trial == 14)
+
+            % sort via amd: dot method (SandiaDot)
+            p = amd (A) ;
+            S = A (p,p) ;
+            L = tril (S, -1) ;
+            U = triu (S, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, L, semiring, L, U, desc_st) ;
+
+        elseif (trial == 15)
+
+            % SandiaDot2: C<U>=U*L': dot method, sorted ascending
+            [~,p] = sort (degree, 'ascend') ;
+            S = A (p,p) ;
+            L = tril (S, -1) ;
+            U = triu (S, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, U, semiring, U, L, desc_st) ;
+
+        elseif (trial == 16)
+
+            % SandiaDot2: C<U>=U*L': dot method, sorted descending
+            [~,p] = sort (degree, 'descend') ;
+            S = A (p,p) ;
+            L = tril (S, -1) ;
+            U = triu (S, 1) ;
+            tprep = toc (tstart) ; tstart = tic ; 
+            C = GrB.mxm (Z, U, semiring, U, L, desc_st) ;
+
+        end
+
+        c = full (double (GrB.reduce (monoid, C))) ;
+
+    catch me
+        fprintf ('error: %s\n', me.message) ;
+    end
+
+    t = toc (tstart) ;
+    if (c == -1)
+        t = inf ;
+        tprep = inf ;
+    else
+        assert (c == cgood)
+    end
+
+    times (trial,1) = tprep ;
+    times (trial,2) = t ;
+    fprintf ('%2d: %10.4f %10.4f = %10.4f\n', trial, tprep, t, t+tprep) ;
+    clear C S L U ilo jlo ihi jhi keep i j p
+end
+
+all_time = sum (times, 2) ;     % prep + run time of each trial
+fprintf ('\n') ;
+[tbest, best] = min (all_time) ;
+best = best (1) ;
+
+for trial = trials
+    t = sum (times (trial,1:2)) ;
+    fprintf ('%2d: %10.4f relative: %10.4f ', trial, t, t / tbest) ;
+    if (trial == best)
+        fprintf ('best') ;
+    end
+    fprintf ('\n') ;
+end
+
+fprintf ('\n') ;
+
+
diff --git a/GraphBLAS/GAP/ttest.m b/GraphBLAS/GAP/ttest.m
new file mode 100644
index 0000000000..3fc586a095
--- /dev/null
+++ b/GraphBLAS/GAP/ttest.m
@@ -0,0 +1,36 @@
+%TTEST run triangle counting tests
+
+index = ssget ;
+f = find (index.nnz > 1e6 & index.nrows == index.ncols) ;
+[~, i] = sort (index.nnz (f)) ;
+f = f (i) ;
+nmat = length (f) ;
+
+% tric returns times as a 16-by-2 array: one row per method, [prep, run]
+ntrials = 16 ;
+winners = zeros (ntrials,1) ;
+totals = zeros (ntrials,1) ;
+
+for k = 1:nmat
+    id = f (k) ;
+    Prob = ssget (id, index)
+    % symmetrize the pattern (every matrix selected above is square); tric
+    % requires a logical GrB matrix stored by row
+    A = spones (Prob.A) ;
+    A = GrB (A|A', 'by row', 'logical') ;
+
+    [s, times, best] = tric (A) ;
+    winners (best) = winners (best) + 1 ;
+    % methods that tric did not run have an infinite time
+    totals = totals + sum (times, 2) ;
+
+    fprintf ('\nwinner count:\n') ;
+    for trial = 1:ntrials
+        if (totals (trial) < inf)
+            fprintf ('  %2d : %12.2f  %d\n', trial, totals (trial), ...
+                winners (trial)) ;
+        end
+    end
+end
+
+
diff --git a/GraphBLAS/README.md b/GraphBLAS/README.md
index 2e158bdbeb..912a219421 100644
--- a/GraphBLAS/README.md
+++ b/GraphBLAS/README.md
@@ -1,6 +1,6 @@
 # GraphBLAS/GraphBLAS: MATLAB interface for SuiteSparse:GraphBLAS
 
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 The GrB class provides an easy-to-use MATLAB interface to SuiteSparse:GraphBLAS.
@@ -45,22 +45,17 @@ to add the GraphBLAS interface to your path.  Then do
 
     savepath
 
-Or, if that function is not allowed because of file permissions, add these
-commands to your startup.m file:
+Or, if that function is not allowed because of file permissions, add this
+command to your startup.m file:
 
     % add the MATLAB interface to the MATLAB path
     addpath ('/home/me/SuiteSparse/GraphBLAS/GraphBLAS') :
-    try
-        GrB.init
-        fprintf ('GraphBLAS initialized\n') ;
-    catch
-        fprintf ('GraphBLAS not initialized\n') ;
-    end
 
 where the path /home/me/SuiteSparse/GraphBLAS/GraphBLAS is the full path to
-this folder.  The code that calls GrB.init should be added to your startup.m in
-all cases.  The name "GraphBLAS/GraphBLAS" is used so that this can be done in
-MATLAB:
+this folder.
+
+The name "GraphBLAS/GraphBLAS" is used for this folder so that this can be done
+in MATLAB:
 
     help graphblas
 
@@ -71,7 +66,7 @@ To get additional help, type:
 
 Next, go to the GraphBLAS/GraphBLAS/@GrB/private folder and compile the MATLAB
 mexFunctions.  Assuming your working directory is GraphBLAS/GraphBLAS
-(where this README.md) file is located, do the following:
+(where this README.md file is located), do the following:
 
     cd @GrB/private
     gbmake
@@ -93,3 +88,23 @@ Note that gbtest tests all features of the MATLAB interface to
 SuiteSparse/GraphBLAS, including error handling, so you can expect to see
 error messages during the test.  This is expected.
 
+# FUTURE: Not yet supported for GrB matrices in MATLAB:
+
+    linear indexing
+    complex matrices
+    2nd output for [x,i] = max (...) and [x,i] = min (...); needs
+        modified reduction methods inside GraphBLAS
+    'includenan' for min and max
+    singleton expansion
+    3D and higher dimensional matrices:
+        this might be done by converting the higher dimensional
+        indices down to a large 2D space, and relying on hypersparsity.
+    saturating element-wise binary and unary operators for integers 
+
+The last two features don't exist for MATLAB sparse matrices.
+
+For Windows: Microsoft Visual Studio does not support OpenMP tasking,
+    which means that the internal sort is not parallel, but sequential.
+    This affects GrB.build, and some uses of matrix subreferencing.
+    (C(I,J) when I and/or J are unsorted lists).
+
diff --git a/GraphBLAS/TODO.txt b/GraphBLAS/TODO.txt
deleted file mode 100644
index 70b075fc30..0000000000
--- a/GraphBLAS/TODO.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-
-TODO: The following features are not yet supported for GrB matrices in MATLAB:
-
-    linear indexing
-    complex matrices
-    2nd output for [x,i] = max (...) and [x,i] = min (...); needs
-        modified reduction methods inside GraphBLAS
-    'includenan' for min and max
-    singleton expansion
-    3D and higher dimensional matrices:
-        this might be done by converting the higher dimensioal
-        indices down to a large 2D space, ad relying on hypersparsity.
-    saturating element-wise binary and unary operators for integers 
-
-The last two features don't exist for MATLAB sparse matrices.
-
diff --git a/GraphBLAS/demo/Contents.m b/GraphBLAS/demo/Contents.m
index 7437d4705b..80eb1166d1 100644
--- a/GraphBLAS/demo/Contents.m
+++ b/GraphBLAS/demo/Contents.m
@@ -12,6 +12,6 @@
 %       gbdemo
 %       gbdemo2
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
diff --git a/GraphBLAS/demo/bfs_matlab.m b/GraphBLAS/demo/bfs_matlab.m
index 2cc3e81620..1e89087614 100644
--- a/GraphBLAS/demo/bfs_matlab.m
+++ b/GraphBLAS/demo/bfs_matlab.m
@@ -11,7 +11,10 @@
 % kth level, where the shortest path (in terms of # of edges) from  s to j has
 % length k+1.  The source node s defaults to 1.
 
-[m n] = size (A) ;
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+[m, n] = size (A) ;
 if (m ~= n)
     error ('A must be square') ;
 end
@@ -37,7 +40,7 @@
     qnew = spones (AT * q) ;
 
     % discard nodes in qnew that are already seen
-    qnew (v ~= 0) = 0 ;
+    qnew (v ~= 0) = 0 ;         %#ok
 
     % move to the new level
     q = qnew ;
diff --git a/GraphBLAS/demo/dnn_mat2gb.m b/GraphBLAS/demo/dnn_mat2gb.m
index c69be6371a..b3cb99f1d6 100644
--- a/GraphBLAS/demo/dnn_mat2gb.m
+++ b/GraphBLAS/demo/dnn_mat2gb.m
@@ -23,7 +23,7 @@
 %
 % See also GrB.dnn, dnn_matlab.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 d = struct ('format', 'by row') ;
diff --git a/GraphBLAS/demo/dnn_matlab.m b/GraphBLAS/demo/dnn_matlab.m
index dafc6916fb..4d262ad894 100644
--- a/GraphBLAS/demo/dnn_matlab.m
+++ b/GraphBLAS/demo/dnn_matlab.m
@@ -11,6 +11,9 @@
 %
 % See also GrB.dnn, dnn_mat2gb.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 Y = Y0 ;
 for i=1:length(W)
 
diff --git a/GraphBLAS/demo/dnn_results/LAGraph_dnn.c b/GraphBLAS/demo/dnn_results/LAGraph_dnn.c
index 835b132e44..d3be22c58b 100644
--- a/GraphBLAS/demo/dnn_results/LAGraph_dnn.c
+++ b/GraphBLAS/demo/dnn_results/LAGraph_dnn.c
@@ -5,7 +5,7 @@
 /*
     LAGraph:  graph algorithms based on GraphBLAS
 
-    Copyright 2019 LAGraph Contributors.
+    Copyright 2020 LAGraph Contributors.
 
     (see Contributors.txt for a full list of Contributors; see
     ContributionInstructions.txt for information on how you can Contribute to
diff --git a/GraphBLAS/demo/dnn_results/dnn_gb.m b/GraphBLAS/demo/dnn_results/dnn_gb.m
index 3373efd042..9529ea9dad 100644
--- a/GraphBLAS/demo/dnn_results/dnn_gb.m
+++ b/GraphBLAS/demo/dnn_results/dnn_gb.m
@@ -14,7 +14,7 @@
 %
 % See also dnn_matlab, dnn_mat2gb.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 Y = Y0 ;
diff --git a/GraphBLAS/demo/dnn_results/dnn_matlab.m b/GraphBLAS/demo/dnn_results/dnn_matlab.m
index 83f3beba80..ee2c5de42d 100644
--- a/GraphBLAS/demo/dnn_results/dnn_matlab.m
+++ b/GraphBLAS/demo/dnn_results/dnn_matlab.m
@@ -11,6 +11,9 @@
 %
 % See also dnn_gb, dnn_mat2gb.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 Y = Y0 ;
 for i=1:length(W)
 
diff --git a/GraphBLAS/demo/dnn_results/dnn_run.m b/GraphBLAS/demo/dnn_results/dnn_run.m
index 9a1949b4c5..46def13d7d 100644
--- a/GraphBLAS/demo/dnn_results/dnn_run.m
+++ b/GraphBLAS/demo/dnn_results/dnn_run.m
@@ -1,5 +1,8 @@
 % Set locations of files.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rootdir = '/raid/hyper/GraphChallenge/dnn_data/MATLAB' ;
 ncores = maxNumCompThreads ;
 fprintf ('# of cores :  %d\n', ncores) ;
diff --git a/GraphBLAS/demo/dnn_results/dnn_summary.m b/GraphBLAS/demo/dnn_results/dnn_summary.m
index b0a2131f86..a29353eb80 100644
--- a/GraphBLAS/demo/dnn_results/dnn_summary.m
+++ b/GraphBLAS/demo/dnn_results/dnn_summary.m
@@ -12,6 +12,9 @@
 % threads:   1 time:   13816.56 sec speedup:     1.00 rate:       4.37 billion
 % threads:   1 time:   54701.46 sec speedup:     1.00 rate:       4.42 billion
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 TM1 =  [
       23.53
       67.52
diff --git a/GraphBLAS/demo/dnn_results/dnn_summary.pdf b/GraphBLAS/demo/dnn_results/dnn_summary.pdf
index b13e12c5e5..4a5ef5e739 100644
Binary files a/GraphBLAS/demo/dnn_results/dnn_summary.pdf and b/GraphBLAS/demo/dnn_results/dnn_summary.pdf differ
diff --git a/GraphBLAS/demo/dnn_results/dnn_summary.tex b/GraphBLAS/demo/dnn_results/dnn_summary.tex
index 409e609ef0..5e9e75339e 100644
--- a/GraphBLAS/demo/dnn_results/dnn_summary.tex
+++ b/GraphBLAS/demo/dnn_results/dnn_summary.tex
@@ -1,15 +1,18 @@
 \documentclass[12pt]{article}
+\usepackage{url}
+\urlstyle{sf}
+% \usepackage[colorlinks,linkcolor=Blue,citecolor=Blue,urlcolor=Blue]{hyperref}
 \title{Sparse DNN Results with the MATLAB interface to SuiteSparse:GraphBLAS}
 \author{Tim Davis}
-\date{Sept 2, 2019}
+\date{Sept 2, 2019 (updated Feb 20, 2020)}
 
 \begin{document}
 \maketitle
 
-The tables below report the results for the 12 sparse deep neural network problems.
-Problems 1-3 use 1024 neurons, 4-6 use 4,096 neurons, 7-9 use 16K neurons, and the
-last use 64K neurons.  Each group of three uses 120, 480, and 1920 layers,
-respectively.
+The tables below report the results for the 12 sparse deep neural network
+problems.  Problems 1-3 use 1024 neurons, 4-6 use 4,096 neurons, 7-9 use 16K
+neurons, and the last use 64K neurons.  Each group of three uses 120, 480, and
+1920 layers, respectively.
 
 \section{The MATLAB code}
 
@@ -32,8 +35,9 @@ \section{The MATLAB code}
 \end{verbatim}}
 
 For comparison, here is the MATLAB reference implmentation at \newline
-http://graphchallenge.org.  It is about 60x to 70x slower than the two methods
-using GraphBLAS.  Applying the bias is more complex than the GraphBLAS code:
+\url{http://graphchallenge.org}.  It is about 60x to 70x slower than the two
+methods using GraphBLAS.  Applying the bias is more complex than the GraphBLAS
+code:
 
 {\footnotesize
 \begin{verbatim}
@@ -121,40 +125,67 @@ \section{The C code}
 } \end{verbatim}}
 
 
+\newpage
 \section{Run time results}
 
 Run time in seconds on an Intel Xeon E5-2698v4 @ 2.2GHz, with 20 hardware cores
-and 256GB of RAM, using the GCC 5.4.0 compiler, and Ubuntu 16.04.  Note that
-the icc compiler generates faster code, but it's not compatible with MATLAB,
-so the gcc compiler on the system was used instead.
-MATLAB R2018a was used.
+and 256GB of RAM, using the GCC 5.4.0 compiler, and Ubuntu 16.04.
+The fastest time is shown in bold, for one and 40 threads.  Lower is better.
+MATLAB R2018a was used for both v3.1.0 and v3.2.0.
 
-The fastest time is shown in bold, for one and 40 threads.
-Lower is better.
+\subsection{GraphBLAS v3.1.0, August 2019}
 
+{\small
 % Run time [1 thread: MATLAB LAGraph M/L]  [40 threads: MATLAB LAGraph M/L]
-\vspace{0.1in}
 \begin{tabular}{l|rr|r||rr|r}
 \hline
-     & \multicolumn{3}{c}{one thread}               &  \multicolumn{3}{c}{40 threads}  \\
-Prob & MATLAB & LAGraph & M/L                       &   MATLAB & LAGraph & M/L         \\
+     & \multicolumn{3}{c}{one thread}                &  \multicolumn{3}{c}{40 threads}  \\
+Prob & MATLAB         & LAGraph        & M/L         &   MATLAB      & LAGraph       & M/L        \\
 \hline
-  1 & {\bf       24 }&            24  &     0.97    &            3  & {\bf        2 }&     1.17   \\
-  2 & {\bf       68 }&            68  &     0.99    &            9  & {\bf        5 }&     2.07   \\
-  3 & {\bf      242 }&           243  &     1.00    &           34  & {\bf       16 }&     2.09   \\
+  1  & {\bf       24 }&          24.2  &     0.97    &            3  & {\bf     2.4 }&     1.17   \\
+  2  & {\bf       68 }&          68.2  &     0.99    &            9  & {\bf     4.5 }&     2.07   \\
+  3  & {\bf      242 }&         243.3  &     1.00    &           34  & {\bf    16.4 }&     2.09   \\
 \hline
-  4 & {\bf       98 }&           108  &     0.90    &           10  & {\bf        9 }&     1.07   \\
-  5 & {\bf      293 }&           330  &     0.89    &{\bf       31 }&            31  &     1.00   \\
-  6 & {\bf     1076 }&          1222  &     0.88    &{\bf      117 }&           118  &     0.99   \\
+  4  & {\bf       98 }&         108.1  &     0.90    &           10  & {\bf     9.3 }&     1.07   \\
+  5  & {\bf      293 }&         330.1  &     0.89    &{\bf       31 }&         30.7  &     1.00   \\
+  6  & {\bf     1076 }&        1222.5  &     0.88    &{\bf      117 }&        117.6  &     0.99   \\
 \hline
-  7 &          766  & {\bf      741 }&     1.03     &          58  & {\bf       51 }&     1.15   \\
-  8 &         2684  & {\bf     2552 }&     1.05     &         201  & {\bf      175 }&     1.15   \\
-  9 &        10381  & {\bf     9783 }&     1.06     &         783  & {\bf      690 }&     1.13   \\
+  7  &           766  & {\bf    741.4 }&     1.03    &           58  & {\bf    51.0 }&     1.15   \\
+  8  &          2684  & {\bf   2552.1 }&     1.05    &          201  & {\bf   175.0 }&     1.15   \\
+  9  &         10381  & {\bf   9783.1 }&     1.06    &          783  & {\bf   690.1 }&     1.13   \\
 \hline
- 10 &{\bf     3777 }&          4536  &     0.83     &         254  & {\bf      245 }&     1.04   \\
- 11 &{\bf    13817 }&         16447  &     0.84     &         971  & {\bf      926 }&     1.05   \\
- 12 &{\bf    54701 }&         65492  &     0.84     &        3829  & {\bf     3743 }&     1.02   \\
+ 10  &{\bf      3777 }&        4536.3  &     0.83    &          254  & {\bf   245.3 }&     1.04   \\
+ 11  &{\bf     13817 }&       16447.9  &     0.84    &          971  & {\bf   926.4 }&     1.05   \\
+ 12  &{\bf     54701 }&       65492.3  &     0.84    &         3829  & {\bf  3743.3 }&     1.02   \\
 \end{tabular}
+}
+
+\subsection{GraphBLAS v3.2.0, Feb 2020}
+
+{\small
+% Run time [1 thread: MATLAB LAGraph M/L]  [40 threads: MATLAB LAGraph M/L]
+\begin{tabular}{l|rr|r||rr|r}
+\hline
+     & \multicolumn{3}{c}{one thread}                &  \multicolumn{3}{c}{40 threads}  \\
+Prob & MATLAB         & LAGraph        & M/L         &   MATLAB      & LAGraph       & M/L        \\
+\hline
+  1  &                &          25.1  &      .      &               &          1.4  &      .     \\
+  2  &                &          85.6  &      .      &               &          4.8  &      .     \\
+  3  &                &         328.7  &      .      &               &         18.3  &      .     \\
+\hline
+  4  &                &         102.0  &      .      &               &          6.2  &      .     \\
+  5  &                &         356.6  &      .      &               &         21.7  &      .     \\
+  6  &                &        1395.9  &      .      &               &         84.1  &      .     \\
+\hline
+  7  &                &         722.0  &      .      &               &         35.0  &      .     \\
+  8  &                &        2653.2  &      .      &               &        131.5  &      .     \\
+  9  &                &       10407.3  &      .      &               &        504.7  &      .     \\
+\hline
+ 10  &                &        4001.3  &      .      &               &        229.4  &      .     \\
+ 11  &                &       15124.5  &      .      &               &        918.9  &      .     \\
+ 12  &                &            .   &      .      &               &           .   &      .     \\
+\end{tabular}
+}
 
 \newpage
 \section{Rate results}
@@ -163,33 +194,66 @@ \section{Rate results}
 features (60,000 for all cases), divided by the run time.  Rate is reported
 in terms of billions of edges/sec.  Best rate shown in bold; higher is better.
 
-\vspace{0.1in}
+\subsection{GraphBLAS v3.1.0, August 2019}
+
+{\small
 \begin{tabular}{l|rr|r||rr|r}
 % Rate(1e9)[1 thread: MATLAB LAGraph M/L]  [40 threads: MATLAB LAGraph M/L]
 \hline
-     & \multicolumn{3}{c}{one thread}               &  \multicolumn{3}{c}{40 threads}  \\
-Prob & MATLAB & LAGraph & M/L                       &   MATLAB & LAGraph & M/L         \\
+     & \multicolumn{3}{c}{one thread}                &  \multicolumn{3}{c}{40 threads}  \\
+Prob & MATLAB         & LAGraph        & M/L         &   MATLAB      & LAGraph       & M/L        \\
 \hline
-  1 &{\bf     10.0 }&           9.7  &     1.03  &  {        85.8 }& {\bf    100.4 }&     0.85   \\
-  2 &{\bf     14.0 }&          13.8  &     1.01  &  {       101.3 }& {\bf    209.2 }&     0.48   \\
-  3 &{\bf     15.6 }&          15.5  &     1.00  &  {       110.1 }& {\bf    230.2 }&     0.48   \\
+  1  &{\bf      10.0 }&           9.7  &       1.03  &  {      85.8 }& {\bf   100.4 }&     0.85   \\
+  2  &{\bf      14.0 }&          13.8  &       1.01  &  {     101.3 }& {\bf   209.2 }&     0.48   \\
+  3  &{\bf      15.6 }&          15.5  &       1.00  &  {     110.1 }& {\bf   230.2 }&     0.48   \\
 \hline
-  4 &{\bf      9.7 }&           8.7  &     1.11  &  {        94.6 }& {\bf    101.1 }&     0.93   \\
-  5 &{\bf     12.9 }&          11.4  &     1.13  &  {\bf    123.3 }& {       122.9 }&     1.00   \\
-  6 &{\bf     14.0 }&          12.4  &     1.14  &  {\bf    129.1 }& {       128.4 }&     1.01   \\
+  4  &{\bf       9.7 }&           8.7  &       1.11  &  {      94.6 }& {\bf   101.1 }&     0.93   \\
+  5  &{\bf      12.9 }&          11.4  &       1.13  &  {\bf  123.3 }& {      122.9 }&     1.00   \\
+  6  &{\bf      14.0 }&          12.4  &       1.14  &  {\bf  129.1 }& {      128.4 }&     1.01   \\
 \hline
-  7 &          4.9  & {\bf      5.1 }&     0.97  &  {        64.6 }& {\bf     74.0 }&     0.87   \\
-  8 &          5.6  & {\bf      5.9 }&     0.95  &  {        75.2 }& {\bf     86.3 }&     0.87   \\
-  9 &          5.8  & {\bf      6.2 }&     0.94  &  {        77.2 }& {\bf     87.5 }&     0.88   \\
+  7  &           4.9  & {\bf      5.1 }&       0.97  &  {      64.6 }& {\bf    74.0 }&     0.87   \\
+  8  &           5.6  & {\bf      5.9 }&       0.95  &  {      75.2 }& {\bf    86.3 }&     0.87   \\
+  9  &           5.8  & {\bf      6.2 }&       0.94  &  {      77.2 }& {\bf    87.5 }&     0.88   \\
 \hline
- 10 &{\bf      4.0 }&           3.3  &     1.20  &  {        59.4 }& {\bf     61.5 }&     0.96   \\
- 11 &{\bf      4.4 }&           3.7  &     1.19  &  {        62.2 }& {\bf     65.2 }&     0.95   \\
- 12 &{\bf      4.4 }&           3.7  &     1.20  &  {        63.1 }& {\bf     64.5 }&     0.98   \\
+ 10  &{\bf       4.0 }&           3.3  &       1.20  &  {      59.4 }& {\bf    61.5 }&     0.96   \\
+ 11  &{\bf       4.4 }&           3.7  &       1.19  &  {      62.2 }& {\bf    65.2 }&     0.95   \\
+ 12  &{\bf       4.4 }&           3.7  &       1.20  &  {      63.1 }& {\bf    64.5 }&     0.98   \\
 \end{tabular}
+}
 
+\subsection{GraphBLAS v3.2.0, Feb 2020}
+
+{\small
+\begin{tabular}{l|rr|r||rr|r}
+% Rate(1e9)[1 thread: MATLAB LAGraph M/L]  [40 threads: MATLAB LAGraph M/L]
+\hline
+     & \multicolumn{3}{c}{one thread}                &  \multicolumn{3}{c}{40 threads}  \\
+Prob & MATLAB         & LAGraph        & M/L         &   MATLAB      & LAGraph       & M/L        \\
+\hline
+  1  &{           .  }& {         9.4 }&        .    &  {        .  }& {      164.9 }&      .     \\
+  2  &{           .  }& {        11.0 }&        .    &  {        .  }& {      198.5 }&      .     \\
+  3  &{           .  }& {        11.5 }&        .    &  {        .  }& {      205.8 }&      .     \\
+\hline
+  4  &{           .  }& {         9.2 }&        .    &  {        .  }& {      152.8 }&      .     \\
+  5  &{           .  }& {        10.6 }&        .    &  {        .  }& {      174.1 }&      .     \\
+  6  &{           .  }& {        10.8 }&        .    &  {        .  }& {      179.6 }&      .     \\
+\hline
+  7  &{           .  }& {         5.2 }&        .    &  {        .  }& {      107.9 }&      .     \\
+  8  &{           .  }& {         5.7 }&        .    &  {        .  }& {      114.8 }&      .     \\
+  9  &{           .  }& {         5.8 }&        .    &  {        .  }& {      119.7 }&      .     \\
+\hline
+ 10  &{           .  }& {         3.8 }&        .    &  {        .  }& {       65.8 }&      .     \\
+ 11  &{           .  }& {         4.0 }&        .    &  {        .  }& {       65.7 }&      .     \\
+ 12  &{           .  }& {          .  }&        .    &  {        .  }& {         .  }&      .     \\
+\end{tabular}
+}
 
 \section{Comparison}
-When using 40 threads, the performance of the two methods is
+
+\subsection{v3.1.0, August 2019}
+
+When using 40 threads, the performance of the two methods
+(MATLAB+GraphBLAS, vs pure C in LAGraph) is
 almost identical, except for problems 2 and 3, where LAGraph
 is about twice as fast as the MATLAB \verb'dnn_gb.m'.
 The two codes differ in how the max threshold of 32 is implemented.  The MATLAB
@@ -208,6 +272,25 @@ \section{Comparison}
 write the max32 threshold as masked assigment.  In that sense, this is a
 fair comparison between MATLAB+GraphBLAS and LAGRAPH+GraphBLAS.
 
+\subsection{v3.2.0, Feb 2020}
+
+The new method in v3.2.0 is more parallelizable than the method in v3.1.0.
+However, for the smaller problems, v3.1.0 is faster when using a single thread.
+When using 40 threads, v3.2.0 (in LAGraph) is almost always significantly
+faster than the method in v3.1.0 (except for problems 2 and 3).
+
+The sequential performance in v3.1.0 is likely higher because that version of
+GraphBLAS kept a set of global workspaces that it reused for each matrix
+multiplication (the Sauna).  Those have been removed in v3.2.0.  For the next
+release of GraphBLAS, I will explore how to improve the sequential performance
+of the Sauna-free method in v3.2.x, to see if I can match the performance of
+the Sauna-based method in v3.1.0.
+
+Using a single thread on the largest problem (problem 12) takes about 18 hours,
+so its results were still in progress when v3.2.0 was released and are shown
+as ``.'' in the tables above.  This document will be updated after the formal
+release of v3.2.0, once the runs complete.  See the master branch at
+\url{https://github.com/DrTimothyAldenDavis/GraphBLAS} for the updated document.
 
 \end{document}
 
diff --git a/GraphBLAS/demo/dnn_results/o_gcc540_Aug29_hypersparse_LAGRAPH.txt b/GraphBLAS/demo/dnn_results/o_gcc540_Aug29_2019_hypersparse_LAGRAPH.txt
similarity index 100%
rename from GraphBLAS/demo/dnn_results/o_gcc540_Aug29_hypersparse_LAGRAPH.txt
rename to GraphBLAS/demo/dnn_results/o_gcc540_Aug29_2019_hypersparse_LAGRAPH.txt
diff --git a/GraphBLAS/demo/dnn_results/o_gcc540_Aug29_hypersparse_MATLAB.txt b/GraphBLAS/demo/dnn_results/o_gcc540_Aug29_2019_hypersparse_MATLAB.txt
similarity index 100%
rename from GraphBLAS/demo/dnn_results/o_gcc540_Aug29_hypersparse_MATLAB.txt
rename to GraphBLAS/demo/dnn_results/o_gcc540_Aug29_2019_hypersparse_MATLAB.txt
diff --git a/GraphBLAS/demo/dnn_results/o_gcc540_Feb20_2020_hypersparse_LAGRAPH_in_progress.txt b/GraphBLAS/demo/dnn_results/o_gcc540_Feb20_2020_hypersparse_LAGRAPH_in_progress.txt
new file mode 100644
index 0000000000..31ab777278
--- /dev/null
+++ b/GraphBLAS/demo/dnn_results/o_gcc540_Feb20_2020_hypersparse_LAGRAPH_in_progress.txt
@@ -0,0 +1,209 @@
+Script started on Wed 19 Feb 2020 04:03:44 AM CST
+Intel Suite:
+Copyright (C) 2009-2019 Intel Corporation. All rights reserved.
+Intel(R) VTune(TM) Amplifier 2019 (build 591499)
+hypersparse $ ./go
+hypersparse.cs.tamu.edu
+SuiteSparse:GraphBLAS Feb 18, 2020
+type: float
+max # of nthreads: 40
+# of problems to solve: 12
+
+# neurons: 1024 bias: -0.3
+# features: 60000 read time: 2.42641 sec
+# entries in Y0: 6.37451 million
+
+--------------------------------------neurons per layer: 1024 layers: 120
+read net time 1.26316 sec
+# edges in all layers: 3.93216 million
+
+nthreads  1: soln time        25.13 sec                  rate     9.3888 (1e9 edges/sec) 
+nthreads  2: soln time        12.67 sec speedup     1.98 rate    18.6217 (1e9 edges/sec) 
+nthreads  4: soln time         6.47 sec speedup     3.88 rate    36.4715 (1e9 edges/sec) 
+nthreads  8: soln time         3.31 sec speedup     7.60 rate    71.3507 (1e9 edges/sec) 
+nthreads 16: soln time         1.92 sec speedup    13.06 rate   122.6437 (1e9 edges/sec) 
+nthreads 20: soln time         1.71 sec speedup    14.73 rate   138.2980 (1e9 edges/sec) 
+nthreads 32: soln time         1.61 sec speedup    15.59 rate   146.3817 (1e9 edges/sec) 
+nthreads 40: soln time         1.43 sec speedup    17.56 rate   164.9021 (1e9 edges/sec) 
+
+# entries in final Y: 1.85549 million
+check time: 0.00117111 sec
+
+--------------------------------------neurons per layer: 1024 layers: 480
+read net time 3.61133 sec
+# edges in all layers: 15.7286 million
+
+nthreads  1: soln time        85.55 sec                  rate    11.0313 (1e9 edges/sec) 
+nthreads  2: soln time        43.48 sec speedup     1.97 rate    21.7030 (1e9 edges/sec) 
+nthreads  4: soln time        21.99 sec speedup     3.89 rate    42.9190 (1e9 edges/sec) 
+nthreads  8: soln time        11.27 sec speedup     7.59 rate    83.7274 (1e9 edges/sec) 
+nthreads 16: soln time         5.91 sec speedup    14.46 rate   159.5588 (1e9 edges/sec) 
+nthreads 20: soln time         5.07 sec speedup    16.87 rate   186.0758 (1e9 edges/sec) 
+nthreads 32: soln time         5.36 sec speedup    15.95 rate   175.9893 (1e9 edges/sec) 
+nthreads 40: soln time         4.75 sec speedup    17.99 rate   198.4934 (1e9 edges/sec) 
+
+# entries in final Y: 1.85549 million
+check time: 0.00116897 sec
+
+--------------------------------------neurons per layer: 1024 layers: 1920
+read net time 13.9862 sec
+# edges in all layers: 62.9146 million
+
+nthreads  1: soln time       328.73 sec                  rate    11.4831 (1e9 edges/sec) 
+nthreads  2: soln time       166.29 sec speedup     1.98 rate    22.7010 (1e9 edges/sec) 
+nthreads  4: soln time        84.37 sec speedup     3.90 rate    44.7445 (1e9 edges/sec) 
+nthreads  8: soln time        42.86 sec speedup     7.67 rate    88.0834 (1e9 edges/sec) 
+nthreads 16: soln time        22.55 sec speedup    14.58 rate   167.4269 (1e9 edges/sec) 
+nthreads 20: soln time        19.56 sec speedup    16.81 rate   192.9795 (1e9 edges/sec) 
+nthreads 32: soln time        20.53 sec speedup    16.01 rate   183.8552 (1e9 edges/sec) 
+nthreads 40: soln time        18.34 sec speedup    17.92 rate   205.7890 (1e9 edges/sec) 
+
+# entries in final Y: 1.85549 million
+check time: 0.00104904 sec
+
+# neurons: 4096 bias: -0.35
+# features: 60000 read time: 9.31665 sec
+# entries in Y0: 25.0191 million
+
+--------------------------------------neurons per layer: 4096 layers: 120
+read net time 4.82429 sec
+# edges in all layers: 15.7286 million
+
+nthreads  1: soln time       102.04 sec                  rate     9.2482 (1e9 edges/sec) 
+nthreads  2: soln time        51.11 sec speedup     2.00 rate    18.4633 (1e9 edges/sec) 
+nthreads  4: soln time        25.93 sec speedup     3.93 rate    36.3882 (1e9 edges/sec) 
+nthreads  8: soln time        13.75 sec speedup     7.42 rate    68.6397 (1e9 edges/sec) 
+nthreads 16: soln time         7.59 sec speedup    13.44 rate   124.3403 (1e9 edges/sec) 
+nthreads 20: soln time         7.46 sec speedup    13.67 rate   126.4293 (1e9 edges/sec) 
+nthreads 32: soln time         7.07 sec speedup    14.43 rate   133.4974 (1e9 edges/sec) 
+nthreads 40: soln time         6.17 sec speedup    16.53 rate   152.8356 (1e9 edges/sec) 
+
+# entries in final Y: 7.3769 million
+check time: 0.00218487 sec
+
+--------------------------------------neurons per layer: 4096 layers: 480
+read net time 14.2522 sec
+# edges in all layers: 62.9146 million
+
+nthreads  1: soln time       356.59 sec                  rate    10.5859 (1e9 edges/sec) 
+nthreads  2: soln time       181.15 sec speedup     1.97 rate    20.8381 (1e9 edges/sec) 
+nthreads  4: soln time        92.36 sec speedup     3.86 rate    40.8725 (1e9 edges/sec) 
+nthreads  8: soln time        47.99 sec speedup     7.43 rate    78.6534 (1e9 edges/sec) 
+nthreads 16: soln time        27.38 sec speedup    13.02 rate   137.8737 (1e9 edges/sec) 
+nthreads 20: soln time        22.80 sec speedup    15.64 rate   165.5629 (1e9 edges/sec) 
+nthreads 32: soln time        24.57 sec speedup    14.51 rate   153.6360 (1e9 edges/sec) 
+nthreads 40: soln time        21.68 sec speedup    16.45 rate   174.1367 (1e9 edges/sec) 
+
+# entries in final Y: 7.3769 million
+check time: 0.00219083 sec
+
+--------------------------------------neurons per layer: 4096 layers: 1920
+read net time 56.5207 sec
+# edges in all layers: 251.658 million
+
+nthreads  1: soln time      1395.85 sec                  rate    10.8174 (1e9 edges/sec) 
+nthreads  2: soln time       705.78 sec speedup     1.98 rate    21.3941 (1e9 edges/sec) 
+nthreads  4: soln time       356.05 sec speedup     3.92 rate    42.4085 (1e9 edges/sec) 
+nthreads  8: soln time       184.37 sec speedup     7.57 rate    81.8969 (1e9 edges/sec) 
+nthreads 16: soln time       103.74 sec speedup    13.45 rate   145.5484 (1e9 edges/sec) 
+nthreads 20: soln time        89.15 sec speedup    15.66 rate   169.3682 (1e9 edges/sec) 
+nthreads 32: soln time        95.10 sec speedup    14.68 rate   158.7782 (1e9 edges/sec) 
+nthreads 40: soln time        84.09 sec speedup    16.60 rate   179.5605 (1e9 edges/sec) 
+
+# entries in final Y: 7.3769 million
+check time: 0.00219488 sec
+
+# neurons: 16384 bias: -0.4
+# features: 60000 read time: 37.0141 sec
+# entries in Y0: 98.8589 million
+
+--------------------------------------neurons per layer: 16384 layers: 120
+read net time 19.783 sec
+# edges in all layers: 62.9146 million
+
+nthreads  1: soln time       722.06 sec                  rate     5.2279 (1e9 edges/sec) 
+nthreads  2: soln time       361.33 sec speedup     2.00 rate    10.4472 (1e9 edges/sec) 
+nthreads  4: soln time       180.72 sec speedup     4.00 rate    20.8884 (1e9 edges/sec) 
+nthreads  8: soln time        92.91 sec speedup     7.77 rate    40.6291 (1e9 edges/sec) 
+nthreads 16: soln time        50.91 sec speedup    14.18 rate    74.1426 (1e9 edges/sec) 
+nthreads 20: soln time        41.06 sec speedup    17.58 rate    91.9311 (1e9 edges/sec) 
+nthreads 32: soln time        41.04 sec speedup    17.59 rate    91.9752 (1e9 edges/sec) 
+nthreads 40: soln time        35.00 sec speedup    20.63 rate   107.8596 (1e9 edges/sec) 
+
+# entries in final Y: 31.4245 million
+check time: 0.0072 sec
+
+--------------------------------------neurons per layer: 16384 layers: 480
+read net time 57.6987 sec
+# edges in all layers: 251.658 million
+
+nthreads  1: soln time      2653.20 sec                  rate     5.6911 (1e9 edges/sec) 
+nthreads  2: soln time      1334.97 sec speedup     1.99 rate    11.3107 (1e9 edges/sec) 
+nthreads  4: soln time       686.63 sec speedup     3.86 rate    21.9908 (1e9 edges/sec) 
+nthreads  8: soln time       348.26 sec speedup     7.62 rate    43.3570 (1e9 edges/sec) 
+nthreads 16: soln time       182.21 sec speedup    14.56 rate    82.8667 (1e9 edges/sec) 
+nthreads 20: soln time       150.19 sec speedup    17.67 rate   100.5390 (1e9 edges/sec) 
+nthreads 32: soln time       152.59 sec speedup    17.39 rate    98.9539 (1e9 edges/sec) 
+nthreads 40: soln time       131.49 sec speedup    20.18 rate   114.8358 (1e9 edges/sec) 
+
+# entries in final Y: 31.4245 million
+check time: 0.00654197 sec
+
+--------------------------------------neurons per layer: 16384 layers: 1920
+read net time 230.495 sec
+# edges in all layers: 1006.63 million
+
+nthreads  1: soln time     10407.27 sec                  rate     5.8034 (1e9 edges/sec) 
+nthreads  2: soln time      5210.63 sec speedup     2.00 rate    11.5913 (1e9 edges/sec) 
+nthreads  4: soln time      2680.87 sec speedup     3.88 rate    22.5292 (1e9 edges/sec) 
+nthreads  8: soln time      1360.12 sec speedup     7.65 rate    44.4063 (1e9 edges/sec) 
+nthreads 16: soln time       710.76 sec speedup    14.64 rate    84.9763 (1e9 edges/sec) 
+nthreads 20: soln time       613.12 sec speedup    16.97 rate    98.5098 (1e9 edges/sec) 
+nthreads 32: soln time       591.89 sec speedup    17.58 rate   102.0430 (1e9 edges/sec) 
+nthreads 40: soln time       504.73 sec speedup    20.62 rate   119.6633 (1e9 edges/sec) 
+
+# entries in final Y: 31.4245 million
+check time: 0.00681996 sec
+
+# neurons: 65536 bias: -0.45
+# features: 60000 read time: 148.427 sec
+# entries in Y0: 392.192 million
+
+--------------------------------------neurons per layer: 65536 layers: 120
+read net time 79.887 sec
+# edges in all layers: 251.658 million
+
+nthreads  1: soln time      4001.28 sec                  rate     3.7737 (1e9 edges/sec) 
+nthreads  2: soln time      2195.57 sec speedup     1.82 rate     6.8772 (1e9 edges/sec) 
+nthreads  4: soln time      1100.77 sec speedup     3.63 rate    13.7171 (1e9 edges/sec) 
+nthreads  8: soln time       558.42 sec speedup     7.17 rate    27.0395 (1e9 edges/sec) 
+nthreads 16: soln time       297.22 sec speedup    13.46 rate    50.8016 (1e9 edges/sec) 
+nthreads 20: soln time       243.44 sec speedup    16.44 rate    62.0255 (1e9 edges/sec) 
+nthreads 32: soln time       240.18 sec speedup    16.66 rate    62.8676 (1e9 edges/sec) 
+nthreads 40: soln time       229.36 sec speedup    17.45 rate    65.8320 (1e9 edges/sec) 
+
+# entries in final Y: 130.679 million
+check time: 0.0267251 sec
+
+--------------------------------------neurons per layer: 65536 layers: 480
+read net time 240.194 sec
+# edges in all layers: 1006.63 million
+
+nthreads  1: soln time     15124.53 sec                  rate     3.9934 (1e9 edges/sec) 
+nthreads  2: soln time      8112.42 sec speedup     1.86 rate     7.4451 (1e9 edges/sec) 
+nthreads  4: soln time      4046.98 sec speedup     3.74 rate    14.9242 (1e9 edges/sec) 
+nthreads  8: soln time      1973.16 sec speedup     7.67 rate    30.6097 (1e9 edges/sec) 
+nthreads 16: soln time      1122.76 sec speedup    13.47 rate    53.7943 (1e9 edges/sec) 
+nthreads 20: soln time       927.29 sec speedup    16.31 rate    65.1341 (1e9 edges/sec) 
+nthreads 32: soln time       941.61 sec speedup    16.06 rate    64.1434 (1e9 edges/sec) 
+nthreads 40: soln time       918.87 sec speedup    16.46 rate    65.7310 (1e9 edges/sec) 
+
+# entries in final Y: 130.679 million
+check time: 0.0251191 sec
+
+--------------------------------------neurons per layer: 65536 layers: 1920
+read net time 980.087 sec
+# edges in all layers: 4026.53 million
+
+nthreads  1: (in progress)
+
diff --git a/GraphBLAS/demo/dnn_run.m b/GraphBLAS/demo/dnn_run.m
index 8f8908e306..9c49d4fb8b 100644
--- a/GraphBLAS/demo/dnn_run.m
+++ b/GraphBLAS/demo/dnn_run.m
@@ -1,5 +1,8 @@
 % Set locations of files.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rootdir = '/raid/hyper/GraphChallenge/dnn_data/MATLAB' ;
 ncores = maxNumCompThreads ;
 fprintf ('# of cores :  %d\n', ncores) ;
diff --git a/GraphBLAS/demo/gbdemo.m b/GraphBLAS/demo/gbdemo.m
index c539711bcb..de4dcd0e0c 100644
--- a/GraphBLAS/demo/gbdemo.m
+++ b/GraphBLAS/demo/gbdemo.m
@@ -1,2 +1,6 @@
 % Run the GraphBLAS demo
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 echodemo ('graphblas_demo') ;
diff --git a/GraphBLAS/demo/gbdemo2.m b/GraphBLAS/demo/gbdemo2.m
index 63d5f631e0..7074f0319f 100644
--- a/GraphBLAS/demo/gbdemo2.m
+++ b/GraphBLAS/demo/gbdemo2.m
@@ -35,6 +35,9 @@ function gbdemo2 (bnz)
 %
 % See also GrB.assign, subsasgn.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 nthreads = GrB.threads ;
 help gbdemo2
 fprintf ('\n# of threads used in GraphBLAS: %d\n\n', nthreads) ;
@@ -47,7 +50,7 @@ function gbdemo2 (bnz)
 anz = 50000 ;
 A = sprandn (k, k, anz / k^2) ;
 
-for n = [1000:1000:6000]
+for n = 1000:1000:6000
 
     % reset the random number generator for repeatable results
     rng ('default') ;
diff --git a/GraphBLAS/demo/graphblas_demo.m b/GraphBLAS/demo/graphblas_demo.m
index c6c926906d..be38153e3c 100644
--- a/GraphBLAS/demo/graphblas_demo.m
+++ b/GraphBLAS/demo/graphblas_demo.m
@@ -4,7 +4,7 @@
 % for more details and resources.  See also the SuiteSparse:GraphBLAS
 % User Guide in this package.
 %
-% SuiteSparse:GraphBLAS, (c) 2017-2019, Tim Davis, Texas A&M University,
+% SuiteSparse:GraphBLAS, (c) 2017-2020, Tim Davis, Texas A&M University,
 % http://faculty.cse.tamu.edu/davis
 
 %% GraphBLAS: faster and more general sparse matrices for MATLAB
@@ -430,7 +430,9 @@
 %
 % d.out  = 'default' or 'replace', clears C after the accum op is used.
 %
-% d.mask = 'default' or 'complement', to use M or ~M as the mask matrix.
+% d.mask = 'default' or 'complement', to use M or ~M as the mask matrix;
+%          'structural', or 'structural complement', to use the pattern
+%           of M or ~M.
 %
 % d.in0  = 'default' or 'transpose', to transpose A for C=A*B, C=A+B, etc.
 %
@@ -1034,6 +1036,7 @@
 %   GrB.semiringinfo (s, type)   list properties of a semiring
 %   t = GrB.threads (t)          set/get # of threads to use in GraphBLAS
 %   c = GrB.chunk (c)            set/get chunk size to use in GraphBLAS
+%   b = GrB.burble (b)           set/get burble (diagnostic output)
 %   result = GrB.entries (G,...) count or query entries in a matrix
 %   result = GrB.nonz (G,...)    count or query nonzeros in a matrix
 %   C = GrB.prune (A, id)        prune entries equal to id
@@ -1053,6 +1056,7 @@
 %                                build a GrB matrix from list of entries
 %   [I,J,X] = GrB.extracttuples (A, desc)
 %                                extract all entries from a matrix
+%   s = GrB.normdiff (A, B, kind)   norm (A-B,kind)
 
 %% GraphBLAS operations with Cout, mask M, and accum.
 %
diff --git a/GraphBLAS/demo/graphblas_demo2.m b/GraphBLAS/demo/graphblas_demo2.m
index a23342e12d..2f2541e89f 100644
--- a/GraphBLAS/demo/graphblas_demo2.m
+++ b/GraphBLAS/demo/graphblas_demo2.m
@@ -1,2 +1,6 @@
 % Run the GraphBLAS demo2
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 gbdemo2
diff --git a/GraphBLAS/demo/html/DGX_Station/README.txt b/GraphBLAS/demo/html/DGX_Station/README.txt
index aa0ac9029d..44b74cbad7 100644
--- a/GraphBLAS/demo/html/DGX_Station/README.txt
+++ b/GraphBLAS/demo/html/DGX_Station/README.txt
@@ -11,3 +11,5 @@ test did not use any of the four Volta V100 GPUs.
 
 v310: output from GraphBLAS v3.1.0
 v312: output from GraphBLAS v3.1.2 (draft, Nov 15, 2019)
+v320: output from GraphBLAS v3.2.0 (draft, Feb 19, 2020)
+
diff --git a/GraphBLAS/demo/html/DGX_Station/v320/graphblas_demo.html b/GraphBLAS/demo/html/DGX_Station/v320/graphblas_demo.html
new file mode 100644
index 0000000000..9cdbbacf3c
--- /dev/null
+++ b/GraphBLAS/demo/html/DGX_Station/v320/graphblas_demo.html
@@ -0,0 +1,2693 @@
+
+<!DOCTYPE html
+  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+   <!--
+This HTML was auto-generated from MATLAB code.
+To make changes, update the MATLAB code and republish this document.
+      --><title>GraphBLAS: graph algorithms in the language of linear algebra</title><meta name="generator" content="MATLAB 9.4"><link rel="schema.DC" href="http://purl.org/dc/elements/1.1/"><meta name="DC.date" content="2020-02-18"><meta name="DC.source" content="graphblas_demo.m"><style type="text/css">
+html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,font,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td{margin:0;padding:0;border:0;outline:0;font-size:100%;vertical-align:baseline;background:transparent}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:'';content:none}:focus{outine:0}ins{text-decoration:none}del{text-decoration:line-through}table{border-collapse:collapse;border-spacing:0}
+
+html { min-height:100%; margin-bottom:1px; }
+html body { height:100%; margin:0px; font-family:Arial, Helvetica, sans-serif; font-size:10px; color:#000; line-height:140%; background:#fff none; overflow-y:scroll; }
+html body td { vertical-align:top; text-align:left; }
+
+h1 { padding:0px; margin:0px 0px 25px; font-family:Arial, Helvetica, sans-serif; font-size:1.5em; color:#d55000; line-height:100%; font-weight:normal; }
+h2 { padding:0px; margin:0px 0px 8px; font-family:Arial, Helvetica, sans-serif; font-size:1.2em; color:#000; font-weight:bold; line-height:140%; border-bottom:1px solid #d6d4d4; display:block; }
+h3 { padding:0px; margin:0px 0px 5px; font-family:Arial, Helvetica, sans-serif; font-size:1.1em; color:#000; font-weight:bold; line-height:140%; }
+
+a { color:#005fce; text-decoration:none; }
+a:hover { color:#005fce; text-decoration:underline; }
+a:visited { color:#004aa0; text-decoration:none; }
+
+p { padding:0px; margin:0px 0px 20px; }
+img { padding:0px; margin:0px 0px 20px; border:none; }
+p img, pre img, tt img, li img, h1 img, h2 img { margin-bottom:0px; } 
+
+ul { padding:0px; margin:0px 0px 20px 23px; list-style:square; }
+ul li { padding:0px; margin:0px 0px 7px 0px; }
+ul li ul { padding:5px 0px 0px; margin:0px 0px 7px 23px; }
+ul li ol li { list-style:decimal; }
+ol { padding:0px; margin:0px 0px 20px 0px; list-style:decimal; }
+ol li { padding:0px; margin:0px 0px 7px 23px; list-style-type:decimal; }
+ol li ol { padding:5px 0px 0px; margin:0px 0px 7px 0px; }
+ol li ol li { list-style-type:lower-alpha; }
+ol li ul { padding-top:7px; }
+ol li ul li { list-style:square; }
+
+.content { font-size:1.2em; line-height:140%; padding: 20px; }
+
+pre, code { font-size:12px; }
+tt { font-size: 1.2em; }
+pre { margin:0px 0px 20px; }
+pre.codeinput { padding:10px; border:1px solid #d3d3d3; background:#f7f7f7; }
+pre.codeoutput { padding:10px 11px; margin:0px 0px 20px; color:#4c4c4c; }
+pre.error { color:red; }
+
+@media print { pre.codeinput, pre.codeoutput { word-wrap:break-word; width:100%; } }
+
+span.keyword { color:#0000FF }
+span.comment { color:#228B22 }
+span.string { color:#A020F0 }
+span.untermstring { color:#B20000 }
+span.syscmd { color:#B28C00 }
+
+.footer { width:auto; padding:10px 0px; margin:25px 0px 0px; border-top:1px dotted #878787; font-size:0.8em; line-height:140%; font-style:italic; color:#878787; text-align:left; float:none; }
+.footer p { margin:0px; }
+.footer a { color:#878787; }
+.footer a:hover { color:#878787; text-decoration:underline; }
+.footer a:visited { color:#878787; }
+
+table th { padding:7px 5px; text-align:left; vertical-align:middle; border: 1px solid #d6d4d4; font-weight:bold; }
+table td { padding:7px 5px; text-align:left; vertical-align:top; border:1px solid #d6d4d4; }
+
+
+
+
+
+  </style></head><body><div class="content"><h1>GraphBLAS: graph algorithms in the language of linear algebra</h1><!--introduction--><p>GraphBLAS is a library for creating graph algorithms based on sparse linear algebraic operations over semirings.  Visit <a href="http://graphblas.org">http://graphblas.org</a> for more details and resources.  See also the SuiteSparse:GraphBLAS User Guide in this package.</p><p>SuiteSparse:GraphBLAS, (c) 2017-2020, Tim Davis, Texas A&amp;M University, <a href="http://faculty.cse.tamu.edu/davis">http://faculty.cse.tamu.edu/davis</a></p><!--/introduction--><h2>Contents</h2><div><ul><li><a href="#1">GraphBLAS: faster and more general sparse matrices for MATLAB</a></li><li><a href="#2">Sparse integer matrices</a></li><li><a href="#3">Sparse single-precision matrices</a></li><li><a href="#4">Mixing MATLAB and GraphBLAS matrices</a></li><li><a href="#5">Faster matrix operations</a></li><li><a href="#6">A wide range of semirings</a></li><li><a href="#8">The max.plus tropical semiring</a></li><li><a href="#9">A boolean semiring</a></li><li><a href="#13">GraphBLAS operators, monoids, and semirings</a></li><li><a href="#16">Element-wise operations</a></li><li><a href="#18">Subtracting two matrices</a></li><li><a href="#20">Element-wise 'multiplication'</a></li><li><a href="#22">Overloaded operators</a></li><li><a href="#25">Overloaded functions</a></li><li><a href="#27">Zeros are handled differently</a></li><li><a href="#29">Displaying contents of a GraphBLAS matrix</a></li><li><a href="#34">Storing a matrix by row or by column</a></li><li><a href="#38">Hypersparse matrices</a></li><li><a href="#41">numel uses vpa if the matrix is really huge</a></li><li><a href="#43">The mask and accumulator</a></li><li><a href="#45">The descriptor</a></li><li><a href="#46">Integer arithmetic is different in GraphBLAS</a></li><li><a href="#48">An example graph algorithm: breadth-first search</a></li><li><a href="#49">Example graph algorithm: Luby's method 
+in GraphBLAS</a></li><li><a href="#50">Sparse deep neural network</a></li><li><a href="#51">Solving the sparse deep neural network problem with GraphBLAS</a></li><li><a href="#52">Solving the sparse deep neural network problem with MATLAB</a></li><li><a href="#53">For objects, GraphBLAS has better colon notation than MATLAB</a></li><li><a href="#56">Iterative solvers work as-is</a></li><li><a href="#57">... even in single precision</a></li><li><a href="#60">Extreme performance differences between GraphBLAS and MATLAB.</a></li><li><a href="#61">Sparse logical indexing is much, much faster in GraphBLAS</a></li><li><a href="#63">Limitations and their future solutions</a></li><li><a href="#70">GraphBLAS operations</a></li><li><a href="#71">Methods for the GrB class:</a></li><li><a href="#75">Operator overloading:</a></li><li><a href="#76">Static Methods:</a></li><li><a href="#77">GraphBLAS basic functions:</a></li><li><a href="#78">GraphBLAS operations with Cout, mask M, and accum.</a></li><li><a href="#80">Static Methods for graph algorithms:</a></li></ul></div><h2 id="1">GraphBLAS: faster and more general sparse matrices for MATLAB</h2><p>GraphBLAS is not only useful for creating graph algorithms; it also supports a wide range of sparse matrix data types and operations. MATLAB can compute C=A*B with just two semirings: 'plus.times.double' and 'plus.times.complex' for complex matrices.  GraphBLAS has 1,040 unique built-in semirings, such as 'max.plus' (https://en.wikipedia.org/wiki/Tropical_semiring).  These semirings can be used to construct a wide variety of graph algorithms, based on operations on sparse adjacency matrices.</p><p>GraphBLAS supports sparse double and single precision matrices, logical, and sparse integer matrices: int8, int16, int32, int64, uint8, uint16, uint32, and uint64.  Complex matrices will be added in the future.</p><pre class="codeinput">clear <span class="string">all</span>
+format <span class="string">compact</span>
+rng (<span class="string">'default'</span>) ;
+X = 100 * rand (2) ;
+G = GrB (X)              <span class="comment">% GraphBLAS copy of a matrix X, same type</span>
+</pre><pre class="codeoutput">
+G =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    81.4724
+    (2,1)    90.5792
+    (1,2)    12.6987
+    (2,2)    91.3376
+
+</pre><h2 id="2">Sparse integer matrices</h2><p>Here's an int8 version of the same matrix:</p><pre class="codeinput">S = int8 (G)            <span class="comment">% convert G to a full MATLAB int8 matrix</span>
+G = GrB (X, <span class="string">'int8'</span>)      <span class="comment">% a GraphBLAS sparse int8 matrix</span>
+</pre><pre class="codeoutput">S =
+  2&times;2 int8 matrix
+   81   12
+   90   91
+
+G =
+
+  2x2 GraphBLAS int8_t matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)   81
+    (2,1)   90
+    (1,2)   12
+    (2,2)   91
+
+</pre><h2 id="3">Sparse single-precision matrices</h2><p>Matrix operations in GraphBLAS are typically as fast as, or faster than, those in MATLAB.  Here's an unfair comparison: computing X^2 with MATLAB in double precision and with GraphBLAS in single precision.  You would naturally expect GraphBLAS to be faster.</p><p>Please wait ...</p><pre class="codeinput">n = 1e5 ;
+X = spdiags (rand (n, 201), -100:100, n, n) ;
+G = GrB (X, <span class="string">'single'</span>) ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+tic
+X2 = X^2 ;
+matlab_time = toc ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (in single)\n'</span>, gb_time) ;
+fprintf (<span class="string">'MATLAB time:    %g sec (in double)\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+</pre><pre class="codeoutput">
+GraphBLAS time: 0.779341 sec (in single)
+MATLAB time:    9.53978 sec (in double)
+Speedup of GraphBLAS over MATLAB: 12.2408
+</pre><h2 id="4">Mixing MATLAB and GraphBLAS matrices</h2><p>The error in the last computation is about eps('single') since GraphBLAS did its computation in single precision, while MATLAB used double precision.  MATLAB and GraphBLAS matrices can be easily combined, as in X2-G2.  The sparse single precision matrices take less memory space.</p><pre class="codeinput">err = norm (X2 - G2, 1) / norm (X2,1)
+eps (<span class="string">'single'</span>)
+whos <span class="string">G</span> <span class="string">G2</span> <span class="string">X</span> <span class="string">X2</span>
+</pre><pre class="codeoutput">err =
+   1.5049e-07
+ans =
+  single
+  1.1921e-07
+  Name           Size                    Bytes  Class     Attributes
+
+  G         100000x100000            241879772  GrB                 
+  G2        100000x100000            481518572  GrB                 
+  X         100000x100000            322238408  double    sparse    
+  X2        100000x100000            641756808  double    sparse    
+
+</pre><h2 id="5">Faster matrix operations</h2><p>But even with standard double precision sparse matrices, GraphBLAS is typically faster than the built-in MATLAB methods.  Here's a fair comparison:</p><pre class="codeinput">G = GrB (X) ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+err = norm (X2 - G2, 1) / norm (X2,1)
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (in double)\n'</span>, gb_time) ;
+fprintf (<span class="string">'MATLAB time:    %g sec (in double)\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+</pre><pre class="codeoutput">err =
+     0
+
+GraphBLAS time: 0.929388 sec (in double)
+MATLAB time:    9.53978 sec (in double)
+Speedup of GraphBLAS over MATLAB: 10.2646
+</pre><h2 id="6">A wide range of semirings</h2><p>MATLAB can only compute C=A*B using the standard '+.*.double' and '+.*.complex' semirings.  A semiring is defined in terms of a string, 'add.mult.type', where 'add' is a monoid that takes the place of the additive operator, 'mult' is the multiplicative operator, and 'type' is the data type for the two inputs to the mult operator (the type defaults to the type of A for C=A*B).</p><p>In the standard semiring, C=A*B is defined as:</p><pre class="language-matlab">C(i,j) = sum (A(i,:).' .* B(:,j))
+</pre><p>using 'plus' as the monoid and 'times' as the multiplicative operator. But in a more general semiring, 'sum' can be any monoid, which is an associative and commutative operator that has an identity value.  For example, in the 'max.plus' tropical algebra, C(i,j) for C=A*B is defined as:</p><pre class="language-matlab">C(i,j) = max (A(i,:).' + B(:,j))
+</pre><p>This can be computed in GraphBLAS with:</p><pre class="language-matlab">C = GrB.mxm (<span class="string">'max.+'</span>, A, B)
+</pre><pre class="codeinput">n = 3 ;
+A = rand (n) ;
+B = rand (n) ;
+C = zeros (n) ;
+<span class="keyword">for</span> i = 1:n
+    <span class="keyword">for</span> j = 1:n
+        C(i,j) = max (A (i,:).' + B (:,j)) ;
+    <span class="keyword">end</span>
+<span class="keyword">end</span>
+C2 = GrB.mxm (<span class="string">'max.+'</span>, A, B) ;
+fprintf (<span class="string">'\nerr = norm (C-C2,1) = %g\n'</span>, norm (C-C2,1)) ;
+</pre><pre class="codeoutput">
+err = norm (C-C2,1) = 0
+</pre><h2 id="8">The max.plus tropical semiring</h2><p>Here are details of the "max.plus" tropical semiring.  The identity value is -inf since max(x,-inf) = max (-inf,x) = x for any x.</p><pre class="codeinput">GrB.semiringinfo (<span class="string">'max.+.double'</span>) ;
+</pre><pre class="codeoutput">
+    GraphBLAS Semiring: max.+.double (built-in)
+    GraphBLAS Monoid: semiring-&gt;add (built-in)
+    GraphBLAS BinaryOp: monoid-&gt;op (built-in) z=max(x,y)
+    GraphBLAS type: ztype double size: 8
+    GraphBLAS type: xtype double size: 8
+    GraphBLAS type: ytype double size: 8
+    identity: [    -inf ] terminal: [    inf ]
+
+    GraphBLAS BinaryOp: semiring-&gt;multiply (built-in) z=plus(x,y)
+    GraphBLAS type: ztype double size: 8
+    GraphBLAS type: xtype double size: 8
+    GraphBLAS type: ytype double size: 8
+</pre><h2 id="9">A boolean semiring</h2><p>MATLAB cannot multiply two logical matrices.  MATLAB R2019a converts them to double and uses the conventional +.*.double semiring instead. In GraphBLAS, this is the common Boolean 'or.and.logical' semiring, which is widely used in linear algebraic graph algorithms.</p><pre class="codeinput">GrB.semiringinfo (<span class="string">'|.&amp;.logical'</span>) ;
+</pre><pre class="codeoutput">
+    GraphBLAS Semiring: |.&amp;.logical (built-in)
+    GraphBLAS Monoid: semiring-&gt;add (built-in)
+    GraphBLAS BinaryOp: monoid-&gt;op (built-in) z=or(x,y)
+    GraphBLAS type: ztype bool size: 1
+    GraphBLAS type: xtype bool size: 1
+    GraphBLAS type: ytype bool size: 1
+    identity: [   0 ] terminal: [   1 ]
+
+    GraphBLAS BinaryOp: semiring-&gt;multiply (built-in) z=and(x,y)
+    GraphBLAS type: ztype bool size: 1
+    GraphBLAS type: xtype bool size: 1
+    GraphBLAS type: ytype bool size: 1
+</pre><pre class="codeinput">clear
+A = sparse (rand (3) &gt; 0.5)
+B = sparse (rand (3) &gt; 0.2)
+</pre><pre class="codeoutput">A =
+  3&times;3 sparse logical array
+   (2,1)      1
+   (2,2)      1
+   (3,2)      1
+   (1,3)      1
+B =
+  3&times;3 sparse logical array
+   (1,1)      1
+   (2,1)      1
+   (3,1)      1
+   (1,2)      1
+   (2,2)      1
+   (3,2)      1
+   (1,3)      1
+   (2,3)      1
+   (3,3)      1
+</pre><pre class="codeinput"><span class="keyword">try</span>
+    <span class="comment">% MATLAB R2019a does this by casting A and B to double</span>
+    C1 = A*B
+<span class="keyword">catch</span>
+    <span class="comment">% MATLAB R2018a throws an error</span>
+    fprintf (<span class="string">'MATLAB R2019a required for C=A*B with logical\n'</span>) ;
+    fprintf (<span class="string">'matrices.  Explicitly converting to double:\n'</span>) ;
+    C1 = double (A) * double (B)
+<span class="keyword">end</span>
+C2 = GrB (A) * GrB (B)
+</pre><pre class="codeoutput">MATLAB R2019a required for C=A*B with logical
+matrices.  Explicitly converting to double:
+C1 =
+   (1,1)        1
+   (2,1)        2
+   (3,1)        1
+   (1,2)        1
+   (2,2)        2
+   (3,2)        1
+   (1,3)        1
+   (2,3)        2
+   (3,3)        1
+
+C2 =
+
+  3x3 GraphBLAS bool matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)   1
+    (2,1)   1
+    (3,1)   1
+    (1,2)   1
+    (2,2)   1
+    (3,2)   1
+    (1,3)   1
+    (2,3)   1
+    (3,3)   1
+
+</pre><p>Note that C1 is a MATLAB sparse double matrix, and contains non-binary values.  C2 is a GraphBLAS logical matrix.</p><pre class="codeinput">whos
+GrB.type (C2)
+</pre><pre class="codeoutput">  Name      Size            Bytes  Class      Attributes
+
+  A         3x3                68  logical    sparse    
+  B         3x3               113  logical    sparse    
+  C1        3x3               176  double     sparse    
+  C2        3x3              1079  GrB                  
+
+ans =
+    'logical'
+</pre><h2 id="13">GraphBLAS operators, monoids, and semirings</h2><p>The C interface for SuiteSparse:GraphBLAS allows for arbitrary types and operators to be constructed.  However, the MATLAB interface to SuiteSparse:GraphBLAS is restricted to pre-defined types and operators: a mere 11 types, 66 unary operators, 275 binary operators, 44 monoids, 16 select operators, and 1,865 semirings (1,040 of which are unique, since some binary operators are equivalent: 'min.logical' and '&amp;.logical' are the same thing, for example).  The complex type and its binary operators, monoids, and semirings will be added in the near future.</p><p>That gives you a lot of tools to create all kinds of interesting graph algorithms.  For example:</p><pre class="language-matlab">GrB.bfs    <span class="comment">% breadth-first search</span>
+GrB.dnn    <span class="comment">% sparse deep neural network (http://graphchallenge.org)</span>
+GrB.mis    <span class="comment">% maximal independent set</span>
+</pre><p>See 'help GrB.binopinfo' for a list of the binary operators, and 'help GrB.monoidinfo' for the ones that can be used as the additive monoid in a semiring.</p><pre class="codeinput">help <span class="string">GrB.binopinfo</span>
+</pre><pre class="codeoutput"> GRB.BINOPINFO list the details of a GraphBLAS binary operator.
+ 
+  Usage
+ 
+    GrB.binopinfo
+    GrB.binopinfo (op)
+    GrB.binopinfo (op, type)
+ 
+  For GrB.binopinfo(op), the op must be a string of the form
+  'op.type', where 'op' is listed below.  The second usage allows the
+  type to be omitted from the first argument, as just 'op'.  This is
+  valid for all GraphBLAS operations, since the type defaults to the
+  type of the input matrices.  However, GrB.binopinfo does not have a
+  default type and thus one must be provided, either in the op as
+  GrB.binopinfo ('+.double'), or in the second argument, GrB.binopinfo
+  ('+', 'double').
+ 
+  The MATLAB interface to GraphBLAS provides for 27 different binary
+  operators, each of which may be used with any of the 11 types, for
+  a total of 27*11 = 297 valid binary operators.  Binary operators
+  are defined by a string of the form 'op.type', or just 'op'.  In
+  the latter case, the type defaults to the type of the matrix inputs
+  to the GraphBLAS operation.
+ 
+  The 6 comparator operators come in two flavors.  For the is*
+  operators, the result has the same type as the inputs, x and y,
+  with 1 for true and 0 for false.  For example isgt.double (pi, 3.0)
+  is the double value 1.0.  For the second set of 6 operators (eq,
+  ne, gt, lt, ge, le), the result is always logical (true or false).
+  In a semiring, the type of the add monoid must exactly match the
+  type of the output of the multiply operator, and thus
+  'plus.iseq.double' is valid (counting how many terms are equal).
+  The 'plus.eq.double' semiring is valid, but not the same semiring
+  since the 'plus' of 'plus.eq.double' has a logical type and is thus
+  equivalent to 'or.eq.double'.   The 'or.eq' is true if any terms
+  are equal and false otherwise (it does not count the number of
+  terms that are equal).
+ 
+  The following binary operators are available.  Many have equivalent
+  synonyms, so that '1st' and 'first' both define the first(x,y) = x
+  operator.
+ 
+    operator name(s) f(x,y)         |   operator names(s) f(x,y)
+    ---------------- ------         |   ----------------- ------
+    1st first        x              |   iseq             x == y
+    2nd second       y              |   isne             x ~= y
+    min              min(x,y)       |   isgt             x &gt; y
+    max              max(x,y)       |   islt             x &lt; y
+    +   plus         x+y            |   isge             x &gt;= y
+    -   minus        x-y            |   isle             x &lt;= y
+    rminus           y-x            |   ==  eq           x == y
+    *   times        x*y            |   ~=  ne           x ~= y
+    /   div          x/y            |   &gt;   gt           x &gt; y
+    \   rdiv         y/x            |   &lt;   lt           x &lt; y
+    |   || or  lor   x | y          |   &gt;=  ge           x &gt;= y
+    &amp;   &amp;&amp; and land  x &amp; y          |   &lt;=  le           x &lt;= y
+    xor lxor         xor(x,y)       |
+    pair             1              |   any              x, or y
+ 
+  The three logical operators, lor, land, and lxor, also come in 11
+  types.  z = lor.double (x,y) tests the condition (x~=0) || (y~=0),
+  and returns the double value 1.0 if true, or 0.0 if false.
+ 
+  Example:
+ 
+    % valid binary operators
+    GrB.binopinfo ('+.double') ;
+    GrB.binopinfo ('1st.int32') ;
+ 
+    % invalid binary operator (an error; this is a unary op):
+    GrB.binopinfo ('abs.double') ;
+ 
+  See also GrB.descriptorinfo, GrB.monoidinfo, GrB.selectopinfo,
+  GrB.semiringinfo, GrB.unopinfo.
+
+</pre><pre class="codeinput">help <span class="string">GrB.monoidinfo</span>
+</pre><pre class="codeoutput"> GRB.MONOIDINFO list the details of a GraphBLAS monoid.
+ 
+  Usage
+ 
+    GrB.monoidinfo
+    GrB.monoidinfo (monoid)
+    GrB.monoidinfo (monoid, type)
+ 
+  For GrB.monoidinfo(op), the op must be a string of the form
+  'op.type', where 'op' is listed below.  The second usage allows the
+  type to be omitted from the first argument, as just 'op'.  This is
+  valid for all GraphBLAS operations, since the type defaults to the
+  type of the input matrices.  However, GrB.monoidinfo does not have a
+  default type and thus one must be provided, either in the op as
+  GrB.monoidinfo ('+.double'), or in the second argument,
+  GrB.monoidinfo ('+', 'double').
+ 
+  The MATLAB interface to GraphBLAS provides for 44 different
+  monoids.  The valid monoids are: '+', '*', 'max', and 'min' for all
+  but the 'logical' type, and '|', '&amp;', 'xor', and 'eq' for the
+  'logical' type.
+ 
+  Example:
+ 
+    % valid monoids
+    GrB.monoidinfo ('+.double') ;
+    GrB.monoidinfo ('*.int32') ;
+ 
+    % invalid monoids
+    GrB.monoidinfo ('1st.int32') ;
+    GrB.monoidinfo ('abs.double') ;
+ 
+  See also GrB.binopinfo, GrB.descriptorinfo, GrB.selectopinfo,
+  GrB.semiringinfo, GrB.unopinfo.
+
+</pre><h2 id="16">Element-wise operations</h2><p>Binary operators can be used in element-wise matrix operations, like C=A+B and C=A.*B.  For the matrix addition C=A+B, the pattern of C is the set union of A and B, and the '+' operator is applied for entries in the intersection.  Entries in A but not B, or in B but not A, are assigned to C without using the operator.  The '+' operator is used for C=A+B but any operator can be used with GrB.eadd.</p><pre class="codeinput">A = GrB (sprand (3, 3, 0.5)) ;
+B = GrB (sprand (3, 3, 0.5)) ;
+C1 = A + B
+C2 = GrB.eadd (<span class="string">'+'</span>, A, B)
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    1.47841
+    (2,2)    0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    1.47841
+    (2,2)    0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+err =
+     0
+</pre><h2 id="18">Subtracting two matrices</h2><p>A-B and GrB.eadd ('-', A, B) are not the same thing, since the '-' operator is not applied to an entry that is in B but not A.</p><pre class="codeinput">C1 = A-B
+C2 = GrB.eadd (<span class="string">'-'</span>, A, B)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    -0.666139
+    (3,1)    -0.735859
+    (1,2)    -0.334348
+    (2,2)    -0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    -0.334348
+    (2,2)    0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+</pre><p>But these give the same result</p><pre class="codeinput">C1 = A-B
+C2 = GrB.eadd (<span class="string">'+'</span>, A, GrB.apply (<span class="string">'-'</span>, B))
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    -0.666139
+    (3,1)    -0.735859
+    (1,2)    -0.334348
+    (2,2)    -0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    -0.666139
+    (3,1)    -0.735859
+    (1,2)    -0.334348
+    (2,2)    -0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+err =
+     0
+</pre><h2 id="20">Element-wise 'multiplication'</h2><p>For C = A.*B, the result C is the set intersection of the pattern of A and B.  The operator is applied to entries in both A and B.  Entries in A but not B, or B but not A, do not appear in the result C.</p><pre class="codeinput">C1 = A.*B
+C2 = GrB.emult (<span class="string">'*'</span>, A, B)
+C3 = double (A) .* double (B)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  1 nonzero, 1 entry
+
+    (1,2)    0.518474
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  1 nonzero, 1 entry
+
+    (1,2)    0.518474
+
+C3 =
+   (1,2)       0.5185
+</pre><p>Just as in GrB.eadd, any operator can be used in GrB.emult:</p><pre class="codeinput">A
+B
+C2 = GrB.emult (<span class="string">'max'</span>, A, B)
+</pre><pre class="codeoutput">
+A =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,2)    0.572029
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+B =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    0.906378
+    (2,2)    0.146938
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  1 nonzero, 1 entry
+
+    (1,2)    0.906378
+
+</pre><h2 id="22">Overloaded operators</h2><p>The following operators all work as you would expect for any matrix. The matrices A and B can be GraphBLAS matrices, or MATLAB sparse or dense matrices, in any combination, or scalars where appropriate:</p><pre>  A+B   A-B  A*B   A.*B  A./B  A.\B  A.^b   A/b   C=A(I,J)
+  -A    +A   ~A    A'    A.'   A&amp;B   A|B    b\A   C(I,J)=A
+  A~=B  A&gt;B  A==B  A&lt;=B  A&gt;=B  A&lt;B   [A,B]  [A;B]
+  A(1:end,1:end)</pre><p>For A^b, b must be a non-negative integer.</p><pre class="codeinput">C1 = [A B] ;
+C2 = [double(A) double(B)] ;
+assert (isequal (double (C1), C2))
+</pre><pre class="codeinput">C1 = A^2
+C2 = double (A)^2 ;
+err = norm (C1 - C2, 1)
+assert (err &lt; 1e-12)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  5 nonzeros, 5 entries
+
+    (2,2)    0.140946
+    (3,2)    0.0590838
+    (1,3)    0.142227
+    (2,3)    0.0259144
+    (3,3)    0.151809
+
+err =
+     0
+</pre><pre class="codeinput">C1 = A (1:2,2:end)
+A = double (A) ;
+C2 = A (1:2,2:end) ;
+assert (isequal (double (C1), C2))
+</pre><pre class="codeoutput">
+C1 =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  2 nonzeros, 2 entries
+
+    (1,1)    0.572029
+    (2,2)    0.248635
+
+</pre><h2 id="25">Overloaded functions</h2><p>Many MATLAB built-in functions can be used with GraphBLAS matrices:</p><p>A few differences with the built-in functions:</p><pre class="language-matlab">S = sparse (G)        <span class="comment">% makes a copy of a GrB matrix</span>
+F = full (G)          <span class="comment">% adds explicit zeros, so numel(F)==nnz(F)</span>
+F = full (G,type,id)  <span class="comment">% adds explicit identity values to a GrB matrix</span>
+disp (G, level)       <span class="comment">% display a GrB matrix G; level=2 is the default.</span>
+</pre><p>In the list below, the first set of Methods are overloaded built-in methods.  They are used as-is on GraphBLAS matrices, such as C=abs(G). The Static methods are prefixed with "GrB.", as in C = GrB.apply ( ... ).</p><pre class="codeinput">methods <span class="string">GrB</span>
+</pre><pre class="codeoutput">
+Methods for class GrB:
+
+GrB             ge              le              sparse          
+abs             graph           length          spfun           
+all             gt              logical         spones          
+amd             horzcat         lt              sprand          
+and             int16           max             sprandn         
+any             int32           min             sprandsym       
+assert          int64           minus           sprintf         
+bandwidth       int8            mldivide        sqrt            
+ceil            isa             mpower          subsasgn        
+colamd          isbanded        mrdivide        subsref         
+complex         isdiag          mtimes          sum             
+conj            isempty         ne              symamd          
+ctranspose      isequal         nnz             symrcm          
+diag            isfinite        nonzeros        times           
+digraph         isfloat         norm            transpose       
+disp            ishermitian     not             tril            
+display         isinf           numel           triu            
+dmperm          isinteger       nzmax           true            
+double          islogical       ones            uint16          
+eig             ismatrix        or              uint32          
+end             isnan           plus            uint64          
+eps             isnumeric       power           uint8           
+eq              isreal          prod            uminus          
+etree           isscalar        rdivide         uplus           
+false           issparse        real            vertcat         
+find            issymmetric     repmat          xor             
+fix             istril          reshape         zeros           
+flip            istriu          round           
+floor           isvector        sign            
+fprintf         kron            single          
+full            ldivide         size            
+
+Static methods:
+
+apply           emult           issigned        reduce          
+assign          entries         kronecker       select          
+bfs             expand          ktruss          selectopinfo    
+binopinfo       extract         laplacian       semiringinfo    
+build           extracttuples   mis             speye           
+burble          eye             monoidinfo      subassign       
+chunk           finalize        mxm             threads         
+clear           format          nonz            trans           
+compact         incidence       normdiff        tricount        
+descriptorinfo  init            offdiag         type            
+dnn             isbycol         pagerank        unopinfo        
+eadd            isbyrow         prune           vreduce         
+empty           isfull          random          
+
+</pre><h2 id="27">Zeros are handled differently</h2><p>Explicit zeros cannot be automatically dropped from a GraphBLAS matrix, like they are in MATLAB sparse matrices.  In a shortest-path problem, for example, an edge A(i,j) that is missing has an infinite weight, (the monoid identity of min(x,y) is +inf).  A zero edge weight A(i,j)=0 is very different from an entry that is not present in A.  However, if a GraphBLAS matrix is converted into a MATLAB sparse matrix, explicit zeros are dropped, which is the convention for a MATLAB sparse matrix. They can also be dropped from a GraphBLAS matrix using the GrB.select method.</p><pre class="codeinput">G = GrB (magic (2)) ;
+G (1,1) = 0      <span class="comment">% G(1,1) still appears as an explicit entry</span>
+A = double (G)   <span class="comment">% but it's dropped when converted to MATLAB sparse</span>
+H = GrB.select (<span class="string">'nonzero'</span>, G)  <span class="comment">% drops the explicit zeros from G</span>
+fprintf (<span class="string">'nnz (G): %d  nnz (A): %g nnz (H): %g\n'</span>, <span class="keyword">...</span>
+    nnz (G), nnz (A), nnz (H)) ;
+fprintf (<span class="string">'num entries in G: %d\n'</span>, GrB.entries (G)) ;
+</pre><pre class="codeoutput">
+G =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  3 nonzeros, 4 entries
+
+    (1,1)    0
+    (2,1)    4
+    (1,2)    3
+    (2,2)    2
+
+A =
+   (2,1)        4
+   (1,2)        3
+   (2,2)        2
+
+H =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  3 nonzeros, 3 entries
+
+    (2,1)    4
+    (1,2)    3
+    (2,2)    2
+
+nnz (G): 3  nnz (A): 3 nnz (H): 3
+num entries in G: 4
+</pre><h2 id="29">Displaying contents of a GraphBLAS matrix</h2><p>Unlike MATLAB, the default is to display just a few entries of a GrB matrix. Here are all 100 entries of a 10-by-10 matrix, using a non-default disp(G,3):</p><pre class="codeinput">G = GrB (rand (10)) ;
+<span class="comment">% display everything:</span>
+disp (G,3)
+</pre><pre class="codeoutput">
+G =
+
+  10x10 GraphBLAS double matrix, sparse by col:
+  100 nonzeros, 100 entries
+
+    (1,1)    0.0342763
+    (2,1)    0.17802
+    (3,1)    0.887592
+    (4,1)    0.889828
+    (5,1)    0.769149
+    (6,1)    0.00497062
+    (7,1)    0.735693
+    (8,1)    0.488349
+    (9,1)    0.332817
+    (10,1)    0.0273313
+    (1,2)    0.467212
+    (2,2)    0.796714
+    (3,2)    0.849463
+    (4,2)    0.965361
+    (5,2)    0.902248
+    (6,2)    0.0363252
+    (7,2)    0.708068
+    (8,2)    0.322919
+    (9,2)    0.700716
+    (10,2)    0.472957
+    (1,3)    0.204363
+    (2,3)    0.00931977
+    (3,3)    0.565881
+    (4,3)    0.183435
+    (5,3)    0.00843818
+    (6,3)    0.284938
+    (7,3)    0.706156
+    (8,3)    0.909475
+    (9,3)    0.84868
+    (10,3)    0.564605
+    (1,4)    0.075183
+    (2,4)    0.535293
+    (3,4)    0.072324
+    (4,4)    0.515373
+    (5,4)    0.926149
+    (6,4)    0.949252
+    (7,4)    0.0478888
+    (8,4)    0.523767
+    (9,4)    0.167203
+    (10,4)    0.28341
+    (1,5)    0.122669
+    (2,5)    0.441267
+    (3,5)    0.157113
+    (4,5)    0.302479
+    (5,5)    0.758486
+    (6,5)    0.910563
+    (7,5)    0.0246916
+    (8,5)    0.232421
+    (9,5)    0.38018
+    (10,5)    0.677531
+    (1,6)    0.869074
+    (2,6)    0.471459
+    (3,6)    0.624929
+    (4,6)    0.987186
+    (5,6)    0.282885
+    (6,6)    0.843833
+    (7,6)    0.869597
+    (8,6)    0.308209
+    (9,6)    0.201332
+    (10,6)    0.706603
+    (1,7)    0.563222
+    (2,7)    0.575795
+    (3,7)    0.056376
+    (4,7)    0.73412
+    (5,7)    0.608022
+    (6,7)    0.0400164
+    (7,7)    0.540801
+    (8,7)    0.023064
+    (9,7)    0.165682
+    (10,7)    0.250393
+    (1,8)    0.23865
+    (2,8)    0.232033
+    (3,8)    0.303191
+    (4,8)    0.579934
+    (5,8)    0.267751
+    (6,8)    0.916376
+    (7,8)    0.833499
+    (8,8)    0.978692
+    (9,8)    0.734445
+    (10,8)    0.102896
+    (1,9)    0.353059
+    (2,9)    0.738955
+    (3,9)    0.57539
+    (4,9)    0.751433
+    (5,9)    0.93256
+    (6,9)    0.281622
+    (7,9)    0.51302
+    (8,9)    0.24406
+    (9,9)    0.950086
+    (10,9)    0.303638
+    (1,10)    0.563593
+    (2,10)    0.705101
+    (3,10)    0.0604146
+    (4,10)    0.672065
+    (5,10)    0.359793
+    (6,10)    0.62931
+    (7,10)    0.977758
+    (8,10)    0.394328
+    (9,10)    0.765651
+    (10,10)    0.457809
+
+
+</pre><p>That was disp(G,3), so every entry was printed.  It's a little long, so the default is not to print everything.</p><p>With the default display (level = 2):</p><pre class="codeinput">G
+</pre><pre class="codeoutput">
+G =
+
+  10x10 GraphBLAS double matrix, sparse by col:
+  100 nonzeros, 100 entries
+
+    (1,1)    0.0342763
+    (2,1)    0.17802
+    (3,1)    0.887592
+    (4,1)    0.889828
+    (5,1)    0.769149
+    (6,1)    0.00497062
+    (7,1)    0.735693
+    (8,1)    0.488349
+    (9,1)    0.332817
+    (10,1)    0.0273313
+    (1,2)    0.467212
+    (2,2)    0.796714
+    (3,2)    0.849463
+    (4,2)    0.965361
+    (5,2)    0.902248
+    (6,2)    0.0363252
+    (7,2)    0.708068
+    (8,2)    0.322919
+    (9,2)    0.700716
+    (10,2)    0.472957
+    (1,3)    0.204363
+    (2,3)    0.00931977
+    (3,3)    0.565881
+    (4,3)    0.183435
+    (5,3)    0.00843818
+    (6,3)    0.284938
+    (7,3)    0.706156
+    (8,3)    0.909475
+    (9,3)    0.84868
+    (10,3)    0.564605
+    ...
+
+</pre><p>That was disp(G,2) or just display(G), which is what is printed by a MATLAB statement that doesn't have a trailing semicolon.  With level = 1, disp(G,1) gives just a terse summary:</p><pre class="codeinput">disp (G,1)
+</pre><pre class="codeoutput">
+G =
+
+  10x10 GraphBLAS double matrix, sparse by col:
+  100 nonzeros, 100 entries
+
+
+</pre><h2 id="34">Storing a matrix by row or by column</h2><p>MATLAB stores its sparse matrices by column, referred to as 'standard CSC' in SuiteSparse:GraphBLAS.  In the CSC (compressed sparse column) format, each column of the matrix is stored as a list of entries, with their value and row index.  In the CSR (compressed sparse row) format, each row is stored as a list of values and their column indices. GraphBLAS uses both CSC and CSR, and the two formats can be intermixed arbitrarily.  In its C interface, the default format is CSR.  However, for better compatibility with MATLAB, this MATLAB interface for SuiteSparse:GraphBLAS uses CSC by default instead.</p><pre class="codeinput">rng (<span class="string">'default'</span>) ;
+GrB.clear ;                      <span class="comment">% clear all prior GraphBLAS settings</span>
+fprintf (<span class="string">'the default format is: %s\n'</span>, GrB.format) ;
+C = sparse (rand (2))
+G = GrB (C)
+GrB.format (G)
+</pre><pre class="codeoutput">the default format is: by col
+C =
+   (1,1)       0.8147
+   (2,1)       0.9058
+   (1,2)       0.1270
+   (2,2)       0.9134
+
+G =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.814724
+    (2,1)    0.905792
+    (1,2)    0.126987
+    (2,2)    0.913376
+
+ans =
+    'by col'
+</pre><p>Many graph algorithms work better in CSR format, with matrices stored by row.  For example, it is common to use A(i,j) for the edge (i,j), and many graph algorithms need to access the out-adjacencies of nodes, which is the row A(i,:) for node i.  If the CSR format is desired, GrB.format ('by row') tells GraphBLAS to create all subsequent matrices in the CSR format.  Converting from a MATLAB sparse matrix (in standard CSC format) takes a little more time (requiring a transpose), but subsequent graph algorithms can be faster.</p><pre class="codeinput">G = GrB (C, <span class="string">'by row'</span>)
+fprintf (<span class="string">'the format of G is:    %s\n'</span>, GrB.format (G)) ;
+H = GrB (C)
+fprintf (<span class="string">'the format of H is:    %s\n'</span>, GrB.format (H)) ;
+err = norm (H-G,1)
+</pre><pre class="codeoutput">
+G =
+
+  2x2 GraphBLAS double matrix, sparse by row:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.814724
+    (1,2)    0.126987
+    (2,1)    0.905792
+    (2,2)    0.913376
+
+the format of G is:    by row
+
+H =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.814724
+    (2,1)    0.905792
+    (1,2)    0.126987
+    (2,2)    0.913376
+
+the format of H is:    by col
+err =
+     0
+</pre><h2 id="38">Hypersparse matrices</h2><p>SuiteSparse:GraphBLAS can use two kinds of sparse matrix data structures: standard and hypersparse, for both CSC and CSR formats.  In the standard CSC format used in MATLAB, an m-by-n matrix A takes O(n+nnz(A)) space.  MATLAB can create huge column vectors, but not huge matrices (when n is huge).</p><pre class="codeinput">clear <span class="string">all</span>
+[c, huge] = computer ;
+C = sparse (huge, 1)    <span class="comment">% MATLAB can create a huge-by-1 sparse column</span>
+<span class="keyword">try</span>
+    C = sparse (huge, huge)     <span class="comment">% but this fails</span>
+<span class="keyword">catch</span> me
+    error_expected = me
+<span class="keyword">end</span>
+</pre><pre class="codeoutput">C =
+   All zero sparse: 281474976710655&times;1
+error_expected = 
+  MException with properties:
+
+    identifier: 'MATLAB:array:SizeLimitExceeded'
+       message: 'Requested 281474976710655x281474976710655 (2097152.0GB) array exceeds maximum array size preference. Creation of arrays greater than this limit may take a long time and cause MATLAB to become unresponsive. See &lt;a href="matlab: helpview([docroot '/matlab/helptargets.map'], 'matlab_env_workspace_prefs')"&gt;array size limit&lt;/a&gt; or preference panel for more information.'
+         cause: {0&times;1 cell}
+         stack: [4&times;1 struct]
+</pre><p>In a GraphBLAS hypersparse matrix, an m-by-n matrix A takes only O(nnz(A)) space.  The difference can be huge if nnz (A) &lt;&lt; n.</p><pre class="codeinput">clear
+[c, huge] = computer ;
+G = GrB (huge, 1)            <span class="comment">% no problem for GraphBLAS</span>
+H = GrB (huge, huge)         <span class="comment">% this works in GraphBLAS too</span>
+</pre><pre class="codeoutput">
+G =
+
+  281474976710655x1 GraphBLAS double matrix, sparse by col:
+  no nonzeros, no entries
+
+
+H =
+
+  281474976710655x281474976710655 GraphBLAS double matrix, hypersparse by col:
+  no nonzeros, no entries
+
+</pre><p>Operations on huge hypersparse matrices are very fast; no component of the time or space complexity is Omega(n).</p><pre class="codeinput">I = randperm (huge, 2) ;
+J = randperm (huge, 2) ;
+H (I,J) = magic (2) ;        <span class="comment">% add 4 nonzeros to random locations in H</span>
+H (I,I) = 10 * [1 2 ; 3 4] ; <span class="comment">% so H^2 is not all zero</span>
+H = H^2 ;                    <span class="comment">% square H</span>
+H = (H' * 2) ;               <span class="comment">% transpose H and double the entries</span>
+K = pi * spones (H) ;
+H = H + K                    <span class="comment">% add pi to each entry in H</span>
+</pre><pre class="codeoutput">
+H =
+
+  281474976710655x281474976710655 GraphBLAS double matrix, hypersparse by col:
+  8 nonzeros, 8 entries
+
+    (27455183225557,27455183225557)    4403.14
+    (78390279669562,27455183225557)    383.142
+    (153933462881710,27455183225557)    343.142
+    (177993304104065,27455183225557)    3003.14
+    (27455183225557,177993304104065)    2003.14
+    (78390279669562,177993304104065)    183.142
+    (153933462881710,177993304104065)    143.142
+    (177993304104065,177993304104065)    1403.14
+
+</pre><h2 id="41">numel uses vpa if the matrix is really huge</h2><pre class="codeinput">e1 = numel (G)               <span class="comment">% this is huge, but still a flint</span>
+e2 = numel (H)               <span class="comment">% this is huge^2, which needs vpa</span>
+whos <span class="string">e1</span> <span class="string">e2</span>
+</pre><pre class="codeoutput">e1 =
+   2.8147e+14
+e2 =
+79228162514263774643590529025.0
+  Name      Size            Bytes  Class     Attributes
+
+  e1        1x1                 8  double              
+  e2        1x1                 8  sym                 
+
+</pre><p>All of these matrices take very little memory space:</p><pre class="codeinput">whos <span class="string">C</span> <span class="string">G</span> <span class="string">H</span> <span class="string">K</span>
+</pre><pre class="codeoutput">  Name                    Size                         Bytes  Class    Attributes
+
+  G         281474976710655x1                            989  GrB                
+  H         281474976710655x281474976710655             1308  GrB                
+  K         281474976710655x281474976710655             1308  GrB                
+
+</pre><h2 id="43">The mask and accumulator</h2><p>When not used in overloaded operators or built-in functions, many GraphBLAS methods of the form GrB.method ( ... ) can optionally use a mask and/or an accumulator operator.  If the accumulator is '+' in GrB.mxm, for example, then C = C + A*B is computed.  The mask acts much like logical indexing in MATLAB.  With a logical mask matrix M, C&lt;M&gt;=A*B allows only part of C to be assigned.  If M(i,j) is true, then C(i,j) can be modified.  If false, then C(i,j) is not modified.</p><p>For example, to set all values in C that are greater than 0.5 to 3:</p><pre class="codeinput">A = rand (3)
+C = GrB.assign (A, A &gt; 0.5, 3) ;     <span class="comment">% in GraphBLAS</span>
+C1 = GrB (A) ; C1 (A &gt; .5) = 3       <span class="comment">% also in GraphBLAS</span>
+C2 = A      ; C2 (A &gt; .5) = 3       <span class="comment">% in MATLAB</span>
+err = norm (C - C1, 1)
+err = norm (C - C2, 1)
+</pre><pre class="codeoutput">A =
+    0.9575    0.9706    0.8003
+    0.9649    0.9572    0.1419
+    0.1576    0.4854    0.4218
+
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)    3
+    (2,1)    3
+    (3,1)    0.157613
+    (1,2)    3
+    (2,2)    3
+    (3,2)    0.485376
+    (1,3)    3
+    (2,3)    0.141886
+    (3,3)    0.421761
+
+C2 =
+    3.0000    3.0000    3.0000
+    3.0000    3.0000    0.1419
+    0.1576    0.4854    0.4218
+err =
+     0
+err =
+     0
+</pre><h2 id="45">The descriptor</h2><p>Most GraphBLAS functions of the form GrB.method ( ... ) take an optional last argument, called the descriptor.  It is a MATLAB struct that can modify the computations performed by the method.  'help GrB.descriptorinfo' gives all the details.  The following is a short summary of the primary settings:</p><p>d.out  = 'default' or 'replace', clears C after the accum op is used.</p><p>d.mask = 'default' or 'complement', to use M or ~M as the mask matrix;          'structural', or 'structural complement', to use the pattern           of M or ~M.</p><p>d.in0  = 'default' or 'transpose', to transpose A for C=A*B, C=A+B, etc.</p><p>d.in1  = 'default' or 'transpose', to transpose B for C=A*B, C=A+B, etc.</p><p>d.kind = 'default', 'GrB', 'sparse', or 'full'; the output of GrB.method.</p><pre class="codeinput">A = sparse (rand (2)) ;
+B = sparse (rand (2)) ;
+C1 = A'*B ;
+C2 = GrB.mxm (<span class="string">'+.*'</span>, A, B, struct (<span class="string">'in0'</span>, <span class="string">'transpose'</span>)) ;
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">err =
+     0
+</pre><h2 id="46">Integer arithmetic is different in GraphBLAS</h2><p>MATLAB supports integer arithmetic on its full matrices, using int8, int16, int32, int64, uint8, uint16, uint32, or uint64 data types.  None of these integer data types can be used to construct a MATLAB sparse matrix, which can only be double, double complex, or logical. Furthermore, C=A*B is not defined for integer types in MATLAB, except when A and/or B are scalars.</p><p>GraphBLAS supports all of those types for its sparse matrices (except for complex, which will be added in the future).  All operations are supported, including C=A*B when A or B are any integer type, for all 1,865 semirings (1,040 of which are unique).</p><p>However, integer arithmetic differs in GraphBLAS and MATLAB.  In MATLAB, integer values saturate if they exceed their maximum value.  In GraphBLAS, integer operators act in a modular fashion.  The latter is essential when computing C=A*B over a semiring.  A saturating integer operator cannot be used as a monoid since it is not associative.</p><p>The C API for GraphBLAS allows for the creation of arbitrary user-defined types, so it would be possible to create different binary operators to allow element-wise integer operations to saturate, perhaps:</p><pre class="language-matlab">C = GrB.eadd(<span class="string">'+saturate'</span>,A,B)
+</pre><p>This would require an extension to this MATLAB interface.</p><pre class="codeinput">C = uint8 (magic (3)) ;
+G = GrB (C) ;
+C1 = C * 40
+C2 = G * 40
+C3 = double (G) * 40 ;
+S = double (C1 &lt; 255) ;
+assert (isequal (double (C1).*S, double (C2).*S))
+assert (isequal (nonzeros (C2), double (mod (nonzeros (C3), 256))))
+</pre><pre class="codeoutput">C1 =
+  3&times;3 uint8 matrix
+   255    40   240
+   120   200   255
+   160   255    80
+
+C2 =
+
+  3x3 GraphBLAS uint8_t matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)   64
+    (2,1)   120
+    (3,1)   160
+    (1,2)   40
+    (2,2)   200
+    (3,2)   104
+    (1,3)   240
+    (2,3)   24
+    (3,3)   80
+
+</pre><h2 id="48">An example graph algorithm: breadth-first search</h2><p>The breadth-first search of a graph finds all nodes reachable from the source node, and their level, v.  v=GrB.bfs(A,s) or v=bfs_matlab(A,s) compute the same thing, but GrB.bfs uses GraphBLAS matrices and operations, while bfs_matlab uses pure MATLAB operations.  v is defined as v(s) = 1 for the source node, v(i) = 2 for nodes adjacent to the source, and so on.</p><pre class="codeinput">clear <span class="string">all</span>
+rng (<span class="string">'default'</span>) ;
+n = 1e5 ;
+A = logical (sprandn (n, n, 1e-3)) ;
+
+tic
+v1 = GrB.bfs (A, 1) ;
+gb_time = toc ;
+
+tic
+v2 = bfs_matlab (A, 1) ;
+matlab_time = toc ;
+
+assert (isequal (double (v1'), v2))
+fprintf (<span class="string">'\nnodes reached: %d of %d\n'</span>, nnz (v2), n) ;
+fprintf (<span class="string">'GraphBLAS time: %g sec\n'</span>, gb_time) ;
+fprintf (<span class="string">'MATLAB time:    %g sec\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+</pre><pre class="codeoutput">
+nodes reached: 100000 of 100000
+GraphBLAS time: 0.315912 sec
+MATLAB time:    1.10726 sec
+Speedup of GraphBLAS over MATLAB: 3.50496
+</pre><h2 id="49">Example graph algorithm: Luby's method in GraphBLAS</h2><p>The GrB.mis.m function is a variant of Luby's randomized algorithm [Luby 1985].  It is a parallel method for finding a maximal independent set of nodes, where no two nodes are adjacent.  See the GraphBLAS/@GrB/GrB.mis.m function for details.  The graph must be symmetric with a zero-free diagonal, so A is symmetrized first and any diagonal entries are removed.</p><pre class="codeinput">A = GrB (A) ;
+A = GrB.offdiag (A|A') ;
+
+tic
+s = GrB.mis (A) ;
+toc
+fprintf (<span class="string">'# nodes in the graph: %g\n'</span>, size (A,1)) ;
+fprintf (<span class="string">'# edges: : %g\n'</span>, GrB.entries (A) / 2) ;
+fprintf (<span class="string">'size of maximal independent set found: %g\n'</span>, <span class="keyword">...</span>
+    full (double (sum (s)))) ;
+
+<span class="comment">% make sure it's independent</span>
+p = find (s) ;
+S = A (p,p) ;
+assert (GrB.entries (S) == 0)
+
+<span class="comment">% make sure it's maximal</span>
+notp = find (s == 0) ;
+S = A (notp, p) ;
+deg = GrB.vreduce (<span class="string">'+.int64'</span>, S) ;
+assert (logical (all (deg &gt; 0)))
+</pre><pre class="codeoutput">Elapsed time is 0.393703 seconds.
+# nodes in the graph: 100000
+# edges: : 9.9899e+06
+size of maximal independent set found: 2811
+</pre><h2 id="50">Sparse deep neural network</h2><p>The 2019 MIT GraphChallenge (see <a href="http://graphchallenge.org">http://graphchallenge.org</a>) is to solve a set of large sparse deep neural network problems.  In this demo, the MATLAB reference solution is compared with a solution using GraphBLAS, for a randomly constructed neural network.  See the GrB.dnn and dnn_matlab.m functions for details.</p><pre class="codeinput">clear <span class="string">all</span>
+rng (<span class="string">'default'</span>) ;
+nlayers = 16 ;
+nneurons = 4096 ;
+nfeatures = 30000 ;
+fprintf (<span class="string">'# layers:   %d\n'</span>, nlayers) ;
+fprintf (<span class="string">'# neurons:  %d\n'</span>, nneurons) ;
+fprintf (<span class="string">'# features: %d\n'</span>, nfeatures) ;
+
+tic
+Y0 = sprand (nfeatures, nneurons, 0.1) ;
+<span class="keyword">for</span> layer = 1:nlayers
+    W {layer} = sprand (nneurons, nneurons, 0.01) * 0.2 ;
+    bias {layer} = -0.2 * ones (1, nneurons) ;
+<span class="keyword">end</span>
+t_setup = toc ;
+fprintf (<span class="string">'construct problem time: %g sec\n'</span>, t_setup) ;
+
+<span class="comment">% convert the problem from MATLAB to GraphBLAS</span>
+t = tic ;
+[W_gb, bias_gb, Y0_gb] = dnn_mat2gb (W, bias, Y0) ;
+t = toc (t) ;
+fprintf (<span class="string">'setup time: %g sec\n'</span>, t) ;
+</pre><pre class="codeoutput"># layers:   16
+# neurons:  4096
+# features: 30000
+construct problem time: 7.8016 sec
+setup time: 0.156962 sec
+</pre><h2 id="51">Solving the sparse deep neural network problem with GraphBLAS</h2><p>Please wait ...</p><pre class="codeinput">tic
+Y1 = GrB.dnn (W_gb, bias_gb, Y0_gb) ;
+gb_time = toc ;
+fprintf (<span class="string">'total time in GraphBLAS: %g sec\n'</span>, gb_time) ;
+</pre><pre class="codeoutput">total time in GraphBLAS: 3.42888 sec
+</pre><h2 id="52">Solving the sparse deep neural network problem with MATLAB</h2><p>Please wait ...</p><pre class="codeinput">tic
+Y2 = dnn_matlab (W, bias, Y0) ;
+matlab_time = toc ;
+fprintf (<span class="string">'total time in MATLAB:    %g sec\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+
+err = norm (Y1-Y2,1)
+</pre><pre class="codeoutput">total time in MATLAB:    128.26 sec
+Speedup of GraphBLAS over MATLAB: 37.4059
+err =
+     0
+</pre><h2 id="53">For objects, GraphBLAS has better colon notation than MATLAB</h2><p>The MATLAB notation C = A (start:inc:fini) is very handy, and it works great if A is a MATLAB matrix.  But for objects like the GraphBLAS matrix, MATLAB starts by creating the explicit index vector I = start:inc:fini.  That's fine if the matrix is modest in size, but GraphBLAS can construct huge matrices. The problem is that 1:n cannot be explicitly constructed when n is huge.</p><p>The C API for GraphBLAS can represent the colon notation start:inc:fini in an implicit manner, so it can do the indexing without actually forming the explicit list I = start:inc:fini. But there is no access to this method using the MATLAB notation start:inc:fini.</p><p>Thus, to compute C = A (start:inc:fini) for very huge matrices, you need to use a cell array to represent the colon notation, as { start, inc, fini }, instead of start:inc:fini. See 'help GrB.extract', and 'help GrB.subassign' for C(I,J)=A.  The syntax isn't conventional, but it is far faster than the MATLAB colon notation for objects, and takes far less memory when I is huge.</p><pre class="codeinput">n = 1e14 ;
+H = GrB (n, n) ;            <span class="comment">% a huge empty matrix</span>
+I = [1 1e9 1e12 1e14] ;
+M = magic (4)
+H (I,I) = M ;
+J = {1, 1e13} ;            <span class="comment">% represents 1:1e13 colon notation</span>
+C1 = H (J, J)              <span class="comment">% computes C1 = H (1:1e13,1:1e13)</span>
+c = nonzeros (C1) ;
+m = nonzeros (M (1:3, 1:3)) ;
+assert (isequal (c, m)) ;
+</pre><pre class="codeoutput">M =
+    16     2     3    13
+     5    11    10     8
+     9     7     6    12
+     4    14    15     1
+
+C1 =
+
+  10000000000000x10000000000000 GraphBLAS double matrix, hypersparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)    16
+    (1000000000,1)    5
+    (1000000000000,1)    9
+    (1,1000000000)    2
+    (1000000000,1000000000)    11
+    (1000000000000,1000000000)    7
+    (1,1000000000000)    3
+    (1000000000,1000000000000)    10
+    (1000000000000,1000000000000)    6
+
+</pre><pre class="codeinput"><span class="keyword">try</span>
+    <span class="comment">% try to compute the same thing with colon</span>
+    <span class="comment">% notation (1:1e13), but this fails:</span>
+    C2 = H (1:1e13, 1:1e13)
+<span class="keyword">catch</span> me
+    error_expected = me
+<span class="keyword">end</span>
+</pre><pre class="codeoutput">error_expected = 
+  MException with properties:
+
+    identifier: 'MATLAB:array:SizeLimitExceeded'
+       message: 'Requested 10000000000000x1 (74505.8GB) array exceeds maximum array size preference. Creation of arrays greater than this limit may take a long time and cause MATLAB to become unresponsive. See &lt;a href="matlab: helpview([docroot '/matlab/helptargets.map'], 'matlab_env_workspace_prefs')"&gt;array size limit&lt;/a&gt; or preference panel for more information.'
+         cause: {}
+         stack: [4&times;1 struct]
+</pre><h2 id="56">Iterative solvers work as-is</h2><p>Many built-in functions work with GraphBLAS matrices unmodified.</p><pre class="codeinput">A = sparse (rand (4)) ;
+b = sparse (rand (4,1)) ;
+x = gmres (A,b)
+norm (A*x-b)
+x = gmres (GrB(A), GrB(b))
+norm (A*x-b)
+</pre><pre class="codeoutput">gmres converged at iteration 4 to a solution with relative residual 0.
+x =
+    0.9105
+    3.8949
+   -0.5695
+   -1.3867
+ans =
+   8.6711e-16
+gmres converged at iteration 4 to a solution with relative residual 0.
+x =
+    0.9105
+    3.8949
+   -0.5695
+   -1.3867
+ans =
+   7.2802e-16
+</pre><h2 id="57">... even in single precision</h2><pre class="codeinput">x = gmres (GrB(A,<span class="string">'single'</span>), GrB(b,<span class="string">'single'</span>))
+norm (A*x-b)
+</pre><pre class="codeoutput">gmres converged at iteration 4 to a solution with relative residual 0.
+x =
+    0.9105
+    3.8949
+   -0.5695
+   -1.3867
+ans =
+   3.5566e-07
+</pre><p>Both of the following uses of minres (A,b) fail to converge because A is not symmetric, as the method requires.  Both failures are correctly reported, and both the MATLAB version and the GraphBLAS version return the same incorrect vector x.</p><pre class="codeinput">x = minres (A, b)
+x = minres (GrB(A), GrB(b))
+</pre><pre class="codeoutput">minres stopped at iteration 4 without converging to the desired tolerance 1e-06
+because the maximum number of iterations was reached.
+The iterate returned (number 4) has relative residual 0.21.
+x =
+    0.2489
+    0.2081
+    0.0700
+    0.3928
+minres stopped at iteration 4 without converging to the desired tolerance 1e-06
+because the maximum number of iterations was reached.
+The iterate returned (number 4) has relative residual 0.21.
+
+x =
+
+  4x1 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.248942
+    (2,1)    0.208128
+    (3,1)    0.0699707
+    (4,1)    0.392812
+
+</pre><p>With a proper symmetric matrix</p><pre class="codeinput">A = A+A' ;
+x = minres (A, b)
+norm (A*x-b)
+x = minres (GrB(A), GrB(b))
+norm (A*x-b)
+</pre><pre class="codeoutput">minres converged at iteration 4 to a solution with relative residual 1.3e-11.
+x =
+ -114.0616
+   -1.4211
+  134.8227
+    2.0694
+ans =
+   1.3650e-11
+minres converged at iteration 4 to a solution with relative residual 1.3e-11.
+
+x =
+
+  4x1 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    -114.062
+    (2,1)    -1.4211
+    (3,1)    134.823
+    (4,1)    2.0694
+
+ans =
+   1.3650e-11
+</pre><h2 id="60">Extreme performance differences between GraphBLAS and MATLAB.</h2><p>The GraphBLAS operations used so far are perhaps 2x to 50x faster than the corresponding MATLAB operations, depending on how many cores your computer has.  To run a demo illustrating a 500x or more speedup versus MATLAB, run this demo:</p><pre>  gbdemo2</pre><p>It will illustrate an assignment C(I,J)=A that can take under a second in GraphBLAS but several minutes in MATLAB.  To make the comparison even more dramatic, try:</p><pre>  gbdemo2 (20000)</pre><p>assuming you have enough memory.  The gbdemo2 is not part of this demo since it can take a long time; it tries a range of problem sizes, and each one takes several minutes in MATLAB.</p><h2 id="61">Sparse logical indexing is much, much faster in GraphBLAS</h2><p>The mask in GraphBLAS acts much like logical indexing in MATLAB, but it is not quite the same.  MATLAB logical indexing takes the form:</p><pre>     C (M) = A (M)</pre><p>which computes the same thing as the GraphBLAS statement:</p><pre>     C = GrB.assign (C, M, A)</pre><p>The GrB.assign statement computes C(M)=A(M), and it is vastly faster than the MATLAB statement C(M)=A(M), even if the time to convert the GrB matrix back to a MATLAB sparse matrix is included.</p><p>GraphBLAS can also compute C (M) = A (M) using overloaded operators for subsref and subsasgn, but C = GrB.assign (C, M, A) is a bit faster.</p><p>First, both methods in GraphBLAS (both are very fast):</p><pre class="codeinput">clear
+n = 4000 ;
+tic
+C = sprand (n, n, 0.1) ;
+A = 100 * sprand (n, n, 0.1) ;
+M = (C &gt; 0.5) ;
+t_setup = toc ;
+fprintf (<span class="string">'nnz(C): %g, nnz(M): %g, nnz(A): %g\n'</span>, <span class="keyword">...</span>
+    nnz(C), nnz(M), nnz(A)) ;
+fprintf (<span class="string">'\nsetup time:     %g sec\n'</span>, t_setup) ;
+
+<span class="comment">% include the time to convert C1 from a GraphBLAS</span>
+<span class="comment">% matrix to a MATLAB sparse matrix:</span>
+tic
+C1 = GrB.assign (C, M, A) ;
+C1 = double (C1) ;
+gb_time = toc ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec for GrB.assign\n'</span>, gb_time) ;
+
+<span class="comment">% now using overloaded operators, also include the time to</span>
+<span class="comment">% convert back to a MATLAB sparse matrix, for good measure:</span>
+A2 = GrB (A) ;
+C2 = GrB (C) ;
+tic
+C2 (M) = A2 (M) ;
+C2 = double (C2) ;
+gb_time2 = toc ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec for C(M)=A(M)\n'</span>, gb_time2) ;
+</pre><pre class="codeoutput">nnz(C): 1.5226e+06, nnz(M): 761163, nnz(A): 1.52245e+06
+
+setup time:     1.33984 sec
+
+GraphBLAS time: 0.018929 sec for GrB.assign
+
+GraphBLAS time: 0.072552 sec for C(M)=A(M)
+</pre><p>Please wait, this will take about 10 minutes or so ...</p><pre class="codeinput">tic
+C (M) = A (M) ;
+matlab_time = toc ;
+
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (GrB.assign)\n'</span>, gb_time) ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (overloading)\n'</span>, gb_time2) ;
+fprintf (<span class="string">'MATLAB time:    %g sec\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time2) ;
+
+<span class="comment">% GraphBLAS computes the exact same result with both methods:</span>
+assert (isequal (C1, C))
+assert (isequal (C2, C))
+C1 - C
+C2 - C
+</pre><pre class="codeoutput">
+GraphBLAS time: 0.018929 sec (GrB.assign)
+
+GraphBLAS time: 0.072552 sec (overloading)
+MATLAB time:    1113.39 sec
+Speedup of GraphBLAS over MATLAB: 15346.2
+ans =
+   All zero sparse: 4000&times;4000
+ans =
+   All zero sparse: 4000&times;4000
+</pre><h2 id="63">Limitations and their future solutions</h2><p>The MATLAB interface for SuiteSparse:GraphBLAS is a work-in-progress. It has some limitations, most of which will be resolved over time.</p><p>(1) Nonblocking mode:</p><p>GraphBLAS has a 'non-blocking' mode, in which operations can be left pending and completed later.  SuiteSparse:GraphBLAS uses the non-blocking mode to speed up a sequence of assignment operations, such as C(I,J)=A.  However, in its MATLAB interface, this would require a MATLAB mexFunction to modify its inputs.  That breaks the MATLAB API standard, so it cannot be safely done.  As a result, using GraphBLAS via its MATLAB interface can be slower than when using its C API.  This restriction would not be a limitation if GraphBLAS were to be incorporated into MATLAB itself, but there is likely no way to do this in a mexFunction interface to GraphBLAS.</p><p>(2) Complex matrices:</p><p>GraphBLAS can operate on matrices with arbitrary user-defined types and operators.  The only constraint is that the type be a fixed sized typedef that can be copied with the ANSI C memcpy; variable-sized types are not yet supported.  However, in this MATLAB interface, SuiteSparse:GraphBLAS has access to only predefined types, operators, and semirings.  Complex types and operators will be added to this MATLAB interface in the future.  They already appear in the C version of GraphBLAS, with user-defined operators in GraphBLAS/Demo/Source/usercomplex.c.</p><p>(3) Integer element-wise operations:</p><p>Integer operations in MATLAB saturate, so that uint8(255)+1 is 255.  To allow for integer monoids, GraphBLAS uses modular arithmetic instead. This is the only way that C=A*B can be defined for integer semirings. 
However, saturating integer operators could be added in the future, so that element-wise integer operations on GraphBLAS sparse integer matrices could work just the same as their MATLAB counterparts.</p><p>So in the future, you could perhaps write this, for both sparse and dense integer matrices A and B:</p><pre>     C = GrB.eadd ('+saturate.int8', A, B)</pre><p>to compute the same thing as C=A+B in MATLAB for its full int8 matrices.  Note that MATLAB can do this only for dense integer matrices, since it doesn't support sparse integer matrices.</p><p>(4) Faster methods:</p><p>Most methods in this MATLAB interface are based on efficient parallel C functions in GraphBLAS itself, and are typically as fast as or faster than the equivalent built-in operators and functions in MATLAB.</p><p>There are a few notable exceptions; these will be addressed in the future. Dense matrices and vectors held as GraphBLAS objects are slower than their MATLAB counterparts.  horzcat and vertcat, for [A B] and [A;B] when either A or B is a GraphBLAS matrix, are also slow, as illustrated below in the next example.</p><p>Other methods that will be faster in the future include bandwidth, istriu, istril, eps, ceil, floor, round, fix, isfinite, isinf, isnan, spfun, and A.^B.  These methods are currently implemented in m-files, not in efficient parallel C functions.</p><p>Here is an example that illustrates the performance of C = [A B]</p><pre class="codeinput">clear
+A = sparse (rand (2000)) ;
+B = sparse (rand (2000)) ;
+tic
+C1 = [A B] ;
+matlab_time = toc ;
+
+A = GrB (A) ;
+B = GrB (B) ;
+tic
+C2 = [A B] ;
+gb_time = toc ;
+
+err = norm (C1-C2,1)
+fprintf (<span class="string">'\nMATLAB: %g sec, GraphBLAS: %g sec\n'</span>, <span class="keyword">...</span>
+    matlab_time, gb_time) ;
+<span class="keyword">if</span> (gb_time &gt; matlab_time)
+    fprintf (<span class="string">'GraphBLAS is slower by a factor of %g\n'</span>, <span class="keyword">...</span>
+        gb_time / matlab_time) ;
+<span class="keyword">end</span>
+</pre><pre class="codeoutput">err =
+     0
+
+MATLAB: 0.066463 sec, GraphBLAS: 0.194094 sec
+GraphBLAS is slower by a factor of 2.92033
+</pre><p>(5) Linear indexing:</p><p>If A is an m-by-n 2D MATLAB matrix, with n &gt; 1, A(:) is a column vector of length m*n.  The index operation A(i) accesses the ith entry in the vector A(:).  This is called linear indexing in MATLAB.  It is not yet available for GraphBLAS matrices in this MATLAB interface to GraphBLAS, but it could be added in the future.</p><p>(6) Implicit singleton dimension expansion:</p><p>In MATLAB C=A+B where A is m-by-n and B is a 1-by-n row vector implicitly expands B to a matrix, computing C(i,j)=A(i,j)+B(j).  This implicit expansion is not yet supported in GraphBLAS with C=A+B. However, it can be done with C = GrB.mxm ('+.+', A, diag(GrB(B))). That's a nice example of the power of semirings, but it's not immediately obvious, and not as clear a syntax as C=A+B.  The GraphBLAS/@GrB/dnn.m function uses this 'plus.plus' semiring to apply the bias to each neuron.</p><pre class="codeinput">A = magic (3)
+B = 1000:1000:3000
+C1 = A + B
+C2 = GrB.mxm (<span class="string">'+.+'</span>, A, diag (GrB (B)))
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">A =
+     8     1     6
+     3     5     7
+     4     9     2
+B =
+        1000        2000        3000
+C1 =
+        1008        2001        3006
+        1003        2005        3007
+        1004        2009        3002
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)    1008
+    (2,1)    1003
+    (3,1)    1004
+    (1,2)    2001
+    (2,2)    2005
+    (3,2)    2009
+    (1,3)    3006
+    (2,3)    3007
+    (3,3)    3002
+
+err =
+     0
+</pre><h2 id="70">GraphBLAS operations</h2><p>In addition to the overloaded operators (such as C=A*B) and overloaded functions (such as L=tril(A)), GraphBLAS also has methods of the form GrB.method, listed on the next page.  Most of them take an optional input matrix Cin, which is the initial value of the matrix C for the expression below, an optional mask matrix M, and an optional accumulator operator.</p><pre>    C&lt;#M,replace&gt; = accum (C, T)</pre><p>In the above expression, #M is either empty (no mask), M (with a mask matrix) or ~M (with a complemented mask matrix), as determined by the descriptor.  'replace' can be used to clear C after it is used in accum(C,T) but before it is assigned with C&lt;...&gt; = Z, where Z=accum(C,T).  The matrix T is the result of some operation, such as T=A*B for GrB.mxm, or T=op(A,B) for GrB.eadd.</p><p>A summary of these GrB.methods is on the next pages.</p><h2 id="71">Methods for the GrB class:</h2><pre class="language-matlab">These <span class="string">methods</span> <span class="string">operate</span> <span class="string">on</span> <span class="string">GraphBLAS</span> <span class="string">matrices</span> <span class="string">only</span>, and <span class="string">they</span> <span class="string">overload</span>
+the <span class="string">existing</span> <span class="string">MATLAB</span> <span class="string">functions</span> <span class="string">of</span> <span class="string">the</span> <span class="string">same</span> <span class="string">name.</span>
+</pre><pre class="language-matlab">C = GrB (<span class="keyword">...</span><span class="comment">)           construct a GraphBLAS matrix</span>
+C = sparse (G)          makes a copy of a GrB matrix
+C = full (G, <span class="keyword">...</span><span class="comment">)       adds explicit zeros or id values to a GrB matrix</span>
+C = double (G)          cast GrB matrix to MATLAB sparse double matrix
+C = logical (G)         cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">sparse</span> <span class="string">logical</span> <span class="string">matrix</span>
+C = complex (G)         cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">sparse</span> <span class="string">complex</span>
+C = single (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">single</span> <span class="string">matrix</span>
+C = int8 (G)            cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int8</span> <span class="string">matrix</span>
+C = int16 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int16</span> <span class="string">matrix</span>
+C = int32 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int32</span> <span class="string">matrix</span>
+C = int64 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int64</span> <span class="string">matrix</span>
+C = uint8 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint8</span> <span class="string">matrix</span>
+C = uint16 (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint16</span> <span class="string">matrix</span>
+C = uint32 (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint32</span> <span class="string">matrix</span>
+C = uint64 (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint64</span> <span class="string">matrix</span>
+C = cast (G,<span class="keyword">...</span><span class="comment">)        cast GrB matrix to MATLAB matrix (as above)</span>
+</pre><pre class="language-matlab">X = nonzeros (G)        extract <span class="string">all</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span>
+[I,J,X] = find (G)      extract <span class="string">all</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span>
+C = spones (G)          <span class="keyword">return</span> pattern <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span>
+disp (G, level)         display <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+display (G)             display <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>; same <span class="string">as</span> <span class="string">disp(G,2)</span>
+mn = numel (G)          m*n <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+e = nnz (G)             number <span class="string">of</span> <span class="string">entries</span> <span class="string">in</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+e = nzmax (G)           number <span class="string">of</span> <span class="string">entries</span> <span class="string">in</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+[m n] = size (G)        size <span class="string">of</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+n = length (G)          length <span class="string">of</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">vector</span>
+s = isempty (G)         true <span class="string">if</span> <span class="string">any</span> <span class="string">dimension</span> <span class="string">of</span> <span class="string">G</span> <span class="string">is</span> <span class="string">zero</span>
+s = issparse (G)        true <span class="string">for</span> <span class="string">any</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+s = ismatrix (G)        true <span class="string">for</span> <span class="string">any</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+s = isvector (G)        true <span class="string">if</span> <span class="string">m=1</span> <span class="string">or</span> <span class="string">n=1</span>, <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+s = iscolumn (G)        true <span class="string">if</span> <span class="string">n=1</span>, <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+s = isrow (G)           true <span class="string">if</span> <span class="string">m=1</span>, <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+s = isscalar (G)        true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">a</span> <span class="string">1-by-1</span> <span class="string">GrB</span> <span class="string">matrix</span>
+s = isnumeric (G)       true <span class="string">for</span> <span class="string">any</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span> <span class="string">(even logical)</span>
+s = isfloat (G)         true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">double</span>, single, complex
+s = isreal (G)          true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">not</span> <span class="string">complex</span>
+s = isinteger (G)       true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">int8</span>, int16, <span class="keyword">...</span><span class="comment">, uint64</span>
+s = islogical (G)       true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">logical</span>
+s = isa (G, classname)  check <span class="string">if</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">of</span> <span class="string">a</span> <span class="string">specific</span> <span class="string">class</span>
+</pre><pre class="language-matlab">C = diag (G,k)          diagonal <span class="string">matrices</span> <span class="string">and</span> <span class="string">diagonals</span> <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+L = tril (G,k)          lower <span class="string">triangular</span> <span class="string">part</span> <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+U = triu (G,k)          upper <span class="string">triangular</span> <span class="string">part</span> <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+C = kron (A,B)          Kronecker <span class="string">product</span>
+C = repmat (G, <span class="keyword">...</span><span class="comment">)     replicate and tile a GraphBLAS matrix</span>
+C = reshape (G, <span class="keyword">...</span><span class="comment">)    reshape a GraphBLAS matrix</span>
+C = abs (G)             absolute value
+C = sign (G)            signum <span class="string">function</span>
+s = istril (G)          true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">lower</span> <span class="string">triangular</span>
+s = istriu (G)          true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">upper</span> <span class="string">triangular</span>
+s = isbanded (G,<span class="keyword">...</span><span class="comment">)    true if G is banded</span>
+s = isdiag (G)          true <span class="keyword">if</span> G is <span class="string">diagonal</span>
+s = ishermitian (G)     true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">Hermitian</span>
+s = issymmetric (G)     true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">symmetric</span>
+[lo,hi] = bandwidth (G) determine <span class="string">the</span> <span class="string">lower</span> <span class="string">&amp;</span> <span class="string">upper</span> <span class="string">bandwidth</span> <span class="string">of</span> <span class="string">G</span>
+C = sum (G, option)     reduce <span class="string">via</span> <span class="string">sum</span>, to <span class="string">vector</span> <span class="string">or</span> <span class="string">scalar</span>
+C = prod (G, option)    reduce <span class="string">via</span> <span class="string">product</span>, to <span class="string">vector</span> <span class="string">or</span> <span class="string">scalar</span>
+s = norm (G, kind)      1-norm or <span class="string">inf-norm</span> <span class="string">of</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span>
+C = max (G, <span class="keyword">...</span><span class="comment">)        reduce via max, to vector or scalar</span>
+C = min (G, <span class="keyword">...</span><span class="comment">)        reduce via min, to vector or scalar</span>
+C = any (G, <span class="keyword">...</span><span class="comment">)        reduce via '|', to vector or scalar</span>
+C = all (G, <span class="keyword">...</span><span class="comment">)        reduce via '&amp;', to vector or scalar</span>
+</pre><pre class="language-matlab">C = sqrt (G)            element-wise square <span class="string">root</span>
+C = eps (G)             floating-point spacing
+C = ceil (G)            round <span class="string">towards</span> <span class="string">infinity</span>
+C = floor (G)           round <span class="string">towards</span> <span class="string">-infinity</span>
+C = round (G)           round <span class="string">towards</span> <span class="string">nearest</span>
+C = fix (G)             round <span class="string">towards</span> <span class="string">zero</span>
+C = isfinite (G)        test <span class="string">if</span> <span class="string">finite</span>
+C = isinf (G)           test <span class="string">if</span> <span class="string">infinite</span>
+C = isnan (G)           test <span class="string">if</span> <span class="string">NaN</span>
+C = spfun (fun, G)      evaluate <span class="string">a</span> <span class="string">function</span> <span class="string">on</span> <span class="string">the</span> <span class="string">entries</span> <span class="string">of</span> <span class="string">G</span>
+p = amd (G)             approximate <span class="string">minimum</span> <span class="string">degree</span> <span class="string">ordering</span>
+p = colamd (G)          column <span class="string">approximate</span> <span class="string">minimum</span> <span class="string">degree</span> <span class="string">ordering</span>
+p = symamd (G)          approximate <span class="string">minimum</span> <span class="string">degree</span> <span class="string">ordering</span>
+p = symrcm (G)          reverse <span class="string">Cuthill-McKee</span> <span class="string">ordering</span>
+[<span class="keyword">...</span><span class="comment">] = dmperm (G)      Dulmage-Mendelsohn permutation</span>
+parent = etree (G)      elimination tree
+C = conj (G)            complex conjugate
+C = real (G)            real part of a complex GraphBLAS matrix
+[V, <span class="keyword">...</span><span class="comment">] = eig (G,...)  eigenvalues and eigenvectors</span>
+assert (G)              generate an error <span class="keyword">if</span> G is <span class="string">false</span>
+C = zeros (<span class="keyword">...</span><span class="comment">,'like',G)   all-zero matrix, same type as G</span>
+C = false (<span class="keyword">...</span><span class="comment">,'like',G)   all-false logical matrix</span>
+C = ones (<span class="keyword">...</span><span class="comment">,'like',G)    matrix with all ones, same type as G</span>
+</pre><h2 id="75">Operator overloading:</h2><pre class="language-matlab">C = plus (A, B)         C = A + B
+C = minus (A, B)        C = A - B
+C = uminus (G)          C = -G
+C = uplus (G)           C = +G
+C = times (A, B)        C = A .* B
+C = mtimes (A, B)       C = A * B
+C = rdivide (A, B)      C = A ./ B
+C = ldivide (A, B)      C = A .\ B
+C = mrdivide (A, B)     C = A / B
+C = mldivide (A, B)     C = A \ B
+C = power (A, B)        C = A .^ B
+C = mpower (A, B)       C = A ^ B
+C = lt (A, B)           C = A &lt; B
+C = gt (A, B)           C = A &gt; B
+C = le (A, B)           C = A &lt;= B
+C = ge (A, B)           C = A &gt;= B
+C = ne (A, B)           C = A ~= B
+C = eq (A, B)           C = A == B
+C = and (A, B)          C = A &amp; B
+C = or (A, B)           C = A | B
+C = not (G)             C = ~G
+C = ctranspose (G)      C = G'
+C = transpose (G)       C = G.'
+C = horzcat (A, B)      C = [A , B]
+C = vertcat (A, B)      C = [A ; B]
+C = subsref (A, I, J)   C = A (I,J) or <span class="string">C</span> <span class="string">=</span> <span class="string">A</span> <span class="string">(M)</span>
+C = subsasgn (A, I, J)  C (I,J) = A
+index = end (A, k, n)   <span class="keyword">for</span> object indexing, A(1:end,1:end)
+</pre><h2 id="76">Static Methods:</h2><pre class="language-matlab">The <span class="string">Static</span> <span class="string">Methods</span> <span class="string">for</span> <span class="string">the</span> <span class="string">GrB</span> <span class="string">class</span> <span class="string">can</span> <span class="string">be</span> <span class="string">used</span> <span class="string">on</span> <span class="string">input</span> <span class="string">matrices</span> <span class="string">of</span>
+any <span class="string">kind:</span> <span class="string">GraphBLAS</span> <span class="string">sparse</span> <span class="string">matrices</span>, MATLAB <span class="string">sparse</span> <span class="string">matrices</span>, or
+MATLAB <span class="string">dense</span> <span class="string">matrices</span>, in <span class="string">any</span> <span class="string">combination.</span>  <span class="string">The</span> <span class="string">output</span> <span class="string">matrix</span> <span class="string">Cout</span> <span class="string">is</span>
+a <span class="string">GraphBLAS</span> <span class="string">matrix</span>, by <span class="string">default</span>, but <span class="string">can</span> <span class="string">be</span> <span class="string">optionally</span> <span class="string">returned</span> <span class="string">as</span> <span class="string">a</span>
+MATLAB <span class="string">sparse</span> <span class="string">or</span> <span class="string">dense</span> <span class="string">matrix.</span>  <span class="string">The</span> <span class="string">static</span> <span class="string">methods</span> <span class="string">divide</span> <span class="string">into</span> <span class="string">two</span>
+categories: those that <span class="string">perform</span> <span class="string">basic</span> <span class="string">functions</span>, and <span class="string">the</span> <span class="string">GraphBLAS</span>
+operations <span class="string">that</span> <span class="string">use</span> <span class="string">the</span> <span class="string">mask/accum.</span>
+</pre><h2 id="77">GraphBLAS basic functions:</h2><pre class="language-matlab">GrB.clear                    clear <span class="string">GraphBLAS</span> <span class="string">workspace</span> <span class="string">and</span> <span class="string">settings</span>
+GrB.descriptorinfo (d)       list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">descriptor</span>
+GrB.unopinfo (op, type)      list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">unary</span> <span class="string">operator</span>
+GrB.binopinfo (op, type)     list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">binary</span> <span class="string">operator</span>
+GrB.monoidinfo (op, type)    list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">monoid</span>
+GrB.semiringinfo (s, type)   list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">semiring</span>
+t = GrB.threads (t)          set/get # of <span class="string">threads</span> <span class="string">to</span> <span class="string">use</span> <span class="string">in</span> <span class="string">GraphBLAS</span>
+c = GrB.chunk (c)            set/get chunk <span class="string">size</span> <span class="string">to</span> <span class="string">use</span> <span class="string">in</span> <span class="string">GraphBLAS</span>
+b = GrB.burble (b)           set/get burble (diagnostic output)
+result = GrB.entries (G,<span class="keyword">...</span><span class="comment">) count or query entries in a matrix</span>
+result = GrB.nonz (G,<span class="keyword">...</span><span class="comment">)    count or query nonzeros in a matrix</span>
+C = GrB.prune (A, id)        prune entries equal to id
+C = GrB.offdiag (A)          prune <span class="string">diagonal</span> <span class="string">entries</span>
+s = GrB.isfull (A)           true <span class="string">if</span> <span class="string">all</span> <span class="string">entries</span> <span class="string">present</span>
+[C,I,J] = GrB.compact (A,id) remove <span class="string">empty</span> <span class="string">rows</span> <span class="string">and</span> <span class="string">columns</span>
+G = GrB.empty (m, n)         <span class="keyword">return</span> an <span class="string">empty</span> <span class="string">GraphBLAS</span> <span class="string">matrix</span>
+s = GrB.type (A)             get <span class="string">the</span> <span class="string">type</span> <span class="string">of</span> <span class="string">a</span> <span class="string">MATLAB</span> <span class="string">or</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">A</span>
+s = GrB.issigned (type)      true <span class="string">if</span> <span class="string">type</span> <span class="string">is</span> <span class="string">signed</span>
+f = GrB.format (f)           set/get matrix <span class="string">format</span> <span class="string">to</span> <span class="string">use</span> <span class="string">in</span> <span class="string">GraphBLAS</span>
+s = GrB.isbyrow (A)          true <span class="string">if</span> <span class="string">format</span> <span class="string">of</span> <span class="string">A</span> <span class="string">is</span> <span class="string">'by row'</span>
+s = GrB.isbycol (A)          true <span class="string">if</span> <span class="string">format</span> <span class="string">of</span> <span class="string">A</span> <span class="string">is</span> <span class="string">'by col'</span>
+C = GrB.expand (scalar, A)   expand <span class="string">a</span> <span class="string">scalar</span> <span class="string">(C = scalar*spones(A))</span>
+C = GrB.eye                  identity <span class="string">matrix</span> <span class="string">of</span> <span class="string">any</span> <span class="string">type</span>
+C = GrB.speye                identity <span class="string">matrix</span> <span class="string">(of type 'double')</span>
+C = GrB.build (I, J, X, m, n, dup, type, desc)
+                             build <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">from</span> <span class="string">list</span> <span class="string">of</span> <span class="string">entries</span>
+[I,J,X] = GrB.extracttuples (A, desc)
+                             extract <span class="string">all</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">matrix</span>
+s = GrB.normdiff (A, B, kind)   norm (A-B,kind)
+</pre><h2 id="78">GraphBLAS operations with Cout, mask M, and accum.</h2><pre class="language-matlab">Cout = GrB.mxm (Cin, M, accum, semiring, A, B, desc)
+                sparse <span class="string">matrix-matrix</span> <span class="string">multiplication</span> <span class="string">over</span> <span class="string">a</span> <span class="string">semiring</span>
+Cout = GrB.select (Cin, M, accum, op, A, b, desc)
+                select <span class="string">a</span> <span class="string">subset</span> <span class="string">of</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">matrix</span>
+Cout = GrB.assign (Cin, M, accum, A, I, J, desc)
+                sparse <span class="string">matrix</span> <span class="string">assignment</span>, such <span class="string">as</span> <span class="string">C(I,J)=A</span>
+Cout = GrB.subassign (Cin, M, accum, A, I, J, desc)
+                sparse <span class="string">matrix</span> <span class="string">assignment</span>, such <span class="string">as</span> <span class="string">C(I,J)=A</span>
+Cout = GrB.vreduce (Cin, M, accum, op, A, desc)
+                reduce <span class="string">a</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">a</span> <span class="string">vector</span>
+Cout = GrB.reduce (Cin, accum, op, A, desc)
+                reduce <span class="string">a</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">a</span> <span class="string">scalar</span>
+Cout = GrB.kronecker (Cin, M, accum, op, A, B, desc)
+                Kronecker <span class="string">product</span>
+Cout = GrB.trans (Cin, M, accum, A, desc)
+                transpose <span class="string">a</span> <span class="string">matrix</span>
+Cout = GrB.eadd (Cin, M, accum, op, A, B, desc)
+                element-wise addition
+Cout = GrB.emult (Cin, M, accum, op, A, B, desc)
+                element-wise multiplication
+Cout = GrB.apply (Cin, M, accum, op, A, desc)
+                apply <span class="string">a</span> <span class="string">unary</span> <span class="string">operator</span>
+Cout = GrB.extract (Cin, M, accum, A, I, J, desc)
+                extract <span class="string">submatrix</span>, like <span class="string">C=A(I,J)</span> <span class="string">in</span> <span class="string">MATLAB</span>
+</pre><p>GraphBLAS operations (with Cout, Cin arguments) take the following form:</p><pre class="language-matlab">C&lt;#M,replace&gt; = accum (C, operation (A or A', B or B'))
+</pre><pre class="language-matlab">C <span class="string">is</span> <span class="string">both</span> <span class="string">an</span> <span class="string">input</span> <span class="string">and</span> <span class="string">output</span> <span class="string">matrix.</span>  <span class="string">In</span> <span class="string">this</span> <span class="string">MATLAB</span> <span class="string">interface</span> <span class="string">to</span>
+GraphBLAS, it <span class="string">is</span> <span class="string">split</span> <span class="string">into</span> <span class="string">Cin</span> <span class="string">(the value of C on input)</span> <span class="string">and</span> <span class="string">Cout</span>
+(the value of C on output).  M is <span class="string">the</span> <span class="string">optional</span> <span class="string">mask</span> <span class="string">matrix</span>, and <span class="string">#M</span> <span class="string">is</span>
+either <span class="string">M</span> <span class="string">or</span> <span class="string">!M</span> <span class="string">depending</span> <span class="string">on</span> <span class="string">whether</span> <span class="string">or</span> <span class="string">not</span> <span class="string">the</span> <span class="string">mask</span> <span class="string">is</span> <span class="string">complemented</span>
+via <span class="string">the</span> <span class="string">desc.mask</span> <span class="string">option.</span>  <span class="string">The</span> <span class="string">replace</span> <span class="string">option</span> <span class="string">is</span> <span class="string">determined</span> <span class="string">by</span>
+desc.out; <span class="keyword">if</span> present, C <span class="string">is</span> <span class="string">cleared</span> <span class="string">after</span> <span class="string">it</span> <span class="string">is</span> <span class="string">used</span> <span class="string">in</span> <span class="string">the</span> <span class="string">accum</span>
+operation <span class="string">but</span> <span class="string">before</span> <span class="string">the</span> <span class="string">final</span> <span class="string">assignment.</span>  <span class="string">A</span> <span class="string">and/or</span> <span class="string">B</span> <span class="string">may</span> <span class="string">optionally</span>
+be <span class="string">transposed</span> <span class="string">via</span> <span class="string">the</span> <span class="string">descriptor</span> <span class="string">fields</span> <span class="string">desc.in0</span> <span class="string">and</span> <span class="string">desc.in1</span>,
+respectively.  To select <span class="string">the</span> <span class="string">format</span> <span class="string">of</span> <span class="string">Cout</span>, use <span class="string">desc.format.</span>  <span class="string">See</span>
+GrB.descriptorinfo <span class="keyword">for</span> more details.
+</pre><pre class="language-matlab">accum <span class="string">is</span> <span class="string">optional</span>; <span class="keyword">if</span> it is <span class="string">not</span> <span class="string">present</span>, then <span class="string">the</span> <span class="string">operation</span> <span class="string">becomes</span>
+C&lt;<span class="keyword">...</span><span class="comment">&gt; = operation(A,B).  Otherwise, C = C + operation(A,B) is</span>
+computed where <span class="string">'+'</span> <span class="string">is</span> <span class="string">the</span> <span class="string">accum</span> <span class="string">operator.</span>  <span class="string">It</span> <span class="string">acts</span> <span class="string">like</span> <span class="string">a</span> <span class="string">sparse</span>
+matrix <span class="string">addition</span> <span class="string">(see GrB.eadd)</span>, in <span class="string">terms</span> <span class="string">of</span> <span class="string">the</span> <span class="string">structure</span> <span class="string">of</span> <span class="string">the</span>
+result <span class="string">C</span>, but <span class="string">any</span> <span class="string">binary</span> <span class="string">operator</span> <span class="string">can</span> <span class="string">be</span> <span class="string">used.</span>
+</pre><pre class="language-matlab">The <span class="string">mask</span> <span class="string">M</span> <span class="string">acts</span> <span class="string">like</span> <span class="string">MATLAB</span> <span class="string">logical</span> <span class="string">indexing.</span>  <span class="string">If</span> <span class="string">M(i,j)=1</span> <span class="string">then</span>
+C(i,j) can <span class="string">be</span> <span class="string">modified</span>; <span class="keyword">if</span> zero, it <span class="string">cannot</span> <span class="string">be</span> <span class="string">modified</span> <span class="string">by</span> <span class="string">the</span>
+operation.
+</pre><h2 id="80">Static Methods for graph algorithms:</h2><pre class="language-matlab">r = GrB.pagerank (A, opts) ;            <span class="comment">% PageRank of a matrix</span>
+C = GrB.ktruss (A, k, check) ;          <span class="comment">% k-truss</span>
+s = GrB.tricount (A, check) ;           <span class="comment">% triangle count</span>
+L = GrB.laplacian (A, type, check) ;    <span class="comment">% Laplacian graph</span>
+C = GrB.incidence (A, <span class="keyword">...</span><span class="comment">) ;            % incidence matrix</span>
+[v, parent] = GrB.bfs (A, s, <span class="keyword">...</span><span class="comment">) ;     % breadth-first search</span>
+iset = GrB.mis (A, check) ;             <span class="comment">% maximal independent set</span>
+Y = GrB.dnn (W, bias, Y0) ;             <span class="comment">% deep neural network</span>
+</pre><pre class="language-matlab">More <span class="string">graph</span> <span class="string">algorithms</span> <span class="string">will</span> <span class="string">be</span> <span class="string">added</span> <span class="string">in</span> <span class="string">the</span> <span class="string">future.</span>
+</pre><p>Thanks for watching!</p><p>Tim Davis, Texas A&amp;M University, <a href="http://faculty.cse.tamu.edu/davis">http://faculty.cse.tamu.edu/davis</a> See also sparse, doc sparse, and https://twitter.com/DocSparse</p><p class="footer"><br><a href="https://www.mathworks.com/products/matlab/">Published with MATLAB&reg; R2018a</a><br></p></div><!--
+##### SOURCE BEGIN #####
+%% GraphBLAS: graph algorithms in the language of linear algebra
+% GraphBLAS is a library for creating graph algorithms based on sparse
+% linear algebraic operations over semirings.  Visit http://graphblas.org
+% for more details and resources.  See also the SuiteSparse:GraphBLAS
+% User Guide in this package.
+%
+% SuiteSparse:GraphBLAS, (c) 2017-2020, Tim Davis, Texas A&M University,
+% http://faculty.cse.tamu.edu/davis
+
+%% GraphBLAS: faster and more general sparse matrices for MATLAB
+% GraphBLAS is not only useful for creating graph algorithms; it also
+% supports a wide range of sparse matrix data types and operations.
+% MATLAB can compute C=A*B with just two semirings: 'plus.times.double'
+% and 'plus.times.complex' for complex matrices.  GraphBLAS has 1,040
+% unique built-in semirings, such as 'max.plus'
+% (https://en.wikipedia.org/wiki/Tropical_semiring).  These semirings can
+% be used to construct a wide variety of graph algorithms, based on
+% operations on sparse adjacency matrices.
+%
+% GraphBLAS supports sparse double and single precision matrices,
+% logical, and sparse integer matrices: int8, int16, int32, int64, uint8,
+% uint16, uint32, and uint64.  Complex matrices will be added in the
+% future.
+
+clear all
+format compact
+rng ('default') ;
+X = 100 * rand (2) ;
+G = GrB (X)              % GraphBLAS copy of a matrix X, same type
+
+%% Sparse integer matrices
+% Here's an int8 version of the same matrix:
+
+S = int8 (G)            % convert G to a full MATLAB int8 matrix
+G = GrB (X, 'int8')      % a GraphBLAS sparse int8 matrix
+
+%% Sparse single-precision matrices
+% Matrix operations in GraphBLAS are typically as fast, or faster than
+% MATLAB.  Here's an unfair comparison: computing X^2 with MATLAB in
+% double precision and with GraphBLAS in single precision.  You would
+% naturally expect GraphBLAS to be faster. 
+%
+% Please wait ...
+
+n = 1e5 ;
+X = spdiags (rand (n, 201), -100:100, n, n) ;
+G = GrB (X, 'single') ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+tic
+X2 = X^2 ;
+matlab_time = toc ;
+fprintf ('\nGraphBLAS time: %g sec (in single)\n', gb_time) ;
+fprintf ('MATLAB time:    %g sec (in double)\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+%% Mixing MATLAB and GraphBLAS matrices
+% The error in the last computation is about eps('single') since
+% GraphBLAS did its computation in single precision, while MATLAB used
+% double precision.  MATLAB and GraphBLAS matrices can be easily
+% combined, as in X2-G2.  The sparse single precision matrices take less
+% memory space.
+
+err = norm (X2 - G2, 1) / norm (X2,1)
+eps ('single')
+whos G G2 X X2
+
+%% Faster matrix operations
+% But even with standard double precision sparse matrices, GraphBLAS is
+% typically faster than the built-in MATLAB methods.  Here's a fair
+% comparison:
+
+G = GrB (X) ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+err = norm (X2 - G2, 1) / norm (X2,1)
+fprintf ('\nGraphBLAS time: %g sec (in double)\n', gb_time) ;
+fprintf ('MATLAB time:    %g sec (in double)\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+%% A wide range of semirings
+% MATLAB can only compute C=A*B using the standard '+.*.double' and
+% '+.*.complex' semirings.  A semiring is defined in terms of a string,
+% 'add.mult.type', where 'add' is a monoid that takes the place of the
+% additive operator, 'mult' is the multiplicative operator, and 'type' is
+% the data type for the two inputs to the mult operator (the type
+% defaults to the type of A for C=A*B).
+%
+% In the standard semiring, C=A*B is defined as:
+%
+%   C(i,j) = sum (A(i,:).' .* B(:,j))
+%
+% using 'plus' as the monoid and 'times' as the multiplicative operator.
+% But in a more general semiring, 'sum' can be any monoid, which is an
+% associative and commutative operator that has an identity value.  For
+% example, in the 'max.plus' tropical algebra, C(i,j) for C=A*B is
+% defined as:
+%
+%   C(i,j) = max (A(i,:).' + B(:,j))
+
+%%
+% This can be computed in GraphBLAS with:
+%
+%   C = GrB.mxm ('max.+', A, B)
+
+n = 3 ;
+A = rand (n) ;
+B = rand (n) ;
+C = zeros (n) ;
+for i = 1:n
+    for j = 1:n
+        C(i,j) = max (A (i,:).' + B (:,j)) ;
+    end
+end
+C2 = GrB.mxm ('max.+', A, B) ;
+fprintf ('\nerr = norm (C-C2,1) = %g\n', norm (C-C2,1)) ;
+
+%% The max.plus tropical semiring
+% Here are details of the "max.plus" tropical semiring.  The identity
+% value is -inf since max(x,-inf) = max (-inf,x) = x for any x.
+
+GrB.semiringinfo ('max.+.double') ;
+
+%% A boolean semiring
+% MATLAB cannot multiply two logical matrices.  MATLAB R2019a converts
+% them to double and uses the conventional +.*.double semiring instead.
+% In GraphBLAS, this is the common Boolean 'or.and.logical' semiring,
+% which is widely used in linear algebraic graph algorithms.
+
+GrB.semiringinfo ('|.&.logical') ;
+
+%%
+clear
+A = sparse (rand (3) > 0.5)
+B = sparse (rand (3) > 0.2)
+
+%%
+try
+    % MATLAB R2019a does this by casting A and B to double
+    C1 = A*B
+catch
+    % MATLAB R2018a throws an error
+    fprintf ('MATLAB R2019a required for C=A*B with logical\n') ;
+    fprintf ('matrices.  Explicitly converting to double:\n') ;
+    C1 = double (A) * double (B)
+end
+C2 = GrB (A) * GrB (B)
+
+%%
+% Note that C1 is a MATLAB sparse double matrix, and contains non-binary
+% values.  C2 is a GraphBLAS logical matrix.
+whos
+GrB.type (C2)
+
+%% GraphBLAS operators, monoids, and semirings
+% The C interface for SuiteSparse:GraphBLAS allows for arbitrary types
+% and operators to be constructed.  However, the MATLAB interface to
+% SuiteSparse:GraphBLAS is restricted to pre-defined types and operators:
+% a mere 11 types, 66 unary operators, 275 binary operators, 44 monoids,
+% 16 select operators, and 1,865 semirings (1,040 of which are unique,
+% since some binary operators are equivalent: 'min.logical' and
+% '&.logical' are the same thing, for example).  The complex type and
+% its binary operators, monoids, and semirings will be added in the
+% near future.
+%
+% That gives you a lot of tools to create all kinds of interesting
+% graph algorithms.  For example:
+%
+%   GrB.bfs    % breadth-first search
+%   GrB.dnn    % sparse deep neural network (http://graphchallenge.org)
+%   GrB.mis    % maximal independent set
+%
+% See 'help GrB.binopinfo' for a list of the binary operators, and
+% 'help GrB.monoidinfo' for the ones that can be used as the additive
+% monoid in a semiring.
+
+%% 
+help GrB.binopinfo
+
+%% 
+help GrB.monoidinfo
+
+%% Element-wise operations
+% Binary operators can be used in element-wise matrix operations, like
+% C=A+B and C=A.*B.  For the matrix addition C=A+B, the pattern of C is
+% the set union of A and B, and the '+' operator is applied for entries
+% in the intersection.  Entries in A but not B, or in B but not A, are
+% assigned to C without using the operator.  The '+' operator is used for
+% C=A+B but any operator can be used with GrB.eadd.
+
+%%
+A = GrB (sprand (3, 3, 0.5)) ;
+B = GrB (sprand (3, 3, 0.5)) ;
+C1 = A + B
+C2 = GrB.eadd ('+', A, B)
+err = norm (C1-C2,1)
+
+%% Subtracting two matrices
+% A-B and GrB.eadd ('-', A, B) are not the same thing, since the '-'
+% operator is not applied to an entry that is in B but not A.
+
+C1 = A-B 
+C2 = GrB.eadd ('-', A, B)
+
+%% 
+% But these give the same result
+
+C1 = A-B 
+C2 = GrB.eadd ('+', A, GrB.apply ('-', B))
+err = norm (C1-C2,1)
+
+%% Element-wise 'multiplication'
+% For C = A.*B, the result C is the set intersection of the pattern of A
+% and B.  The operator is applied to entries in both A and B.  Entries in
+% A but not B, or B but not A, do not appear in the result C.
+
+C1 = A.*B
+C2 = GrB.emult ('*', A, B) 
+C3 = double (A) .* double (B)
+
+%%
+% Just as in GrB.eadd, any operator can be used in GrB.emult:
+
+A
+B
+C2 = GrB.emult ('max', A, B) 
+
+%% Overloaded operators
+% The following operators all work as you would expect for any matrix.
+% The matrices A and B can be GraphBLAS matrices, or MATLAB sparse or
+% dense matrices, in any combination, or scalars where appropriate:
+%
+%    A+B   A-B  A*B   A.*B  A./B  A.\B  A.^b   A/b   C=A(I,J)
+%    -A    +A   ~A    A'    A.'   A&B   A|B    b\A   C(I,J)=A
+%    A~=B  A>B  A==B  A<=B  A>=B  A<B   [A,B]  [A;B]
+%    A(1:end,1:end)
+%
+% For A^b, b must be a non-negative integer.
+
+C1 = [A B] ;
+C2 = [double(A) double(B)] ;
+assert (isequal (double (C1), C2))
+
+%%
+C1 = A^2
+C2 = double (A)^2 ;
+err = norm (C1 - C2, 1)
+assert (err < 1e-12)
+
+%%
+C1 = A (1:2,2:end)
+A = double (A) ;
+C2 = A (1:2,2:end) ;
+assert (isequal (double (C1), C2))
+
+%% Overloaded functions
+% Many MATLAB built-in functions can be used with GraphBLAS matrices:
+%
+% A few differences with the built-in functions:
+%
+%   S = sparse (G)        % makes a copy of a GrB matrix
+%   F = full (G)          % adds explicit zeros, so numel(F)==nnz(F)
+%   F = full (G,type,id)  % adds explicit identity values to a GrB matrix
+%   disp (G, level)       % display a GrB matrix G; level=2 is the default.
+%
+% In the list below, the first set of Methods are overloaded built-in
+% methods.  They are used as-is on GraphBLAS matrices, such as C=abs(G).
+% The Static methods are prefixed with "GrB.", as in C = GrB.apply ( ... ).
+
+%%
+
+methods GrB
+
+%% Zeros are handled differently
+% Explicit zeros cannot be automatically dropped from a GraphBLAS matrix,
+% like they are in MATLAB sparse matrices.  In a shortest-path problem,
+% for example, an edge A(i,j) that is missing has an infinite weight,
+% (the monoid identity of min(x,y) is +inf).  A zero edge weight A(i,j)=0
+% is very different from an entry that is not present in A.  However, if
+% a GraphBLAS matrix is converted into a MATLAB sparse matrix, explicit
+% zeros are dropped, which is the convention for a MATLAB sparse matrix.
+% They can also be dropped from a GraphBLAS matrix using the GrB.select
+% method.
+
+%%
+
+G = GrB (magic (2)) ;
+G (1,1) = 0      % G(1,1) still appears as an explicit entry
+A = double (G)   % but it's dropped when converted to MATLAB sparse
+H = GrB.select ('nonzero', G)  % drops the explicit zeros from G
+fprintf ('nnz (G): %d  nnz (A): %g nnz (H): %g\n', ...
+    nnz (G), nnz (A), nnz (H)) ;
+fprintf ('num entries in G: %d\n', GrB.entries (G)) ;
+
+%% Displaying contents of a GraphBLAS matrix
+% Unlike MATLAB, the default is to display just a few entries of a GrB matrix.
+% Here are all 100 entries of a 10-by-10 matrix, using a non-default disp(G,3):
+
+%%
+G = GrB (rand (10)) ;
+% display everything:
+disp (G,3)
+
+%%
+% That was disp(G,3), so every entry was printed.  It's a little long, so
+% the default is not to print everything.
+
+%%
+% With the default display (level = 2):
+G
+
+%%
+% That was disp(G,2) or just display(G), which is what is printed by a
+% MATLAB statement that doesn't have a trailing semicolon.  With
+% level = 1, disp(G,1) gives just a terse summary:
+disp (G,1)
+
+%% Storing a matrix by row or by column
+% MATLAB stores its sparse matrices by column, referred to as 'standard
+% CSC' in SuiteSparse:GraphBLAS.  In the CSC (compressed sparse column)
+% format, each column of the matrix is stored as a list of entries, with
+% their value and row index.  In the CSR (compressed sparse row) format,
+% each row is stored as a list of values and their column indices.
+% GraphBLAS uses both CSC and CSR, and the two formats can be intermixed
+% arbitrarily.  In its C interface, the default format is CSR.  However,
+% for better compatibility with MATLAB, this MATLAB interface for
+% SuiteSparse:GraphBLAS uses CSC by default instead. 
+
+%%
+rng ('default') ;
+GrB.clear ;                      % clear all prior GraphBLAS settings
+fprintf ('the default format is: %s\n', GrB.format) ;
+C = sparse (rand (2))
+G = GrB (C)
+GrB.format (G)
+
+%%
+% Many graph algorithms work better in CSR format, with matrices stored
+% by row.  For example, it is common to use A(i,j) for the edge (i,j),
+% and many graph algorithms need to access the out-adjacencies of nodes,
+% which is the row A(i,:) for node i.  If the CSR format is desired,
+% GrB.format ('by row') tells GraphBLAS to create all subsequent matrices
+% in the CSR format.  Converting from a MATLAB sparse matrix (in standard
+% CSC format) takes a little more time (requiring a transpose), but
+% subsequent graph algorithms can be faster.
+
+%%
+G = GrB (C, 'by row')
+fprintf ('the format of G is:    %s\n', GrB.format (G)) ;
+H = GrB (C)
+fprintf ('the format of H is:    %s\n', GrB.format (H)) ;
+err = norm (H-G,1)
+
+%% Hypersparse matrices
+% SuiteSparse:GraphBLAS can use two kinds of sparse matrix data
+% structures: standard and hypersparse, for both CSC and CSR formats.  In
+% the standard CSC format used in MATLAB, an m-by-n matrix A takes
+% O(n+nnz(A)) space.  MATLAB can create huge column vectors, but not huge
+% matrices (when n is huge).
+
+clear all
+[c, huge] = computer ;
+C = sparse (huge, 1)    % MATLAB can create a huge-by-1 sparse column
+try
+    C = sparse (huge, huge)     % but this fails
+catch me
+    error_expected = me
+end
+
+%%
+% In a GraphBLAS hypersparse matrix, an m-by-n matrix A takes only
+% O(nnz(A)) space.  The difference can be huge if nnz (A) << n.
+
+clear
+[c, huge] = computer ;
+G = GrB (huge, 1)            % no problem for GraphBLAS
+H = GrB (huge, huge)         % this works in GraphBLAS too
+
+%%
+% Operations on huge hypersparse matrices are very fast; no component of
+% the time or space complexity is Omega(n).
+
+I = randperm (huge, 2) ;
+J = randperm (huge, 2) ;
+H (I,J) = magic (2) ;        % add 4 nonzeros to random locations in H
+H (I,I) = 10 * [1 2 ; 3 4] ; % so H^2 is not all zero
+H = H^2 ;                    % square H
+H = (H' * 2) ;               % transpose H and double the entries
+K = pi * spones (H) ;
+H = H + K                    % add pi to each entry in H
+
+%% numel uses vpa if the matrix is really huge
+e1 = numel (G)               % this is huge, but still a flint
+e2 = numel (H)               % this is huge^2, which needs vpa
+whos e1 e2
+
+%%
+% All of these matrices take very little memory space:
+whos C G H K
+
+%% The mask and accumulator
+% When not used in overloaded operators or built-in functions, many
+% GraphBLAS methods of the form GrB.method ( ... ) can optionally use a
+% mask and/or an accumulator operator.  If the accumulator is '+' in
+% GrB.mxm, for example, then C = C + A*B is computed.  The mask acts much
+% like logical indexing in MATLAB.  With a logical mask matrix M,
+% C<M>=A*B allows only part of C to be assigned.  If M(i,j) is true, then
+% C(i,j) can be modified.  If false, then C(i,j) is not modified.
+%
+% For example, to set all values in C that are greater than 0.5 to 3:
+
+%%
+A = rand (3) 
+C = GrB.assign (A, A > 0.5, 3) ;     % in GraphBLAS
+C1 = GrB (A) ; C1 (A > .5) = 3       % also in GraphBLAS
+C2 = A      ; C2 (A > .5) = 3       % in MATLAB
+err = norm (C - C1, 1)
+err = norm (C - C2, 1)
+
+%% The descriptor
+% Most GraphBLAS functions of the form GrB.method ( ... ) take an optional
+% last argument, called the descriptor.  It is a MATLAB struct that can
+% modify the computations performed by the method.  'help
+% GrB.descriptorinfo' gives all the details.  The following is a short
+% summary of the primary settings:
+%
+% d.out  = 'default' or 'replace', clears C after the accum op is used.
+%
+% d.mask = 'default' or 'complement', to use M or ~M as the mask matrix;
+%          'structural', or 'structural complement', to use the pattern
+%           of M or ~M.
+%
+% d.in0  = 'default' or 'transpose', to transpose A for C=A*B, C=A+B, etc.
+%
+% d.in1  = 'default' or 'transpose', to transpose B for C=A*B, C=A+B, etc.
+%
+% d.kind = 'default', 'GrB', 'sparse', or 'full'; the output of GrB.method.
+
+A = sparse (rand (2)) ;
+B = sparse (rand (2)) ;
+C1 = A'*B ;
+C2 = GrB.mxm ('+.*', A, B, struct ('in0', 'transpose')) ;
+err = norm (C1-C2,1)
+
+%% Integer arithmetic is different in GraphBLAS
+% MATLAB supports integer arithmetic on its full matrices, using int8,
+% int16, int32, int64, uint8, uint16, uint32, or uint64 data types.  None
+% of these integer data types can be used to construct a MATLAB sparse
+% matrix, which can only be double, double complex, or logical.
+% Furthermore, C=A*B is not defined for integer types in MATLAB, except
+% when A and/or B are scalars.
+%
+% GraphBLAS supports all of those types for its sparse matrices (except
+% for complex, which will be added in the future).  All operations are
+% supported, including C=A*B when A or B are any integer type, for all
+% 1,865 semirings (1,040 of which are unique).
+%
+% However, integer arithmetic differs in GraphBLAS and MATLAB.  In
+% MATLAB, integer values saturate if they exceed their maximum value.  In
+% GraphBLAS, integer operators act in a modular fashion.  The latter is
+% essential when computing C=A*B over a semiring.  A saturating integer
+% operator cannot be used as a monoid since it is not associative.
+%
+% The C API for GraphBLAS allows for the creation of arbitrary
+% user-defined types, so it would be possible to create different binary
+% operators to allow element-wise integer operations to saturate,
+% perhaps:
+%
+%   C = GrB.eadd('+saturate',A,B)
+%
+% This would require an extension to this MATLAB interface.
+
+%%
+C = uint8 (magic (3)) ;
+G = GrB (C) ;
+C1 = C * 40
+C2 = G * 40
+C3 = double (G) * 40 ;
+S = double (C1 < 255) ;
+assert (isequal (double (C1).*S, double (C2).*S))
+assert (isequal (nonzeros (C2), double (mod (nonzeros (C3), 256))))
+
+%% An example graph algorithm: breadth-first search
+% The breadth-first search of a graph finds all nodes reachable from the
+% source node, and their level, v.  v=GrB.bfs(A,s) or v=bfs_matlab(A,s)
+% compute the same thing, but GrB.bfs uses GraphBLAS matrices and
+% operations, while bfs_matlab uses pure MATLAB operations.  v is defined
+% as v(s) = 1 for the source node, v(i) = 2 for nodes adjacent to the
+% source, and so on.
+
+clear all
+rng ('default') ;
+n = 1e5 ;
+A = logical (sprandn (n, n, 1e-3)) ;
+
+tic
+v1 = GrB.bfs (A, 1) ;
+gb_time = toc ;
+
+tic
+v2 = bfs_matlab (A, 1) ;
+matlab_time = toc ;
+
+assert (isequal (double (v1'), v2))
+fprintf ('\nnodes reached: %d of %d\n', nnz (v2), n) ;
+fprintf ('GraphBLAS time: %g sec\n', gb_time) ;
+fprintf ('MATLAB time:    %g sec\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+%% Example graph algorithm: Luby's method in GraphBLAS
+% The GrB.mis.m function is a variant of Luby's randomized algorithm [Luby
+% 1985].  It is a parallel method for finding a maximal independent set
+% of nodes, where no two nodes are adjacent.  See the
+% GraphBLAS/@GrB/GrB.mis.m function for details.  The graph must be
+% symmetric with a zero-free diagonal, so A is symmetrized first and any
+% diagonal entries are removed.
+
+A = GrB (A) ;
+A = GrB.offdiag (A|A') ;
+
+tic
+s = GrB.mis (A) ;
+toc
+fprintf ('# nodes in the graph: %g\n', size (A,1)) ;
+fprintf ('# edges: : %g\n', GrB.entries (A) / 2) ;
+fprintf ('size of maximal independent set found: %g\n', ...
+    full (double (sum (s)))) ;
+
+% make sure it's independent
+p = find (s) ;
+S = A (p,p) ;
+assert (GrB.entries (S) == 0)
+
+% make sure it's maximal
+notp = find (s == 0) ;
+S = A (notp, p) ;
+deg = GrB.vreduce ('+.int64', S) ;
+assert (logical (all (deg > 0)))
+
+%% Sparse deep neural network
+% The 2019 MIT GraphChallenge (see http://graphchallenge.org) is to solve
+% a set of large sparse deep neural network problems.  In this demo, the
+% MATLAB reference solution is compared with a solution using GraphBLAS,
+% for a randomly constructed neural network.  See the GrB.dnn and
+% dnn_matlab.m functions for details.
+
+clear all
+rng ('default') ;
+nlayers = 16 ;
+nneurons = 4096 ;
+nfeatures = 30000 ;
+fprintf ('# layers:   %d\n', nlayers) ;
+fprintf ('# neurons:  %d\n', nneurons) ;
+fprintf ('# features: %d\n', nfeatures) ;
+
+tic
+Y0 = sprand (nfeatures, nneurons, 0.1) ;
+for layer = 1:nlayers
+    W {layer} = sprand (nneurons, nneurons, 0.01) * 0.2 ;
+    bias {layer} = -0.2 * ones (1, nneurons) ;
+end
+t_setup = toc ;
+fprintf ('construct problem time: %g sec\n', t_setup) ;
+
+% convert the problem from MATLAB to GraphBLAS
+t = tic ;
+[W_gb, bias_gb, Y0_gb] = dnn_mat2gb (W, bias, Y0) ;
+t = toc (t) ;
+fprintf ('setup time: %g sec\n', t) ;
+
+%% Solving the sparse deep neural network problem with GraphBLAS
+% Please wait ...
+
+tic
+Y1 = GrB.dnn (W_gb, bias_gb, Y0_gb) ;
+gb_time = toc ;
+fprintf ('total time in GraphBLAS: %g sec\n', gb_time) ;
+
+%% Solving the sparse deep neural network problem with MATLAB
+% Please wait ...
+
+tic
+Y2 = dnn_matlab (W, bias, Y0) ;
+matlab_time = toc ;
+fprintf ('total time in MATLAB:    %g sec\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+err = norm (Y1-Y2,1)
+
+%% For objects, GraphBLAS has better colon notation than MATLAB
+% The MATLAB notation C = A (start:inc:fini) is very handy, and
+% it works great if A is a MATLAB matrix.  But for objects like
+% the GraphBLAS matrix, MATLAB starts by creating the explicit
+% index vector I = start:inc:fini.  That's fine if the matrix is
+% modest in size, but GraphBLAS can construct huge matrices.
+% The problem is that 1:n cannot be explicitly constructed when n
+% is huge.
+%
+% The C API for GraphBLAS can represent the colon notation 
+% start:inc:fini in an implicit manner, so it can do the indexing
+% without actually forming the explicit list I = start:inc:fini.
+% But there is no access to this method using the MATLAB notation
+% start:inc:fini.
+%
+% Thus, to compute C = A (start:inc:fini) for very huge matrices,
+% you need to use a cell array to represent the colon notation,
+% as { start, inc, fini }, instead of start:inc:fini. See
+% 'help GrB.extract', and 'help GrB.subassign' for C(I,J)=A.  The
+% syntax isn't conventional, but it is far faster than the MATLAB
+% colon notation for objects, and takes far less memory when I is huge.
+
+%%
+n = 1e14 ;
+H = GrB (n, n) ;            % a huge empty matrix
+I = [1 1e9 1e12 1e14] ;
+M = magic (4)
+H (I,I) = M ;
+J = {1, 1e13} ;            % represents 1:1e13 colon notation
+C1 = H (J, J)              % computes C1 = H (1:1e13,1:1e13)
+c = nonzeros (C1) ;
+m = nonzeros (M (1:3, 1:3)) ;
+assert (isequal (c, m)) ;
+
+%%
+try
+    % try to compute the same thing with colon
+    % notation (1:1e13), but this fails:
+    C2 = H (1:1e13, 1:1e13)
+catch me
+    error_expected = me
+end
+
+%% Iterative solvers work as-is
+% Many built-in functions work with GraphBLAS matrices unmodified.
+
+A = sparse (rand (4)) ;
+b = sparse (rand (4,1)) ;
+x = gmres (A,b)
+norm (A*x-b)
+x = gmres (GrB(A), GrB(b))
+norm (A*x-b)
+
+%% ... even in single precision
+x = gmres (GrB(A,'single'), GrB(b,'single'))
+norm (A*x-b)
+
+%%
+% Both of the following uses of minres (A,b) fail to converge because A
+% is not symmetric, as the method requires.  Both failures are correctly
+% reported, and both the MATLAB version and the GraphBLAS version return
+% the same incorrect vector x.
+
+x = minres (A, b)
+x = minres (GrB(A), GrB(b))
+
+%%
+% With a proper symmetric matrix
+
+A = A+A' ;
+x = minres (A, b)
+norm (A*x-b)
+x = minres (GrB(A), GrB(b))
+norm (A*x-b)
+
+%% Extreme performance differences between GraphBLAS and MATLAB.
+% The GraphBLAS operations used so far are perhaps 2x to 50x faster than
+% the corresponding MATLAB operations, depending on how many cores your
+% computer has.  To run a demo illustrating a 500x or more speedup versus
+% MATLAB, run this demo:
+%
+%    gbdemo2
+%
+% It will illustrate an assignment C(I,J)=A that can take under a second
+% in GraphBLAS but several minutes in MATLAB.  To make the comparison
+% even more dramatic, try:
+%
+%    gbdemo2 (20000)
+%
+% assuming you have enough memory.  The gbdemo2 is not part of this demo
+% since it can take a long time; it tries a range of problem sizes,
+% and each one takes several minutes in MATLAB.
+
+%% Sparse logical indexing is much, much faster in GraphBLAS
+% The mask in GraphBLAS acts much like logical indexing in MATLAB, but it
+% is not quite the same.  MATLAB logical indexing takes the form:
+%
+%       C (M) = A (M)
+%
+% which computes the same thing as the GraphBLAS statement:
+%
+%       C = GrB.assign (C, M, A)
+%
+% The GrB.assign statement computes C(M)=A(M), and it is vastly faster
+% than C(M)=A(M), even if the time to convert the GrB matrix back to a
+% MATLAB sparse matrix is included.
+%
+% GraphBLAS can also compute C (M) = A (M) using overloaded operators
+% for subsref and subsasgn, but C = GrB.assign (C, M, A) is a bit faster.
+%
+% First, both methods in GraphBLAS (both are very fast):
+
+clear
+n = 4000 ;
+tic
+C = sprand (n, n, 0.1) ;
+A = 100 * sprand (n, n, 0.1) ;
+M = (C > 0.5) ;
+t_setup = toc ;
+fprintf ('nnz(C): %g, nnz(M): %g, nnz(A): %g\n', ...
+    nnz(C), nnz(M), nnz(A)) ;
+fprintf ('\nsetup time:     %g sec\n', t_setup) ;
+
+% include the time to convert C1 from a GraphBLAS
+% matrix to a MATLAB sparse matrix:
+tic
+C1 = GrB.assign (C, M, A) ;
+C1 = double (C1) ;
+gb_time = toc ;
+fprintf ('\nGraphBLAS time: %g sec for GrB.assign\n', gb_time) ;
+
+% now using overloaded operators, also include the time to
+% convert back to a MATLAB sparse matrix, for good measure:
+A2 = GrB (A) ;
+C2 = GrB (C) ;
+tic
+C2 (M) = A2 (M) ;
+C2 = double (C2) ;
+gb_time2 = toc ;
+fprintf ('\nGraphBLAS time: %g sec for C(M)=A(M)\n', gb_time2) ;
+
+%%
+% Please wait, this will take about 10 minutes or so ...
+
+tic
+C (M) = A (M) ;
+matlab_time = toc ;
+
+fprintf ('\nGraphBLAS time: %g sec (GrB.assign)\n', gb_time) ;
+fprintf ('\nGraphBLAS time: %g sec (overloading)\n', gb_time2) ;
+fprintf ('MATLAB time:    %g sec\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time2) ;
+
+% GraphBLAS computes the exact same result with both methods:
+assert (isequal (C1, C))
+assert (isequal (C2, C))
+C1 - C
+C2 - C
+
+%% Limitations and their future solutions
+% The MATLAB interface for SuiteSparse:GraphBLAS is a work-in-progress.
+% It has some limitations, most of which will be resolved over time.
+%
+% (1) Nonblocking mode:
+%
+% GraphBLAS has a 'non-blocking' mode, in which operations can be left
+% pending and completed later.  SuiteSparse:GraphBLAS uses the
+% non-blocking mode to speed up a sequence of assignment operations, such
+% as C(I,J)=A.  However, in its MATLAB interface, this would require a
+% MATLAB mexFunction to modify its inputs.  That breaks the MATLAB API
+% standard, so it cannot be safely done.  As a result, using GraphBLAS
+% via its MATLAB interface can be slower than when using its C API.  This
+% restriction would not be a limitation if GraphBLAS were to be
+% incorporated into MATLAB itself, but there is likely no way to do this
+% in a mexFunction interface to GraphBLAS.
+
+%%
+% (2) Complex matrices:
+%
+% GraphBLAS can operate on matrices with arbitrary user-defined types and
+% operators.  The only constraint is that the type be a fixed sized
+% typedef that can be copied with the ANSI C memcpy; variable-sized types
+% are not yet supported.  However, in this MATLAB interface,
+% SuiteSparse:GraphBLAS has access to only predefined types, operators,
+% and semirings.  Complex types and operators will be added to this
+% MATLAB interface in the future.  They already appear in the C version
+% of GraphBLAS, with user-defined operators in
+% GraphBLAS/Demo/Source/usercomplex.c.
+
+%%
+% (3) Integer element-wise operations:
+%
+% Integer operations in MATLAB saturate, so that uint8(255)+1 is 255.  To
+% allow for integer monoids, GraphBLAS uses modular arithmetic instead.
+% This is the only way that C=A*B can be defined for integer semirings.
+% However, saturating integer operators could be added in the future, so
+% that element-wise integer operations on GraphBLAS sparse integer
+% matrices could work just the same as their MATLAB counterparts.
+%
+% So in the future, you could perhaps write this, for both sparse and
+% dense integer matrices A and B:
+%
+%       C = GrB.eadd ('+saturate.int8', A, B)
+%
+% to compute the same thing as C=A+B in MATLAB for its full int8
+% matrices.  Note that MATLAB can do this only for dense integer
+% matrices, since it doesn't support sparse integer matrices.
+
+%%
+% (4) Faster methods:
+%
+% Most methods in this MATLAB interface are based on efficient parallel C
+% functions in GraphBLAS itself, and are typically as fast or faster than
+% the equivalent built-in operators and functions in MATLAB.
+%
+% There are a few notable exceptions; these will be addressed in the future.
+% Dense matrices and vectors held as GraphBLAS objects are slower than
+% their MATLAB counterparts.  horzcat and vertcat, for [A B] and [A;B]
+% when either A or B is a GraphBLAS matrix, are also slow, as
+% illustrated below in the next example.
+%
+% Other methods that will be faster in the future include bandwidth,
+% istriu, istril, eps, ceil, floor, round, fix, isfinite, isinf, isnan,
+% spfun, and A.^B.  These methods are currently implemented in
+% m-files, not in efficient parallel C functions.
+
+%%
+% Here is an example that illustrates the performance of C = [A B]
+clear
+A = sparse (rand (2000)) ;
+B = sparse (rand (2000)) ;
+tic
+C1 = [A B] ;
+matlab_time = toc ;
+
+A = GrB (A) ;
+B = GrB (B) ;
+tic
+C2 = [A B] ;
+gb_time = toc ;
+
+err = norm (C1-C2,1)
+fprintf ('\nMATLAB: %g sec, GraphBLAS: %g sec\n', ...
+    matlab_time, gb_time) ;
+if (gb_time > matlab_time)
+    fprintf ('GraphBLAS is slower by a factor of %g\n', ...
+        gb_time / matlab_time) ;
+end
+
+%%
+% (5) Linear indexing:
+%
+% If A is an m-by-n 2D MATLAB matrix, with n > 1, A(:) is a column vector
+% of length m*n.  The index operation A(i) accesses the ith entry in the
+% vector A(:).  This is called linear indexing in MATLAB.  It is not yet
+% available for GraphBLAS matrices in this MATLAB interface to GraphBLAS,
+% but it could be added in the future.
+
+%%
+% (6) Implicit singleton dimension expansion:
+%
+% In MATLAB C=A+B where A is m-by-n and B is a 1-by-n row vector
+% implicitly expands B to a matrix, computing C(i,j)=A(i,j)+B(j).  This
+% implicit expansion is not yet supported in GraphBLAS with C=A+B.
+% However, it can be done with C = GrB.mxm ('+.+', A, diag(GrB(B))).
+% That's a nice example of the power of semirings, but it's not
+% immediately obvious, and not as clear a syntax as C=A+B.  The
+% GraphBLAS/@GrB/dnn.m function uses this 'plus.plus' semiring to
+% apply the bias to each neuron.
+
+A = magic (3)
+B = 1000:1000:3000
+C1 = A + B
+C2 = GrB.mxm ('+.+', A, diag (GrB (B)))
+err = norm (C1-C2,1)
+
+%% GraphBLAS operations
+% In addition to the overloaded operators (such as C=A*B) and overloaded
+% functions (such as L=tril(A)), GraphBLAS also has methods of the form
+% GrB.method, listed on the next page.  Most of them take an optional
+% input matrix Cin, which is the initial value of the matrix C for the
+% expression below, an optional mask matrix M, and an optional
+% accumulator operator.
+%
+%      C<#M,replace> = accum (C, T)
+%
+% In the above expression, #M is either empty (no mask), M (with a mask
+% matrix) or ~M (with a complemented mask matrix), as determined by the
+% descriptor.  'replace' can be used to clear C after it is used in
+% accum(C,T) but before it is assigned with C<...> = Z, where
+% Z=accum(C,T).  The matrix T is the result of some operation, such as
+% T=A*B for GrB.mxm, or T=op(A,B) for GrB.eadd.
+%
+% A summary of these GrB.methods is on the next pages.
+
+%% Methods for the GrB class:
+%
+%   These methods operate on GraphBLAS matrices only, and they overload
+%   the existing MATLAB functions of the same name.
+%
+%   C = GrB (...)           construct a GraphBLAS matrix
+%   C = sparse (G)          makes a copy of a GrB matrix
+%   C = full (G, ...)       adds explicit zeros or id values to a GrB matrix
+%   C = double (G)          cast GrB matrix to MATLAB sparse double matrix
+%   C = logical (G)         cast GrB matrix to MATLAB sparse logical matrix
+%   C = complex (G)         cast GrB matrix to MATLAB sparse complex
+%   C = single (G)          cast GrB matrix to MATLAB full single matrix
+%   C = int8 (G)            cast GrB matrix to MATLAB full int8 matrix
+%   C = int16 (G)           cast GrB matrix to MATLAB full int16 matrix
+%   C = int32 (G)           cast GrB matrix to MATLAB full int32 matrix
+%   C = int64 (G)           cast GrB matrix to MATLAB full int64 matrix
+%   C = uint8 (G)           cast GrB matrix to MATLAB full uint8 matrix
+%   C = uint16 (G)          cast GrB matrix to MATLAB full uint16 matrix
+%   C = uint32 (G)          cast GrB matrix to MATLAB full uint32 matrix
+%   C = uint64 (G)          cast GrB matrix to MATLAB full uint64 matrix
+%   C = cast (G,...)        cast GrB matrix to MATLAB matrix (as above)
+
+%%
+%   X = nonzeros (G)        extract all entries from a GrB matrix
+%   [I,J,X] = find (G)      extract all entries from a GrB matrix
+%   C = spones (G)          return pattern of GrB matrix
+%   disp (G, level)         display a GrB matrix G
+%   display (G)             display a GrB matrix G; same as disp(G,2)
+%   mn = numel (G)          m*n for an m-by-n GrB matrix G
+%   e = nnz (G)             number of entries in a GrB matrix G
+%   e = nzmax (G)           number of entries in a GrB matrix G
+%   [m n] = size (G)        size of a GrB matrix G
+%   n = length (G)          length of a GrB vector
+%   s = isempty (G)         true if any dimension of G is zero
+%   s = issparse (G)        true for any GrB matrix G
+%   s = ismatrix (G)        true for any GrB matrix G
+%   s = isvector (G)        true if m=1 or n=1, for an m-by-n GrB matrix G
+%   s = iscolumn (G)        true if n=1, for an m-by-n GrB matrix G
+%   s = isrow (G)           true if m=1, for an m-by-n GrB matrix G
+%   s = isscalar (G)        true if G is a 1-by-1 GrB matrix
+%   s = isnumeric (G)       true for any GrB matrix G (even logical)
+%   s = isfloat (G)         true if GrB matrix is double, single, complex
+%   s = isreal (G)          true if GrB matrix is not complex
+%   s = isinteger (G)       true if GrB matrix is int8, int16, ..., uint64
+%   s = islogical (G)       true if GrB matrix is logical
+%   s = isa (G, classname)  check if a GrB matrix is of a specific class
+
+%%
+%   C = diag (G,k)          diagonal matrices and diagonals of GrB matrix G
+%   L = tril (G,k)          lower triangular part of GrB matrix G
+%   U = triu (G,k)          upper triangular part of GrB matrix G
+%   C = kron (A,B)          Kronecker product
+%   C = repmat (G, ...)     replicate and tile a GraphBLAS matrix
+%   C = reshape (G, ...)    reshape a GraphBLAS matrix
+%   C = abs (G)             absolute value
+%   C = sign (G)            signum function
+%   s = istril (G)          true if G is lower triangular
+%   s = istriu (G)          true if G is upper triangular
+%   s = isbanded (G,...)    true if G is banded
+%   s = isdiag (G)          true if G is diagonal
+%   s = ishermitian (G)     true if G is Hermitian
+%   s = issymmetric (G)     true if G is symmetric
+%   [lo,hi] = bandwidth (G) determine the lower & upper bandwidth of G
+%   C = sum (G, option)     reduce via sum, to vector or scalar
+%   C = prod (G, option)    reduce via product, to vector or scalar
+%   s = norm (G, kind)      1-norm or inf-norm of a GrB matrix
+%   C = max (G, ...)        reduce via max, to vector or scalar
+%   C = min (G, ...)        reduce via min, to vector or scalar
+%   C = any (G, ...)        reduce via '|', to vector or scalar
+%   C = all (G, ...)        reduce via '&', to vector or scalar
+
+%%
+%   C = sqrt (G)            element-wise square root
+%   C = eps (G)             floating-point spacing
+%   C = ceil (G)            round towards infinity
+%   C = floor (G)           round towards -infinity
+%   C = round (G)           round towards nearest
+%   C = fix (G)             round towards zero
+%   C = isfinite (G)        test if finite
+%   C = isinf (G)           test if infinite
+%   C = isnan (G)           test if NaN
+%   C = spfun (fun, G)      evaluate a function on the entries of G
+%   p = amd (G)             approximate minimum degree ordering
+%   p = colamd (G)          column approximate minimum degree ordering
+%   p = symamd (G)          approximate minimum degree ordering
+%   p = symrcm (G)          reverse Cuthill-McKee ordering
+%   [...] = dmperm (G)      Dulmage-Mendelsohn permutation
+%   parent = etree (G)      elimination tree
+%   C = conj (G)            complex conjugate
+%   C = real (G)            real part of a complex GraphBLAS matrix
+%   [V, ...] = eig (G,...)  eigenvalues and eigenvectors
+%   assert (G)              generate an error if G is false
+%   C = zeros (...,'like',G)   all-zero matrix, same type as G
+%   C = false (...,'like',G)   all-false logical matrix
+%   C = ones (...,'like',G)    matrix with all ones, same type as G
+
+%% Operator overloading:
+%
+%   C = plus (A, B)         C = A + B
+%   C = minus (A, B)        C = A - B
+%   C = uminus (G)          C = -G
+%   C = uplus (G)           C = +G
+%   C = times (A, B)        C = A .* B
+%   C = mtimes (A, B)       C = A * B
+%   C = rdivide (A, B)      C = A ./ B
+%   C = ldivide (A, B)      C = A .\ B
+%   C = mrdivide (A, B)     C = A / B
+%   C = mldivide (A, B)     C = A \ B
+%   C = power (A, B)        C = A .^ B
+%   C = mpower (A, B)       C = A ^ B
+%   C = lt (A, B)           C = A < B
+%   C = gt (A, B)           C = A > B
+%   C = le (A, B)           C = A <= B
+%   C = ge (A, B)           C = A >= B
+%   C = ne (A, B)           C = A ~= B
+%   C = eq (A, B)           C = A == B
+%   C = and (A, B)          C = A & B
+%   C = or (A, B)           C = A | B
+%   C = not (G)             C = ~G
+%   C = ctranspose (G)      C = G'
+%   C = transpose (G)       C = G.'
+%   C = horzcat (A, B)      C = [A , B]
+%   C = vertcat (A, B)      C = [A ; B]
+%   C = subsref (A, I, J)   C = A (I,J) or C = A (M)
+%   C = subsasgn (A, I, J)  C (I,J) = A
+%   index = end (A, k, n)   for object indexing, A(1:end,1:end)
+
+%% Static Methods:
+%
+%   The Static Methods for the GrB class can be used on input matrices of
+%   any kind: GraphBLAS sparse matrices, MATLAB sparse matrices, or
+%   MATLAB dense matrices, in any combination.  The output matrix Cout is
+%   a GraphBLAS matrix, by default, but can be optionally returned as a
+%   MATLAB sparse or dense matrix.  The static methods divide into two
+%   categories: those that perform basic functions, and the GraphBLAS
+%   operations that use the mask/accum.
+
+%% GraphBLAS basic functions:
+%
+%   GrB.clear                    clear GraphBLAS workspace and settings
+%   GrB.descriptorinfo (d)       list properties of a descriptor
+%   GrB.unopinfo (op, type)      list properties of a unary operator
+%   GrB.binopinfo (op, type)     list properties of a binary operator
+%   GrB.monoidinfo (op, type)    list properties of a monoid
+%   GrB.semiringinfo (s, type)   list properties of a semiring
+%   t = GrB.threads (t)          set/get # of threads to use in GraphBLAS
+%   c = GrB.chunk (c)            set/get chunk size to use in GraphBLAS
+%   b = GrB.burble (b)           set/get burble (diagnostic output)
+%   result = GrB.entries (G,...) count or query entries in a matrix
+%   result = GrB.nonz (G,...)    count or query nonzeros in a matrix
+%   C = GrB.prune (A, id)        prune entries equal to id
+%   C = GrB.offdiag (A)          prune diagonal entries
+%   s = GrB.isfull (A)           true if all entries present
+%   [C,I,J] = GrB.compact (A,id) remove empty rows and columns
+%   G = GrB.empty (m, n)         return an empty GraphBLAS matrix
+%   s = GrB.type (A)             get the type of a MATLAB or GrB matrix A
+%   s = GrB.issigned (type)      true if type is signed
+%   f = GrB.format (f)           set/get matrix format to use in GraphBLAS
+%   s = GrB.isbyrow (A)          true if format of A is 'by row'
+%   s = GrB.isbycol (A)          true if format of A is 'by col'
+%   C = GrB.expand (scalar, A)   expand a scalar (C = scalar*spones(A))
+%   C = GrB.eye                  identity matrix of any type
+%   C = GrB.speye                identity matrix (of type 'double')
+%   C = GrB.build (I, J, X, m, n, dup, type, desc)
+%                                build a GrB matrix from list of entries
+%   [I,J,X] = GrB.extracttuples (A, desc)
+%                                extract all entries from a matrix
+%   s = GrB.normdiff (A, B, kind)   norm (A-B,kind)
+
+%% GraphBLAS operations with Cout, mask M, and accum.
+%
+%   Cout = GrB.mxm (Cin, M, accum, semiring, A, B, desc)
+%                   sparse matrix-matrix multiplication over a semiring
+%   Cout = GrB.select (Cin, M, accum, op, A, b, desc)
+%                   select a subset of entries from a matrix
+%   Cout = GrB.assign (Cin, M, accum, A, I, J, desc)
+%                   sparse matrix assignment, such as C(I,J)=A
+%   Cout = GrB.subassign (Cin, M, accum, A, I, J, desc)
+%                   sparse matrix assignment, such as C(I,J)=A
+%   Cout = GrB.vreduce (Cin, M, accum, op, A, desc)
+%                   reduce a matrix to a vector
+%   Cout = GrB.reduce (Cin, accum, op, A, desc)
+%                   reduce a matrix to a scalar
+%   Cout = GrB.kronecker (Cin, M, accum, op, A, B, desc)
+%                   Kronecker product
+%   Cout = GrB.trans (Cin, M, accum, A, desc)
+%                   transpose a matrix
+%   Cout = GrB.eadd (Cin, M, accum, op, A, B, desc)
+%                   element-wise addition
+%   Cout = GrB.emult (Cin, M, accum, op, A, B, desc)
+%                   element-wise multiplication
+%   Cout = GrB.apply (Cin, M, accum, op, A, desc)
+%                   apply a unary operator
+%   Cout = GrB.extract (Cin, M, accum, A, I, J, desc)
+%                   extract submatrix, like C=A(I,J) in MATLAB
+%
+%%
+% GraphBLAS operations (with Cout, Cin arguments) take the following form:
+%
+%   C<#M,replace> = accum (C, operation (A or A', B or B'))
+%
+%   C is both an input and output matrix.  In this MATLAB interface to
+%   GraphBLAS, it is split into Cin (the value of C on input) and Cout
+%   (the value of C on output).  M is the optional mask matrix, and #M is
+%   either M or ~M depending on whether or not the mask is complemented
+%   via the desc.mask option.  The replace option is determined by
+%   desc.out; if present, C is cleared after it is used in the accum
+%   operation but before the final assignment.  A and/or B may optionally
+%   be transposed via the descriptor fields desc.in0 and desc.in1,
+%   respectively.  To select the format of Cout, use desc.format.  See
+%   GrB.descriptorinfo for more details.
+%
+%   accum is optional; if it is not present, then the operation becomes
+%   C<...> = operation(A,B).  Otherwise, C = C + operation(A,B) is
+%   computed where '+' is the accum operator.  It acts like a sparse
+%   matrix addition (see GrB.eadd), in terms of the structure of the
+%   result C, but any binary operator can be used.
+%
+%   The mask M acts like MATLAB logical indexing.  If M(i,j)=1 then
+%   C(i,j) can be modified; if zero, it cannot be modified by the
+%   operation.
+
+%% Static Methods for graph algorithms:
+%
+%   r = GrB.pagerank (A, opts) ;            % PageRank of a matrix
+%   C = GrB.ktruss (A, k, check) ;          % k-truss
+%   s = GrB.tricount (A, check) ;           % triangle count
+%   L = GrB.laplacian (A, type, check) ;    % Laplacian graph
+%   C = GrB.incidence (A, ...) ;            % incidence matrix
+%   [v, parent] = GrB.bfs (A, s, ...) ;     % breadth-first search
+%   iset = GrB.mis (A, check) ;             % maximal independent set
+%   Y = GrB.dnn (W, bias, Y0) ;             % deep neural network
+%
+%   More graph algorithms will be added in the future.
+%
+% Thanks for watching!
+%
+% Tim Davis, Texas A&M University, http://faculty.cse.tamu.edu/davis
+% See also sparse, doc sparse, and https://twitter.com/DocSparse
+
+
+##### SOURCE END #####
+--></body></html>
\ No newline at end of file
diff --git a/GraphBLAS/demo/html/DellXPS13/README.txt b/GraphBLAS/demo/html/DellXPS13/README.txt
index 0fb21ce24a..2f0955020e 100644
--- a/GraphBLAS/demo/html/DellXPS13/README.txt
+++ b/GraphBLAS/demo/html/DellXPS13/README.txt
@@ -8,3 +8,5 @@ when running OpenMP inside MATLAB).
 
 v310: output from GraphBLAS v3.1.0
 v312: output from GraphBLAS v3.1.2 (draft, Nov 15, 2019)
+v320: output from GraphBLAS v3.2.0 (Feb 20, 2020)
+
diff --git a/GraphBLAS/demo/html/DellXPS13/v320/graphblas_demo.html b/GraphBLAS/demo/html/DellXPS13/v320/graphblas_demo.html
new file mode 100644
index 0000000000..06d933abba
--- /dev/null
+++ b/GraphBLAS/demo/html/DellXPS13/v320/graphblas_demo.html
@@ -0,0 +1,2693 @@
+
+<!DOCTYPE html
+  PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+   <!--
+This HTML was auto-generated from MATLAB code.
+To make changes, update the MATLAB code and republish this document.
+      --><title>GraphBLAS: graph algorithms in the language of linear algebra</title><meta name="generator" content="MATLAB 9.7"><link rel="schema.DC" href="http://purl.org/dc/elements/1.1/"><meta name="DC.date" content="2020-02-20"><meta name="DC.source" content="graphblas_demo.m"><style type="text/css">
+html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,font,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td{margin:0;padding:0;border:0;outline:0;font-size:100%;vertical-align:baseline;background:transparent}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:'';content:none}:focus{outine:0}ins{text-decoration:none}del{text-decoration:line-through}table{border-collapse:collapse;border-spacing:0}
+
+html { min-height:100%; margin-bottom:1px; }
+html body { height:100%; margin:0px; font-family:Arial, Helvetica, sans-serif; font-size:10px; color:#000; line-height:140%; background:#fff none; overflow-y:scroll; }
+html body td { vertical-align:top; text-align:left; }
+
+h1 { padding:0px; margin:0px 0px 25px; font-family:Arial, Helvetica, sans-serif; font-size:1.5em; color:#d55000; line-height:100%; font-weight:normal; }
+h2 { padding:0px; margin:0px 0px 8px; font-family:Arial, Helvetica, sans-serif; font-size:1.2em; color:#000; font-weight:bold; line-height:140%; border-bottom:1px solid #d6d4d4; display:block; }
+h3 { padding:0px; margin:0px 0px 5px; font-family:Arial, Helvetica, sans-serif; font-size:1.1em; color:#000; font-weight:bold; line-height:140%; }
+
+a { color:#005fce; text-decoration:none; }
+a:hover { color:#005fce; text-decoration:underline; }
+a:visited { color:#004aa0; text-decoration:none; }
+
+p { padding:0px; margin:0px 0px 20px; }
+img { padding:0px; margin:0px 0px 20px; border:none; }
+p img, pre img, tt img, li img, h1 img, h2 img { margin-bottom:0px; } 
+
+ul { padding:0px; margin:0px 0px 20px 23px; list-style:square; }
+ul li { padding:0px; margin:0px 0px 7px 0px; }
+ul li ul { padding:5px 0px 0px; margin:0px 0px 7px 23px; }
+ul li ol li { list-style:decimal; }
+ol { padding:0px; margin:0px 0px 20px 0px; list-style:decimal; }
+ol li { padding:0px; margin:0px 0px 7px 23px; list-style-type:decimal; }
+ol li ol { padding:5px 0px 0px; margin:0px 0px 7px 0px; }
+ol li ol li { list-style-type:lower-alpha; }
+ol li ul { padding-top:7px; }
+ol li ul li { list-style:square; }
+
+.content { font-size:1.2em; line-height:140%; padding: 20px; }
+
+pre, code { font-size:12px; }
+tt { font-size: 1.2em; }
+pre { margin:0px 0px 20px; }
+pre.codeinput { padding:10px; border:1px solid #d3d3d3; background:#f7f7f7; }
+pre.codeoutput { padding:10px 11px; margin:0px 0px 20px; color:#4c4c4c; }
+pre.error { color:red; }
+
+@media print { pre.codeinput, pre.codeoutput { word-wrap:break-word; width:100%; } }
+
+span.keyword { color:#0000FF }
+span.comment { color:#228B22 }
+span.string { color:#A020F0 }
+span.untermstring { color:#B20000 }
+span.syscmd { color:#B28C00 }
+
+.footer { width:auto; padding:10px 0px; margin:25px 0px 0px; border-top:1px dotted #878787; font-size:0.8em; line-height:140%; font-style:italic; color:#878787; text-align:left; float:none; }
+.footer p { margin:0px; }
+.footer a { color:#878787; }
+.footer a:hover { color:#878787; text-decoration:underline; }
+.footer a:visited { color:#878787; }
+
+table th { padding:7px 5px; text-align:left; vertical-align:middle; border: 1px solid #d6d4d4; font-weight:bold; }
+table td { padding:7px 5px; text-align:left; vertical-align:top; border:1px solid #d6d4d4; }
+
+
+
+
+
+  </style></head><body><div class="content"><h1>GraphBLAS: graph algorithms in the language of linear algebra</h1><!--introduction--><p>GraphBLAS is a library for creating graph algorithms based on sparse linear algebraic operations over semirings.  Visit <a href="http://graphblas.org">http://graphblas.org</a> for more details and resources.  See also the SuiteSparse:GraphBLAS User Guide in this package.</p><p>SuiteSparse:GraphBLAS, (c) 2017-2020, Tim Davis, Texas A&amp;M University, <a href="http://faculty.cse.tamu.edu/davis">http://faculty.cse.tamu.edu/davis</a></p><!--/introduction--><h2>Contents</h2><div><ul><li><a href="#1">GraphBLAS: faster and more general sparse matrices for MATLAB</a></li><li><a href="#2">Sparse integer matrices</a></li><li><a href="#3">Sparse single-precision matrices</a></li><li><a href="#4">Mixing MATLAB and GraphBLAS matrices</a></li><li><a href="#5">Faster matrix operations</a></li><li><a href="#6">A wide range of semirings</a></li><li><a href="#8">The max.plus tropical semiring</a></li><li><a href="#9">A boolean semiring</a></li><li><a href="#13">GraphBLAS operators, monoids, and semirings</a></li><li><a href="#16">Element-wise operations</a></li><li><a href="#18">Subtracting two matrices</a></li><li><a href="#20">Element-wise 'multiplication'</a></li><li><a href="#22">Overloaded operators</a></li><li><a href="#25">Overloaded functions</a></li><li><a href="#27">Zeros are handled differently</a></li><li><a href="#29">Displaying contents of a GraphBLAS matrix</a></li><li><a href="#34">Storing a matrix by row or by column</a></li><li><a href="#38">Hypersparse matrices</a></li><li><a href="#41">numel uses vpa if the matrix is really huge</a></li><li><a href="#43">The mask and accumulator</a></li><li><a href="#45">The descriptor</a></li><li><a href="#46">Integer arithmetic is different in GraphBLAS</a></li><li><a href="#48">An example graph algorithm: breadth-first search</a></li><li><a href="#49">Example graph algorithm: Luby's method 
in GraphBLAS</a></li><li><a href="#50">Sparse deep neural network</a></li><li><a href="#51">Solving the sparse deep neural network problem with GraphBLAS</a></li><li><a href="#52">Solving the sparse deep neural network problem with MATLAB</a></li><li><a href="#53">For objects, GraphBLAS has better colon notation than MATLAB</a></li><li><a href="#56">Iterative solvers work as-is</a></li><li><a href="#57">... even in single precision</a></li><li><a href="#60">Extreme performance differences between GraphBLAS and MATLAB.</a></li><li><a href="#61">Sparse logical indexing is much, much faster in GraphBLAS</a></li><li><a href="#63">Limitations and their future solutions</a></li><li><a href="#70">GraphBLAS operations</a></li><li><a href="#71">Methods for the GrB class:</a></li><li><a href="#75">Operator overloading:</a></li><li><a href="#76">Static Methods:</a></li><li><a href="#77">GraphBLAS basic functions:</a></li><li><a href="#78">GraphBLAS operations with Cout, mask M, and accum.</a></li><li><a href="#80">Static Methods for graph algorithms:</a></li></ul></div><h2 id="1">GraphBLAS: faster and more general sparse matrices for MATLAB</h2><p>GraphBLAS is not only useful for creating graph algorithms; it also supports a wide range of sparse matrix data types and operations. MATLAB can compute C=A*B with just two semirings: 'plus.times.double' and 'plus.times.complex' for complex matrices.  GraphBLAS has 1,040 unique built-in semirings, such as 'max.plus' (<a href="https://en.wikipedia.org/wiki/Tropical_semiring">https://en.wikipedia.org/wiki/Tropical_semiring</a>).  These semirings can be used to construct a wide variety of graph algorithms, based on operations on sparse adjacency matrices.</p><p>GraphBLAS supports sparse double and single precision matrices, logical, and sparse integer matrices: int8, int16, int32, int64, uint8, uint16, uint32, and uint64.  Complex matrices will be added in the future.</p><pre class="codeinput">clear <span class="string">all</span>
+format <span class="string">compact</span>
+rng (<span class="string">'default'</span>) ;
+X = 100 * rand (2) ;
+G = GrB (X)              <span class="comment">% GraphBLAS copy of a matrix X, same type</span>
+</pre><pre class="codeoutput">
+G =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    81.4724
+    (2,1)    90.5792
+    (1,2)    12.6987
+    (2,2)    91.3376
+
+</pre><h2 id="2">Sparse integer matrices</h2><p>Here's an int8 version of the same matrix:</p><pre class="codeinput">S = int8 (G)            <span class="comment">% convert G to a full MATLAB int8 matrix</span>
+G = GrB (X, <span class="string">'int8'</span>)      <span class="comment">% a GraphBLAS sparse int8 matrix</span>
+</pre><pre class="codeoutput">S =
+  2x2 int8 matrix
+   81   12
+   90   91
+
+G =
+
+  2x2 GraphBLAS int8_t matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)   81
+    (2,1)   90
+    (1,2)   12
+    (2,2)   91
+
+</pre><h2 id="3">Sparse single-precision matrices</h2><p>Matrix operations in GraphBLAS are typically as fast as, or faster than, MATLAB.  Here's an unfair comparison: computing X^2 with MATLAB in double precision and with GraphBLAS in single precision.  You would naturally expect GraphBLAS to be faster.</p><p>Please wait ...</p><pre class="codeinput">n = 1e5 ;
+X = spdiags (rand (n, 201), -100:100, n, n) ;
+G = GrB (X, <span class="string">'single'</span>) ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+tic
+X2 = X^2 ;
+matlab_time = toc ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (in single)\n'</span>, gb_time) ;
+fprintf (<span class="string">'MATLAB time:    %g sec (in double)\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+</pre><pre class="codeoutput">
+GraphBLAS time: 1.88612 sec (in single)
+MATLAB time:    6.23295 sec (in double)
+Speedup of GraphBLAS over MATLAB: 3.30465
+</pre><h2 id="4">Mixing MATLAB and GraphBLAS matrices</h2><p>The error in the last computation is about eps('single') since GraphBLAS did its computation in single precision, while MATLAB used double precision.  MATLAB and GraphBLAS matrices can be easily combined, as in X2-G2.  The sparse single precision matrices take less memory space.</p><pre class="codeinput">err = norm (X2 - G2, 1) / norm (X2,1)
+eps (<span class="string">'single'</span>)
+whos <span class="string">G</span> <span class="string">G2</span> <span class="string">X</span> <span class="string">X2</span>
+</pre><pre class="codeoutput">err =
+   1.5049e-07
+ans =
+  single
+  1.1921e-07
+  Name           Size                    Bytes  Class     Attributes
+
+  G         100000x100000            241879772  GrB                 
+  G2        100000x100000            481518572  GrB                 
+  X         100000x100000            322238408  double    sparse    
+  X2        100000x100000            641756808  double    sparse    
+
+</pre><h2 id="5">Faster matrix operations</h2><p>But even with standard double precision sparse matrices, GraphBLAS is typically faster than the built-in MATLAB methods.  Here's a fair comparison:</p><pre class="codeinput">G = GrB (X) ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+err = norm (X2 - G2, 1) / norm (X2,1)
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (in double)\n'</span>, gb_time) ;
+fprintf (<span class="string">'MATLAB time:    %g sec (in double)\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+</pre><pre class="codeoutput">err =
+     0
+
+GraphBLAS time: 2.50213 sec (in double)
+MATLAB time:    6.23295 sec (in double)
+Speedup of GraphBLAS over MATLAB: 2.49105
+</pre><h2 id="6">A wide range of semirings</h2><p>MATLAB can only compute C=A*B using the standard '+.*.double' and '+.*.complex' semirings.  A semiring is defined in terms of a string, 'add.mult.type', where 'add' is a monoid that takes the place of the additive operator, 'mult' is the multiplicative operator, and 'type' is the data type for the two inputs to the mult operator (the type defaults to the type of A for C=A*B).</p><p>In the standard semiring, C=A*B is defined as:</p><pre class="language-matlab">C(i,j) = sum (A(i,:).' .* B(:,j))
+</pre><p>using 'plus' as the monoid and 'times' as the multiplicative operator. But in a more general semiring, 'sum' can be any monoid, which is an associative and commutative operator that has an identity value.  For example, in the 'max.plus' tropical algebra, C(i,j) for C=A*B is defined as:</p><pre class="language-matlab">C(i,j) = max (A(i,:).' + B(:,j))
+</pre><p>This can be computed in GraphBLAS with:</p><pre class="language-matlab">C = GrB.mxm (<span class="string">'max.+'</span>, A, B)
+</pre><pre class="codeinput">n = 3 ;
+A = rand (n) ;
+B = rand (n) ;
+C = zeros (n) ;
+<span class="keyword">for</span> i = 1:n
+    <span class="keyword">for</span> j = 1:n
+        C(i,j) = max (A (i,:).' + B (:,j)) ;
+    <span class="keyword">end</span>
+<span class="keyword">end</span>
+C2 = GrB.mxm (<span class="string">'max.+'</span>, A, B) ;
+fprintf (<span class="string">'\nerr = norm (C-C2,1) = %g\n'</span>, norm (C-C2,1)) ;
+</pre><pre class="codeoutput">
+err = norm (C-C2,1) = 0
+</pre><h2 id="8">The max.plus tropical semiring</h2><p>Here are details of the "max.plus" tropical semiring.  The identity value is -inf since max(x,-inf) = max (-inf,x) = x for any x.</p><pre class="codeinput">GrB.semiringinfo (<span class="string">'max.+.double'</span>) ;
+</pre><pre class="codeoutput">
+    GraphBLAS Semiring: max.+.double (built-in)
+    GraphBLAS Monoid: semiring-&gt;add (built-in)
+    GraphBLAS BinaryOp: monoid-&gt;op (built-in) z=max(x,y)
+    GraphBLAS type: ztype double size: 8
+    GraphBLAS type: xtype double size: 8
+    GraphBLAS type: ytype double size: 8
+    identity: [    -inf ] terminal: [    inf ]
+
+    GraphBLAS BinaryOp: semiring-&gt;multiply (built-in) z=plus(x,y)
+    GraphBLAS type: ztype double size: 8
+    GraphBLAS type: xtype double size: 8
+    GraphBLAS type: ytype double size: 8
+</pre><h2 id="9">A boolean semiring</h2><p>MATLAB cannot multiply two logical matrices.  MATLAB R2019a converts them to double and uses the conventional +.*.double semiring instead. In GraphBLAS, this is the common Boolean 'or.and.logical' semiring, which is widely used in linear algebraic graph algorithms.</p><pre class="codeinput">GrB.semiringinfo (<span class="string">'|.&amp;.logical'</span>) ;
+</pre><pre class="codeoutput">
+    GraphBLAS Semiring: |.&amp;.logical (built-in)
+    GraphBLAS Monoid: semiring-&gt;add (built-in)
+    GraphBLAS BinaryOp: monoid-&gt;op (built-in) z=or(x,y)
+    GraphBLAS type: ztype bool size: 1
+    GraphBLAS type: xtype bool size: 1
+    GraphBLAS type: ytype bool size: 1
+    identity: [   0 ] terminal: [   1 ]
+
+    GraphBLAS BinaryOp: semiring-&gt;multiply (built-in) z=and(x,y)
+    GraphBLAS type: ztype bool size: 1
+    GraphBLAS type: xtype bool size: 1
+    GraphBLAS type: ytype bool size: 1
+</pre><pre class="codeinput">clear
+A = sparse (rand (3) &gt; 0.5)
+B = sparse (rand (3) &gt; 0.2)
+</pre><pre class="codeoutput">A =
+  3x3 sparse logical array
+   (2,1)      1
+   (2,2)      1
+   (3,2)      1
+   (1,3)      1
+B =
+  3x3 sparse logical array
+   (1,1)      1
+   (2,1)      1
+   (3,1)      1
+   (1,2)      1
+   (2,2)      1
+   (3,2)      1
+   (1,3)      1
+   (2,3)      1
+   (3,3)      1
+</pre><pre class="codeinput"><span class="keyword">try</span>
+    <span class="comment">% MATLAB R2019a does this by casting A and B to double</span>
+    C1 = A*B
+<span class="keyword">catch</span>
+    <span class="comment">% MATLAB R2018a throws an error</span>
+    fprintf (<span class="string">'MATLAB R2019a required for C=A*B with logical\n'</span>) ;
+    fprintf (<span class="string">'matrices.  Explicitly converting to double:\n'</span>) ;
+    C1 = double (A) * double (B)
+<span class="keyword">end</span>
+C2 = GrB (A) * GrB (B)
+</pre><pre class="codeoutput">C1 =
+   (1,1)        1
+   (2,1)        2
+   (3,1)        1
+   (1,2)        1
+   (2,2)        2
+   (3,2)        1
+   (1,3)        1
+   (2,3)        2
+   (3,3)        1
+
+C2 =
+
+  3x3 GraphBLAS bool matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)   1
+    (2,1)   1
+    (3,1)   1
+    (1,2)   1
+    (2,2)   1
+    (3,2)   1
+    (1,3)   1
+    (2,3)   1
+    (3,3)   1
+
+</pre><p>Note that C1 is a MATLAB sparse double matrix, and contains non-binary values.  C2 is a GraphBLAS logical matrix.</p><pre class="codeinput">whos
+GrB.type (C2)
+</pre><pre class="codeoutput">  Name      Size            Bytes  Class      Attributes
+
+  A         3x3                68  logical    sparse    
+  B         3x3               113  logical    sparse    
+  C1        3x3               176  double     sparse    
+  C2        3x3              1079  GrB                  
+
+ans =
+    'logical'
+</pre><h2 id="13">GraphBLAS operators, monoids, and semirings</h2><p>The C interface for SuiteSparse:GraphBLAS allows for arbitrary types and operators to be constructed.  However, the MATLAB interface to SuiteSparse:GraphBLAS is restricted to pre-defined types and operators: a mere 11 types, 66 unary operators, 275 binary operators, 44 monoids, 16 select operators, and 1,865 semirings (1,040 of which are unique, since some binary operators are equivalent: 'min.logical' and '&amp;.logical' are the same thing, for example).  The complex type and its binary operators, monoids, and semirings will be added in the near future.</p><p>That gives you a lot of tools to create all kinds of interesting graph algorithms.  For example:</p><pre class="language-matlab">GrB.bfs    <span class="comment">% breadth-first search</span>
+GrB.dnn    <span class="comment">% sparse deep neural network (http://graphchallenge.org)</span>
+GrB.mis    <span class="comment">% maximal independent set</span>
+</pre><p>See 'help GrB.binopinfo' for a list of the binary operators, and 'help GrB.monoidinfo' for the ones that can be used as the additive monoid in a semiring.</p><pre class="codeinput">help <span class="string">GrB.binopinfo</span>
+</pre><pre class="codeoutput"> GRB.BINOPINFO list the details of a GraphBLAS binary operator.
+ 
+  Usage
+ 
+    GrB.binopinfo
+    GrB.binopinfo (op)
+    GrB.binopinfo (op, type)
+ 
+  For GrB.binopinfo(op), the op must be a string of the form
+  'op.type', where 'op' is listed below.  The second usage allows the
+  type to be omitted from the first argument, as just 'op'.  This is
+  valid for all GraphBLAS operations, since the type defaults to the
+  type of the input matrices.  However, GrB.binopinfo does not have a
+  default type and thus one must be provided, either in the op as
+  GrB.binopinfo ('+.double'), or in the second argument, GrB.binopinfo
+  ('+', 'double').
+ 
+  The MATLAB interface to GraphBLAS provides for 27 different binary
+  operators, each of which may be used with any of the 11 types, for
+  a total of 27*11 = 297 valid binary operators.  Binary operators
+  are defined by a string of the form 'op.type', or just 'op'.  In
+  the latter case, the type defaults to the type of the matrix inputs
+  to the GraphBLAS operation.
+ 
+  The 6 comparator operators come in two flavors.  For the is*
+  operators, the result has the same type as the inputs, x and y,
+  with 1 for true and 0 for false.  For example isgt.double (pi, 3.0)
+  is the double value 1.0.  For the second set of 6 operators (eq,
+  ne, gt, lt, ge, le), the result is always logical (true or false).
+  In a semiring, the type of the add monoid must exactly match the
+  type of the output of the multiply operator, and thus
+  'plus.iseq.double' is valid (counting how many terms are equal).
+  The 'plus.eq.double' semiring is valid, but not the same semiring
+  since the 'plus' of 'plus.eq.double' has a logical type and is thus
+  equivalent to 'or.eq.double'.   The 'or.eq' is true if any terms
+  are equal and false otherwise (it does not count the number of
+  terms that are equal).
+ 
+  The following binary operators are available.  Many have equivalent
+  synonyms, so that '1st' and 'first' both define the first(x,y) = x
+  operator.
+ 
+    operator name(s) f(x,y)         |   operator names(s) f(x,y)
+    ---------------- ------         |   ----------------- ------
+    1st first        x              |   iseq             x == y
+    2nd second       y              |   isne             x ~= y
+    min              min(x,y)       |   isgt             x &gt; y
+    max              max(x,y)       |   islt             x &lt; y
+    +   plus         x+y            |   isge             x &gt;= y
+    -   minus        x-y            |   isle             x &lt;= y
+    rminus           y-x            |   ==  eq           x == y
+    *   times        x*y            |   ~=  ne           x ~= y
+    /   div          x/y            |   &gt;   gt           x &gt; y
+    \   rdiv         y/x            |   &lt;   lt           x &lt; y
+    |   || or  lor   x | y          |   &gt;=  ge           x &gt;= y
+    &amp;   &amp;&amp; and land  x &amp; y          |   &lt;=  le           x &lt;= y
+    xor lxor         xor(x,y)       |
+    pair             1              |   any              x, or y
+ 
+  The three logical operators, lor, land, and lxor, also come in 11
+  types.  z = lor.double (x,y) tests the condition (x~=0) || (y~=0),
+  and returns the double value 1.0 if true, or 0.0 if false.
+ 
+  Example:
+ 
+    % valid binary operators
+    GrB.binopinfo ('+.double') ;
+    GrB.binopinfo ('1st.int32') ;
+ 
+    % invalid binary operator (an error; this is a unary op):
+    GrB.binopinfo ('abs.double') ;
+ 
+  See also GrB.descriptorinfo, GrB.monoidinfo, GrB.selectopinfo,
+  GrB.semiringinfo, GrB.unopinfo.
+
+</pre><pre class="codeinput">help <span class="string">GrB.monoidinfo</span>
+</pre><pre class="codeoutput"> GRB.MONOIDINFO list the details of a GraphBLAS monoid.
+ 
+  Usage
+ 
+    GrB.monoidinfo
+    GrB.monoidinfo (monoid)
+    GrB.monoidinfo (monoid, type)
+ 
+  For GrB.monoidinfo(op), the op must be a string of the form
+  'op.type', where 'op' is listed below.  The second usage allows the
+  type to be omitted from the first argument, as just 'op'.  This is
+  valid for all GraphBLAS operations, since the type defaults to the
+  type of the input matrices.  However, GrB.monoidinfo does not have a
+  default type and thus one must be provided, either in the op as
+  GrB.monoidinfo ('+.double'), or in the second argument,
+  GrB.monoidinfo ('+', 'double').
+ 
+  The MATLAB interface to GraphBLAS provides for 44 different
+  monoids.  The valid monoids are: '+', '*', 'max', and 'min' for all
+  but the 'logical' type, and '|', '&amp;', 'xor', and 'eq' for the
+  'logical' type.
+ 
+  Example:
+ 
+    % valid monoids
+    GrB.monoidinfo ('+.double') ;
+    GrB.monoidinfo ('*.int32') ;
+ 
+    % invalid monoids
+    GrB.monoidinfo ('1st.int32') ;
+    GrB.monoidinfo ('abs.double') ;
+ 
+  See also GrB.binopinfo, GrB.descriptorinfo, % GrB.selectopinfo,
+  GrB.semiringinfo, GrB.unopinfo.
+
+</pre><h2 id="16">Element-wise operations</h2><p>Binary operators can be used in element-wise matrix operations, like C=A+B and C=A.*B.  For the matrix addition C=A+B, the pattern of C is the set union of A and B, and the '+' operator is applied for entries in the intersection.  Entries in A but not B, or in B but not A, are assigned to C without using the operator.  The '+' operator is used for C=A+B but any operator can be used with GrB.eadd.</p><pre class="codeinput">A = GrB (sprand (3, 3, 0.5)) ;
+B = GrB (sprand (3, 3, 0.5)) ;
+C1 = A + B
+C2 = GrB.eadd (<span class="string">'+'</span>, A, B)
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    1.47841
+    (2,2)    0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    1.47841
+    (2,2)    0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+err =
+     0
+</pre><h2 id="18">Subtracting two matrices</h2><p>A-B and GrB.eadd ('-', A, B) are not the same thing, since the '-' operator is not applied to an entry that is in B but not A.</p><pre class="codeinput">C1 = A-B
+C2 = GrB.eadd (<span class="string">'-'</span>, A, B)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    -0.666139
+    (3,1)    -0.735859
+    (1,2)    -0.334348
+    (2,2)    -0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    -0.334348
+    (2,2)    0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+</pre><p>But these give the same result</p><pre class="codeinput">C1 = A-B
+C2 = GrB.eadd (<span class="string">'+'</span>, A, GrB.apply (<span class="string">'-'</span>, B))
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    -0.666139
+    (3,1)    -0.735859
+    (1,2)    -0.334348
+    (2,2)    -0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  7 nonzeros, 7 entries
+
+    (1,1)    -0.666139
+    (3,1)    -0.735859
+    (1,2)    -0.334348
+    (2,2)    -0.146938
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+err =
+     0
+</pre><h2 id="20">Element-wise 'multiplication'</h2><p>For C = A.*B, the result C is the set intersection of the pattern of A and B.  The operator is applied to entries in both A and B.  Entries in A but not B, or B but not A, do not appear in the result C.</p><pre class="codeinput">C1 = A.*B
+C2 = GrB.emult (<span class="string">'*'</span>, A, B)
+C3 = double (A) .* double (B)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  1 nonzero, 1 entry
+
+    (1,2)    0.518474
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  1 nonzero, 1 entry
+
+    (1,2)    0.518474
+
+C3 =
+   (1,2)       0.5185
+</pre><p>Just as in GrB.eadd, any operator can be used in GrB.emult:</p><pre class="codeinput">A
+B
+C2 = GrB.emult (<span class="string">'max'</span>, A, B)
+</pre><pre class="codeoutput">
+A =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,2)    0.572029
+    (3,2)    0.566879
+    (2,3)    0.248635
+    (3,3)    0.104226
+
+
+B =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.666139
+    (3,1)    0.735859
+    (1,2)    0.906378
+    (2,2)    0.146938
+
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  1 nonzero, 1 entry
+
+    (1,2)    0.906378
+
+</pre><h2 id="22">Overloaded operators</h2><p>The following operators all work as you would expect for any matrix. The matrices A and B can be GraphBLAS matrices, or MATLAB sparse or dense matrices, in any combination, or scalars where appropriate:</p><pre>  A+B   A-B  A*B   A.*B  A./B  A.\B  A.^b   A/b   C=A(I,J)
+  -A    +A   ~A    A'    A.'   A&amp;B   A|B    b\A   C(I,J)=A
+  A~=B  A&gt;B  A==B  A&lt;=B  A&gt;=B  A&lt;B   [A,B]  [A;B]
+  A(1:end,1:end)</pre><p>For A^b, b must be a non-negative integer.</p><pre class="codeinput">C1 = [A B] ;
+C2 = [double(A) double(B)] ;
+assert (isequal (double (C1), C2))
+</pre><pre class="codeinput">C1 = A^2
+C2 = double (A)^2 ;
+err = norm (C1 - C2, 1)
+assert (err &lt; 1e-12)
+</pre><pre class="codeoutput">
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  5 nonzeros, 5 entries
+
+    (2,2)    0.140946
+    (3,2)    0.0590838
+    (1,3)    0.142227
+    (2,3)    0.0259144
+    (3,3)    0.151809
+
+err =
+     0
+</pre><pre class="codeinput">C1 = A (1:2,2:end)
+A = double (A) ;
+C2 = A (1:2,2:end) ;
+assert (isequal (double (C1), C2))
+</pre><pre class="codeoutput">
+C1 =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  2 nonzeros, 2 entries
+
+    (1,1)    0.572029
+    (2,2)    0.248635
+
+</pre><h2 id="25">Overloaded functions</h2><p>Many MATLAB built-in functions can be used with GraphBLAS matrices:</p><p>A few differences with the built-in functions:</p><pre class="language-matlab">S = sparse (G)        <span class="comment">% makes a copy of a GrB matrix</span>
+F = full (G)          <span class="comment">% adds explicit zeros, so numel(F)==nnz(F)</span>
+F = full (G,type,id)  <span class="comment">% adds explicit identity values to a GrB matrix</span>
+disp (G, level)       <span class="comment">% display a GrB matrix G; level=2 is the default.</span>
+</pre><p>In the list below, the first set of Methods are overloaded built-in methods.  They are used as-is on GraphBLAS matrices, such as C=abs(G). The Static methods are prefixed with "GrB.", as in C = GrB.apply ( ... ).</p><pre class="codeinput">methods <span class="string">GrB</span>
+</pre><pre class="codeoutput">
+Methods for class GrB:
+
+GrB             ge              le              sparse          
+abs             graph           length          spfun           
+all             gt              logical         spones          
+amd             horzcat         lt              sprand          
+and             int16           max             sprandn         
+any             int32           min             sprandsym       
+assert          int64           minus           sprintf         
+bandwidth       int8            mldivide        sqrt            
+ceil            isa             mpower          subsasgn        
+colamd          isbanded        mrdivide        subsref         
+complex         isdiag          mtimes          sum             
+conj            isempty         ne              symamd          
+ctranspose      isequal         nnz             symrcm          
+diag            isfinite        nonzeros        times           
+digraph         isfloat         norm            transpose       
+disp            ishermitian     not             tril            
+display         isinf           numel           triu            
+dmperm          isinteger       nzmax           true            
+double          islogical       ones            uint16          
+eig             ismatrix        or              uint32          
+end             isnan           plus            uint64          
+eps             isnumeric       power           uint8           
+eq              isreal          prod            uminus          
+etree           isscalar        rdivide         uplus           
+false           issparse        real            vertcat         
+find            issymmetric     repmat          xor             
+fix             istril          reshape         zeros           
+flip            istriu          round           
+floor           isvector        sign            
+fprintf         kron            single          
+full            ldivide         size            
+
+Static methods:
+
+apply           emult           issigned        reduce          
+assign          entries         kronecker       select          
+bfs             expand          ktruss          selectopinfo    
+binopinfo       extract         laplacian       semiringinfo    
+build           extracttuples   mis             speye           
+burble          eye             monoidinfo      subassign       
+chunk           finalize        mxm             threads         
+clear           format          nonz            trans           
+compact         incidence       normdiff        tricount        
+descriptorinfo  init            offdiag         type            
+dnn             isbycol         pagerank        unopinfo        
+eadd            isbyrow         prune           vreduce         
+empty           isfull          random          
+
+</pre><h2 id="27">Zeros are handled differently</h2><p>Explicit zeros cannot be automatically dropped from a GraphBLAS matrix, like they are in MATLAB sparse matrices.  In a shortest-path problem, for example, an edge A(i,j) that is missing has an infinite weight, (the monoid identity of min(x,y) is +inf).  A zero edge weight A(i,j)=0 is very different from an entry that is not present in A.  However, if a GraphBLAS matrix is converted into a MATLAB sparse matrix, explicit zeros are dropped, which is the convention for a MATLAB sparse matrix. They can also be dropped from a GraphBLAS matrix using the GrB.select method.</p><pre class="codeinput">G = GrB (magic (2)) ;
+G (1,1) = 0      <span class="comment">% G(1,1) still appears as an explicit entry</span>
+A = double (G)   <span class="comment">% but it's dropped when converted to MATLAB sparse</span>
+H = GrB.select (<span class="string">'nonzero'</span>, G)  <span class="comment">% drops the explicit zeros from G</span>
+fprintf (<span class="string">'nnz (G): %d  nnz (A): %g nnz (H): %g\n'</span>, <span class="keyword">...</span>
+    nnz (G), nnz (A), nnz (H)) ;
+fprintf (<span class="string">'num entries in G: %d\n'</span>, GrB.entries (G)) ;
+</pre><pre class="codeoutput">
+G =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  3 nonzeros, 4 entries
+
+    (1,1)    0
+    (2,1)    4
+    (1,2)    3
+    (2,2)    2
+
+A =
+   (2,1)        4
+   (1,2)        3
+   (2,2)        2
+
+H =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  3 nonzeros, 3 entries
+
+    (2,1)    4
+    (1,2)    3
+    (2,2)    2
+
+nnz (G): 3  nnz (A): 3 nnz (H): 3
+num entries in G: 4
+</pre><h2 id="29">Displaying contents of a GraphBLAS matrix</h2><p>Unlike MATLAB, the default is to display just a few entries of a GrB matrix. Here are all 100 entries of a 10-by-10 matrix, using a non-default disp(G,3):</p><pre class="codeinput">G = GrB (rand (10)) ;
+<span class="comment">% display everything:</span>
+disp (G,3)
+</pre><pre class="codeoutput">
+G =
+
+  10x10 GraphBLAS double matrix, sparse by col:
+  100 nonzeros, 100 entries
+
+    (1,1)    0.0342763
+    (2,1)    0.17802
+    (3,1)    0.887592
+    (4,1)    0.889828
+    (5,1)    0.769149
+    (6,1)    0.00497062
+    (7,1)    0.735693
+    (8,1)    0.488349
+    (9,1)    0.332817
+    (10,1)    0.0273313
+    (1,2)    0.467212
+    (2,2)    0.796714
+    (3,2)    0.849463
+    (4,2)    0.965361
+    (5,2)    0.902248
+    (6,2)    0.0363252
+    (7,2)    0.708068
+    (8,2)    0.322919
+    (9,2)    0.700716
+    (10,2)    0.472957
+    (1,3)    0.204363
+    (2,3)    0.00931977
+    (3,3)    0.565881
+    (4,3)    0.183435
+    (5,3)    0.00843818
+    (6,3)    0.284938
+    (7,3)    0.706156
+    (8,3)    0.909475
+    (9,3)    0.84868
+    (10,3)    0.564605
+    (1,4)    0.075183
+    (2,4)    0.535293
+    (3,4)    0.072324
+    (4,4)    0.515373
+    (5,4)    0.926149
+    (6,4)    0.949252
+    (7,4)    0.0478888
+    (8,4)    0.523767
+    (9,4)    0.167203
+    (10,4)    0.28341
+    (1,5)    0.122669
+    (2,5)    0.441267
+    (3,5)    0.157113
+    (4,5)    0.302479
+    (5,5)    0.758486
+    (6,5)    0.910563
+    (7,5)    0.0246916
+    (8,5)    0.232421
+    (9,5)    0.38018
+    (10,5)    0.677531
+    (1,6)    0.869074
+    (2,6)    0.471459
+    (3,6)    0.624929
+    (4,6)    0.987186
+    (5,6)    0.282885
+    (6,6)    0.843833
+    (7,6)    0.869597
+    (8,6)    0.308209
+    (9,6)    0.201332
+    (10,6)    0.706603
+    (1,7)    0.563222
+    (2,7)    0.575795
+    (3,7)    0.056376
+    (4,7)    0.73412
+    (5,7)    0.608022
+    (6,7)    0.0400164
+    (7,7)    0.540801
+    (8,7)    0.023064
+    (9,7)    0.165682
+    (10,7)    0.250393
+    (1,8)    0.23865
+    (2,8)    0.232033
+    (3,8)    0.303191
+    (4,8)    0.579934
+    (5,8)    0.267751
+    (6,8)    0.916376
+    (7,8)    0.833499
+    (8,8)    0.978692
+    (9,8)    0.734445
+    (10,8)    0.102896
+    (1,9)    0.353059
+    (2,9)    0.738955
+    (3,9)    0.57539
+    (4,9)    0.751433
+    (5,9)    0.93256
+    (6,9)    0.281622
+    (7,9)    0.51302
+    (8,9)    0.24406
+    (9,9)    0.950086
+    (10,9)    0.303638
+    (1,10)    0.563593
+    (2,10)    0.705101
+    (3,10)    0.0604146
+    (4,10)    0.672065
+    (5,10)    0.359793
+    (6,10)    0.62931
+    (7,10)    0.977758
+    (8,10)    0.394328
+    (9,10)    0.765651
+    (10,10)    0.457809
+
+
+</pre><p>That was disp(G,3), so every entry was printed.  It's a little long, so the default is not to print everything.</p><p>With the default display (level = 2):</p><pre class="codeinput">G
+</pre><pre class="codeoutput">
+G =
+
+  10x10 GraphBLAS double matrix, sparse by col:
+  100 nonzeros, 100 entries
+
+    (1,1)    0.0342763
+    (2,1)    0.17802
+    (3,1)    0.887592
+    (4,1)    0.889828
+    (5,1)    0.769149
+    (6,1)    0.00497062
+    (7,1)    0.735693
+    (8,1)    0.488349
+    (9,1)    0.332817
+    (10,1)    0.0273313
+    (1,2)    0.467212
+    (2,2)    0.796714
+    (3,2)    0.849463
+    (4,2)    0.965361
+    (5,2)    0.902248
+    (6,2)    0.0363252
+    (7,2)    0.708068
+    (8,2)    0.322919
+    (9,2)    0.700716
+    (10,2)    0.472957
+    (1,3)    0.204363
+    (2,3)    0.00931977
+    (3,3)    0.565881
+    (4,3)    0.183435
+    (5,3)    0.00843818
+    (6,3)    0.284938
+    (7,3)    0.706156
+    (8,3)    0.909475
+    (9,3)    0.84868
+    (10,3)    0.564605
+    ...
+
+</pre><p>That was disp(G,2) or just display(G), which is what is printed by a MATLAB statement that doesn't have a trailing semicolon.  With level = 1, disp(G,1) gives just a terse summary:</p><pre class="codeinput">disp (G,1)
+</pre><pre class="codeoutput">
+G =
+
+  10x10 GraphBLAS double matrix, sparse by col:
+  100 nonzeros, 100 entries
+
+
+</pre><h2 id="34">Storing a matrix by row or by column</h2><p>MATLAB stores its sparse matrices by column, referred to as 'standard CSC' in SuiteSparse:GraphBLAS.  In the CSC (compressed sparse column) format, each column of the matrix is stored as a list of entries, with their value and row index.  In the CSR (compressed sparse row) format, each row is stored as a list of values and their column indices. GraphBLAS uses both CSC and CSR, and the two formats can be intermixed arbitrarily.  In its C interface, the default format is CSR.  However, for better compatibility with MATLAB, this MATLAB interface for SuiteSparse:GraphBLAS uses CSC by default instead.</p><pre class="codeinput">rng (<span class="string">'default'</span>) ;
+GrB.clear ;                      <span class="comment">% clear all prior GraphBLAS settings</span>
+fprintf (<span class="string">'the default format is: %s\n'</span>, GrB.format) ;
+C = sparse (rand (2))
+G = GrB (C)
+GrB.format (G)
+</pre><pre class="codeoutput">the default format is: by col
+C =
+   (1,1)       0.8147
+   (2,1)       0.9058
+   (1,2)       0.1270
+   (2,2)       0.9134
+
+G =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.814724
+    (2,1)    0.905792
+    (1,2)    0.126987
+    (2,2)    0.913376
+
+ans =
+    'by col'
+</pre><p>Many graph algorithms work better in CSR format, with matrices stored by row.  For example, it is common to use A(i,j) for the edge (i,j), and many graph algorithms need to access the out-adjacencies of nodes, which is the row A(i,:) for node i.  If the CSR format is desired, GrB.format ('by row') tells GraphBLAS to create all subsequent matrices in the CSR format.  Converting from a MATLAB sparse matrix (in standard CSC format) takes a little more time (requiring a transpose), but subsequent graph algorithms can be faster.</p><pre class="codeinput">G = GrB (C, <span class="string">'by row'</span>)
+fprintf (<span class="string">'the format of G is:    %s\n'</span>, GrB.format (G)) ;
+H = GrB (C)
+fprintf (<span class="string">'the format of H is:    %s\n'</span>, GrB.format (H)) ;
+err = norm (H-G,1)
+</pre><pre class="codeoutput">
+G =
+
+  2x2 GraphBLAS double matrix, sparse by row:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.814724
+    (1,2)    0.126987
+    (2,1)    0.905792
+    (2,2)    0.913376
+
+the format of G is:    by row
+
+H =
+
+  2x2 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.814724
+    (2,1)    0.905792
+    (1,2)    0.126987
+    (2,2)    0.913376
+
+the format of H is:    by col
+err =
+     0
+</pre><h2 id="38">Hypersparse matrices</h2><p>SuiteSparse:GraphBLAS can use two kinds of sparse matrix data structures: standard and hypersparse, for both CSC and CSR formats.  In the standard CSC format used in MATLAB, an m-by-n matrix A takes O(n+nnz(A)) space.  MATLAB can create huge column vectors, but not huge matrices (when n is huge).</p><pre class="codeinput">clear <span class="string">all</span>
+[c, huge] = computer ;
+C = sparse (huge, 1)    <span class="comment">% MATLAB can create a huge-by-1 sparse column</span>
+<span class="keyword">try</span>
+    C = sparse (huge, huge)     <span class="comment">% but this fails</span>
+<span class="keyword">catch</span> me
+    error_expected = me
+<span class="keyword">end</span>
+</pre><pre class="codeoutput">C =
+   All zero sparse: 281474976710655x1
+error_expected = 
+  MException with properties:
+
+    identifier: 'MATLAB:array:SizeLimitExceeded'
+       message: 'Requested 281474976710655x281474976710655 (2097152.0GB) array exceeds maximum array size preference. Creation of arrays greater than this limit may take a long time and cause MATLAB to become unresponsive. See &lt;a href="matlab: helpview([docroot '/matlab/helptargets.map'], 'matlab_env_workspace_prefs')"&gt;array size limit&lt;/a&gt; or preference panel for more information.'
+         cause: {}
+         stack: [4x1 struct]
+    Correction: []
+</pre><p>In a GraphBLAS hypersparse matrix, an m-by-n matrix A takes only O(nnz(A)) space.  The difference can be huge if nnz (A) &lt;&lt; n.</p><pre class="codeinput">clear
+[c, huge] = computer ;
+G = GrB (huge, 1)            <span class="comment">% no problem for GraphBLAS</span>
+H = GrB (huge, huge)         <span class="comment">% this works in GraphBLAS too</span>
+</pre><pre class="codeoutput">
+G =
+
+  281474976710655x1 GraphBLAS double matrix, sparse by col:
+  no nonzeros, no entries
+
+
+H =
+
+  281474976710655x281474976710655 GraphBLAS double matrix, hypersparse by col:
+  no nonzeros, no entries
+
+</pre><p>Operations on huge hypersparse matrices are very fast; no component of the time or space complexity is Omega(n).</p><pre class="codeinput">I = randperm (huge, 2) ;
+J = randperm (huge, 2) ;
+H (I,J) = magic (2) ;        <span class="comment">% add 4 nonzeros to random locations in H</span>
+H (I,I) = 10 * [1 2 ; 3 4] ; <span class="comment">% so H^2 is not all zero</span>
+H = H^2 ;                    <span class="comment">% square H</span>
+H = (H' * 2) ;               <span class="comment">% transpose H and double the entries</span>
+K = pi * spones (H) ;
+H = H + K                    <span class="comment">% add pi to each entry in H</span>
+</pre><pre class="codeoutput">
+H =
+
+  281474976710655x281474976710655 GraphBLAS double matrix, hypersparse by col:
+  8 nonzeros, 8 entries
+
+    (27455183225557,27455183225557)    4403.14
+    (78390279669562,27455183225557)    383.142
+    (153933462881710,27455183225557)    343.142
+    (177993304104065,27455183225557)    3003.14
+    (27455183225557,177993304104065)    2003.14
+    (78390279669562,177993304104065)    183.142
+    (153933462881710,177993304104065)    143.142
+    (177993304104065,177993304104065)    1403.14
+
+</pre><h2 id="41">numel uses vpa if the matrix is really huge</h2><pre class="codeinput">e1 = numel (G)               <span class="comment">% this is huge, but still a flint</span>
+e2 = numel (H)               <span class="comment">% this is huge^2, which needs vpa</span>
+whos <span class="string">e1</span> <span class="string">e2</span>
+</pre><pre class="codeoutput">e1 =
+   2.8147e+14
+e2 =
+79228162514263774643590529025.0
+  Name      Size            Bytes  Class     Attributes
+
+  e1        1x1                 8  double              
+  e2        1x1                 8  sym                 
+
+</pre><p>All of these matrices take very little memory space:</p><pre class="codeinput">whos <span class="string">C</span> <span class="string">G</span> <span class="string">H</span> <span class="string">K</span>
+</pre><pre class="codeoutput">  Name                    Size                         Bytes  Class    Attributes
+
+  G         281474976710655x1                            989  GrB                
+  H         281474976710655x281474976710655             1308  GrB                
+  K         281474976710655x281474976710655             1308  GrB                
+
+</pre><h2 id="43">The mask and accumulator</h2><p>When not used in overloaded operators or built-in functions, many GraphBLAS methods of the form GrB.method ( ... ) can optionally use a mask and/or an accumulator operator.  If the accumulator is '+' in GrB.mxm, for example, then C = C + A*B is computed.  The mask acts much like logical indexing in MATLAB.  With a logical mask matrix M, C&lt;M&gt;=A*B allows only part of C to be assigned.  If M(i,j) is true, then C(i,j) can be modified.  If false, then C(i,j) is not modified.</p><p>For example, to set all values in C that are greater than 0.5 to 3:</p><pre class="codeinput">A = rand (3)
+C = GrB.assign (A, A &gt; 0.5, 3) ;     <span class="comment">% in GraphBLAS</span>
+C1 = GrB (A) ; C1 (A &gt; .5) = 3       <span class="comment">% also in GraphBLAS</span>
+C2 = A      ; C2 (A &gt; .5) = 3       <span class="comment">% in MATLAB</span>
+err = norm (C - C1, 1)
+err = norm (C - C2, 1)
+</pre><pre class="codeoutput">A =
+    0.9575    0.9706    0.8003
+    0.9649    0.9572    0.1419
+    0.1576    0.4854    0.4218
+
+C1 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)    3
+    (2,1)    3
+    (3,1)    0.157613
+    (1,2)    3
+    (2,2)    3
+    (3,2)    0.485376
+    (1,3)    3
+    (2,3)    0.141886
+    (3,3)    0.421761
+
+C2 =
+    3.0000    3.0000    3.0000
+    3.0000    3.0000    0.1419
+    0.1576    0.4854    0.4218
+err =
+     0
+err =
+     0
+</pre><h2 id="45">The descriptor</h2><p>Most GraphBLAS functions of the form GrB.method ( ... ) take an optional last argument, called the descriptor.  It is a MATLAB struct that can modify the computations performed by the method.  'help GrB.descriptorinfo' gives all the details.  The following is a short summary of the primary settings:</p><p>d.out  = 'default' or 'replace', clears C after the accum op is used.</p><p>d.mask = 'default' or 'complement', to use M or ~M as the mask matrix;          'structural', or 'structural complement', to use the pattern           of M or ~M.</p><p>d.in0  = 'default' or 'transpose', to transpose A for C=A*B, C=A+B, etc.</p><p>d.in1  = 'default' or 'transpose', to transpose B for C=A*B, C=A+B, etc.</p><p>d.kind = 'default', 'GrB', 'sparse', or 'full'; the output of GrB.method.</p><pre class="codeinput">A = sparse (rand (2)) ;
+B = sparse (rand (2)) ;
+C1 = A'*B ;
+C2 = GrB.mxm (<span class="string">'+.*'</span>, A, B, struct (<span class="string">'in0'</span>, <span class="string">'transpose'</span>)) ;
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">err =
+     0
+</pre><h2 id="46">Integer arithmetic is different in GraphBLAS</h2><p>MATLAB supports integer arithmetic on its full matrices, using int8, int16, int32, int64, uint8, uint16, uint32, or uint64 data types.  None of these integer data types can be used to construct a MATLAB sparse matrix, which can only be double, double complex, or logical. Furthermore, C=A*B is not defined for integer types in MATLAB, except when A and/or B are scalars.</p><p>GraphBLAS supports all of those types for its sparse matrices (except for complex, which will be added in the future).  All operations are supported, including C=A*B when A or B are any integer type, for all 1,865 semirings (1,040 of which are unique).</p><p>However, integer arithmetic differs in GraphBLAS and MATLAB.  In MATLAB, integer values saturate if they exceed their maximum value.  In GraphBLAS, integer operators act in a modular fashion.  The latter is essential when computing C=A*B over a semiring.  A saturating integer operator cannot be used as a monoid since it is not associative.</p><p>The C API for GraphBLAS allows for the creation of arbitrary user-defined types, so it would be possible to create different binary operators to allow element-wise integer operations to saturate, perhaps:</p><pre class="language-matlab">C = GrB.eadd(<span class="string">'+saturate'</span>,A,B)
+</pre><p>This would require an extension to this MATLAB interface.</p><pre class="codeinput">C = uint8 (magic (3)) ;
+G = GrB (C) ;
+C1 = C * 40
+C2 = G * 40
+C3 = double (G) * 40 ;
+S = double (C1 &lt; 255) ;
+assert (isequal (double (C1).*S, double (C2).*S))
+assert (isequal (nonzeros (C2), double (mod (nonzeros (C3), 256))))
+</pre><pre class="codeoutput">C1 =
+  3x3 uint8 matrix
+   255    40   240
+   120   200   255
+   160   255    80
+
+C2 =
+
+  3x3 GraphBLAS uint8_t matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)   64
+    (2,1)   120
+    (3,1)   160
+    (1,2)   40
+    (2,2)   200
+    (3,2)   104
+    (1,3)   240
+    (2,3)   24
+    (3,3)   80
+
+</pre><h2 id="48">An example graph algorithm: breadth-first search</h2><p>The breadth-first search of a graph finds all nodes reachable from the source node, and their level, v.  v=GrB.bfs(A,s) or v=bfs_matlab(A,s) compute the same thing, but GrB.bfs uses GraphBLAS matrices and operations, while bfs_matlab uses pure MATLAB operations.  v is defined as v(s) = 1 for the source node, v(i) = 2 for nodes adjacent to the source, and so on.</p><pre class="codeinput">clear <span class="string">all</span>
+rng (<span class="string">'default'</span>) ;
+n = 1e5 ;
+A = logical (sprandn (n, n, 1e-3)) ;
+
+tic
+v1 = GrB.bfs (A, 1) ;
+gb_time = toc ;
+
+tic
+v2 = bfs_matlab (A, 1) ;
+matlab_time = toc ;
+
+assert (isequal (double (v1'), v2))
+fprintf (<span class="string">'\nnodes reached: %d of %d\n'</span>, nnz (v2), n) ;
+fprintf (<span class="string">'GraphBLAS time: %g sec\n'</span>, gb_time) ;
+fprintf (<span class="string">'MATLAB time:    %g sec\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+</pre><pre class="codeoutput">
+nodes reached: 100000 of 100000
+GraphBLAS time: 0.761582 sec
+MATLAB time:    0.472689 sec
+Speedup of GraphBLAS over MATLAB: 0.620667
+</pre><h2 id="49">Example graph algorithm: Luby's method in GraphBLAS</h2><p>The GrB.mis.m function is a variant of Luby's randomized algorithm [Luby 1985].  It is a parallel method for finding a maximal independent set of nodes, where no two nodes are adjacent.  See the GraphBLAS/@GrB/GrB.mis.m function for details.  The graph must be symmetric with a zero-free diagonal, so A is symmetrized first and any diagonal entries are removed.</p><pre class="codeinput">A = GrB (A) ;
+A = GrB.offdiag (A|A') ;
+
+tic
+s = GrB.mis (A) ;
+toc
+fprintf (<span class="string">'# nodes in the graph: %g\n'</span>, size (A,1)) ;
+fprintf (<span class="string">'# edges: : %g\n'</span>, GrB.entries (A) / 2) ;
+fprintf (<span class="string">'size of maximal independent set found: %g\n'</span>, <span class="keyword">...</span>
+    full (double (sum (s)))) ;
+
+<span class="comment">% make sure it's independent</span>
+p = find (s) ;
+S = A (p,p) ;
+assert (GrB.entries (S) == 0)
+
+<span class="comment">% make sure it's maximal</span>
+notp = find (s == 0) ;
+S = A (notp, p) ;
+deg = GrB.vreduce (<span class="string">'+.int64'</span>, S) ;
+assert (logical (all (deg &gt; 0)))
+</pre><pre class="codeoutput">Elapsed time is 0.497827 seconds.
+# nodes in the graph: 100000
+# edges: : 9.9899e+06
+size of maximal independent set found: 2811
+</pre><h2 id="50">Sparse deep neural network</h2><p>The 2019 MIT GraphChallenge (see <a href="http://graphchallenge.org">http://graphchallenge.org</a>) is to solve a set of large sparse deep neural network problems.  In this demo, the MATLAB reference solution is compared with a solution using GraphBLAS, for a randomly constructed neural network.  See the GrB.dnn and dnn_matlab.m functions for details.</p><pre class="codeinput">clear <span class="string">all</span>
+rng (<span class="string">'default'</span>) ;
+nlayers = 16 ;
+nneurons = 4096 ;
+nfeatures = 30000 ;
+fprintf (<span class="string">'# layers:   %d\n'</span>, nlayers) ;
+fprintf (<span class="string">'# neurons:  %d\n'</span>, nneurons) ;
+fprintf (<span class="string">'# features: %d\n'</span>, nfeatures) ;
+
+tic
+Y0 = sprand (nfeatures, nneurons, 0.1) ;
+<span class="keyword">for</span> layer = 1:nlayers
+    W {layer} = sprand (nneurons, nneurons, 0.01) * 0.2 ;
+    bias {layer} = -0.2 * ones (1, nneurons) ;
+<span class="keyword">end</span>
+t_setup = toc ;
+fprintf (<span class="string">'construct problem time: %g sec\n'</span>, t_setup) ;
+
+<span class="comment">% convert the problem from MATLAB to GraphBLAS</span>
+t = tic ;
+[W_gb, bias_gb, Y0_gb] = dnn_mat2gb (W, bias, Y0) ;
+t = toc (t) ;
+fprintf (<span class="string">'setup time: %g sec\n'</span>, t) ;
+</pre><pre class="codeoutput"># layers:   16
+# neurons:  4096
+# features: 30000
+construct problem time: 6.19643 sec
+setup time: 0.428007 sec
+</pre><h2 id="51">Solving the sparse deep neural network problem with GraphBLAS</h2><p>Please wait ...</p><pre class="codeinput">tic
+Y1 = GrB.dnn (W_gb, bias_gb, Y0_gb) ;
+gb_time = toc ;
+fprintf (<span class="string">'total time in GraphBLAS: %g sec\n'</span>, gb_time) ;
+</pre><pre class="codeoutput">total time in GraphBLAS: 10.0564 sec
+</pre><h2 id="52">Solving the sparse deep neural network problem with MATLAB</h2><p>Please wait ...</p><pre class="codeinput">tic
+Y2 = dnn_matlab (W, bias, Y0) ;
+matlab_time = toc ;
+fprintf (<span class="string">'total time in MATLAB:    %g sec\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time) ;
+
+err = norm (Y1-Y2,1)
+</pre><pre class="codeoutput">total time in MATLAB:    85.9198 sec
+Speedup of GraphBLAS over MATLAB: 8.54381
+err =
+     0
+</pre><h2 id="53">For objects, GraphBLAS has better colon notation than MATLAB</h2><p>The MATLAB notation C = A (start:inc:fini) is very handy, and it works great if A is a MATLAB matrix.  But for objects like the GraphBLAS matrix, MATLAB starts by creating the explicit index vector I = start:inc:fini.  That's fine if the matrix is modest in size, but GraphBLAS can construct huge matrices. The problem is that 1:n cannot be explicitly constructed when n is huge.</p><p>The C API for GraphBLAS can represent the colon notation start:inc:fini in an implicit manner, so it can do the indexing without actually forming the explicit list I = start:inc:fini. But there is no access to this method using the MATLAB notation start:inc:fini.</p><p>Thus, to compute C = A (start:inc:fini) for very huge matrices, you need to use a cell array to represent the colon notation, as { start, inc, fini }, instead of start:inc:fini. See 'help GrB.extract', and 'help GrB.subassign' for C(I,J)=A.  The syntax isn't conventional, but it is far faster than the MATLAB colon notation for objects, and takes far less memory when I is huge.</p><pre class="codeinput">n = 1e14 ;
+H = GrB (n, n) ;            <span class="comment">% a huge empty matrix</span>
+I = [1 1e9 1e12 1e14] ;
+M = magic (4)
+H (I,I) = M ;
+J = {1, 1e13} ;            <span class="comment">% represents 1:1e13 colon notation</span>
+C1 = H (J, J)              <span class="comment">% computes C1 = H (1:1e13,1:1e13)</span>
+c = nonzeros (C1) ;
+m = nonzeros (M (1:3, 1:3)) ;
+assert (isequal (c, m)) ;
+</pre><pre class="codeoutput">M =
+    16     2     3    13
+     5    11    10     8
+     9     7     6    12
+     4    14    15     1
+
+C1 =
+
+  10000000000000x10000000000000 GraphBLAS double matrix, hypersparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)    16
+    (1000000000,1)    5
+    (1000000000000,1)    9
+    (1,1000000000)    2
+    (1000000000,1000000000)    11
+    (1000000000000,1000000000)    7
+    (1,1000000000000)    3
+    (1000000000,1000000000000)    10
+    (1000000000000,1000000000000)    6
+
+</pre><pre class="codeinput"><span class="keyword">try</span>
+    <span class="comment">% try to compute the same thing with colon</span>
+    <span class="comment">% notation (1:1e13), but this fails:</span>
+    C2 = H (1:1e13, 1:1e13)
+<span class="keyword">catch</span> me
+    error_expected = me
+<span class="keyword">end</span>
+</pre><pre class="codeoutput">error_expected = 
+  MException with properties:
+
+    identifier: 'MATLAB:array:SizeLimitExceeded'
+       message: 'Requested 10000000000000x1 (74505.8GB) array exceeds maximum array size preference. Creation of arrays greater than this limit may take a long time and cause MATLAB to become unresponsive. See &lt;a href="matlab: helpview([docroot '/matlab/helptargets.map'], 'matlab_env_workspace_prefs')"&gt;array size limit&lt;/a&gt; or preference panel for more information.'
+         cause: {}
+         stack: [4x1 struct]
+    Correction: []
+</pre><h2 id="56">Iterative solvers work as-is</h2><p>Many built-in functions work with GraphBLAS matrices unmodified.</p><pre class="codeinput">A = sparse (rand (4)) ;
+b = sparse (rand (4,1)) ;
+x = gmres (A,b)
+norm (A*x-b)
+x = gmres (GrB(A), GrB(b))
+norm (A*x-b)
+</pre><pre class="codeoutput">gmres converged at iteration 4 to a solution with relative residual 0.
+x =
+    0.9105
+    3.8949
+   -0.5695
+   -1.3867
+ans =
+   8.6711e-16
+gmres converged at iteration 4 to a solution with relative residual 0.
+x =
+    0.9105
+    3.8949
+   -0.5695
+   -1.3867
+ans =
+   7.2802e-16
+</pre><h2 id="57">... even in single precision</h2><pre class="codeinput">x = gmres (GrB(A,<span class="string">'single'</span>), GrB(b,<span class="string">'single'</span>))
+norm (A*x-b)
+</pre><pre class="codeoutput">gmres converged at iteration 4 to a solution with relative residual 0.
+x =
+    0.9105
+    3.8949
+   -0.5695
+   -1.3867
+ans =
+   3.5566e-07
+</pre><p>Both of the following uses of minres (A,b) fail to converge because A is not symmetric, as the method requires.  Both failures are correctly reported, and both the MATLAB version and the GraphBLAS version return the same incorrect vector x.</p><pre class="codeinput">x = minres (A, b)
+x = minres (GrB(A), GrB(b))
+</pre><pre class="codeoutput">minres stopped at iteration 4 without converging to the desired tolerance 1e-06
+because the maximum number of iterations was reached.
+The iterate returned (number 4) has relative residual 0.21.
+x =
+    0.2489
+    0.2081
+    0.0700
+    0.3928
+minres stopped at iteration 4 without converging to the desired tolerance 1e-06
+because the maximum number of iterations was reached.
+The iterate returned (number 4) has relative residual 0.21.
+
+x =
+
+  4x1 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    0.248942
+    (2,1)    0.208128
+    (3,1)    0.0699707
+    (4,1)    0.392812
+
+</pre><p>With a proper symmetric matrix</p><pre class="codeinput">A = A+A' ;
+x = minres (A, b)
+norm (A*x-b)
+x = minres (GrB(A), GrB(b))
+norm (A*x-b)
+</pre><pre class="codeoutput">minres converged at iteration 4 to a solution with relative residual 1.3e-11.
+x =
+ -114.0616
+   -1.4211
+  134.8227
+    2.0694
+ans =
+   1.3650e-11
+minres converged at iteration 4 to a solution with relative residual 1.3e-11.
+
+x =
+
+  4x1 GraphBLAS double matrix, sparse by col:
+  4 nonzeros, 4 entries
+
+    (1,1)    -114.062
+    (2,1)    -1.4211
+    (3,1)    134.823
+    (4,1)    2.0694
+
+ans =
+   1.3650e-11
+</pre><h2 id="60">Extreme performance differences between GraphBLAS and MATLAB.</h2><p>The GraphBLAS operations used so far are perhaps 2x to 50x faster than the corresponding MATLAB operations, depending on how many cores your computer has.  To run a demo illustrating a 500x or more speedup versus MATLAB, run this demo:</p><pre>  gbdemo2</pre><p>It will illustrate an assignment C(I,J)=A that can take under a second in GraphBLAS but several minutes in MATLAB.  To make the comparison even more dramatic, try:</p><pre>  gbdemo2 (20000)</pre><p>assuming you have enough memory.  The gbdemo2 is not part of this demo since it can take a long time; it tries a range of problem sizes, and each one takes several minutes in MATLAB.</p><h2 id="61">Sparse logical indexing is much, much faster in GraphBLAS</h2><p>The mask in GraphBLAS acts much like logical indexing in MATLAB, but it is not quite the same.  MATLAB logical indexing takes the form:</p><pre>     C (M) = A (M)</pre><p>which computes the same thing as the GraphBLAS statement:</p><pre>     C = GrB.assign (C, M, A)</pre><p>The GrB.assign statement computes C(M)=A(M), and it is vastly faster than the MATLAB statement C(M)=A(M), even if the time to convert the GrB matrix back to a MATLAB sparse matrix is included.</p><p>GraphBLAS can also compute C (M) = A (M) using overloaded operators for subsref and subsasgn, but C = GrB.assign (C, M, A) is a bit faster.</p><p>First, both methods in GraphBLAS (both are very fast):</p><pre class="codeinput">clear
+n = 4000 ;
+tic
+C = sprand (n, n, 0.1) ;
+A = 100 * sprand (n, n, 0.1) ;
+M = (C &gt; 0.5) ;
+t_setup = toc ;
+fprintf (<span class="string">'nnz(C): %g, nnz(M): %g, nnz(A): %g\n'</span>, <span class="keyword">...</span>
+    nnz(C), nnz(M), nnz(A)) ;
+fprintf (<span class="string">'\nsetup time:     %g sec\n'</span>, t_setup) ;
+
+<span class="comment">% include the time to convert C1 from a GraphBLAS</span>
+<span class="comment">% matrix to a MATLAB sparse matrix:</span>
+tic
+C1 = GrB.assign (C, M, A) ;
+C1 = double (C1) ;
+gb_time = toc ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec for GrB.assign\n'</span>, gb_time) ;
+
+<span class="comment">% now using overloaded operators, also include the time to</span>
+<span class="comment">% convert back to a MATLAB sparse matrix, for good measure:</span>
+A2 = GrB (A) ;
+C2 = GrB (C) ;
+tic
+C2 (M) = A2 (M) ;
+C2 = double (C2) ;
+gb_time2 = toc ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec for C(M)=A(M)\n'</span>, gb_time2) ;
+</pre><pre class="codeoutput">nnz(C): 1.5226e+06, nnz(M): 761163, nnz(A): 1.52245e+06
+
+setup time:     1.00999 sec
+
+GraphBLAS time: 0.026364 sec for GrB.assign
+
+GraphBLAS time: 0.125608 sec for C(M)=A(M)
+</pre><p>Please wait, this will take about 10 minutes or so ...</p><pre class="codeinput">tic
+C (M) = A (M) ;
+matlab_time = toc ;
+
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (GrB.assign)\n'</span>, gb_time) ;
+fprintf (<span class="string">'\nGraphBLAS time: %g sec (overloading)\n'</span>, gb_time2) ;
+fprintf (<span class="string">'MATLAB time:    %g sec\n'</span>, matlab_time) ;
+fprintf (<span class="string">'Speedup of GraphBLAS over MATLAB: %g\n'</span>, <span class="keyword">...</span>
+    matlab_time / gb_time2) ;
+
+<span class="comment">% GraphBLAS computes the exact same result with both methods:</span>
+assert (isequal (C1, C))
+assert (isequal (C2, C))
+C1 - C
+C2 - C
+</pre><pre class="codeoutput">
+GraphBLAS time: 0.026364 sec (GrB.assign)
+
+GraphBLAS time: 0.125608 sec (overloading)
+MATLAB time:    641.779 sec
+Speedup of GraphBLAS over MATLAB: 5109.38
+ans =
+   All zero sparse: 4000x4000
+ans =
+   All zero sparse: 4000x4000
+</pre><h2 id="63">Limitations and their future solutions</h2><p>The MATLAB interface for SuiteSparse:GraphBLAS is a work-in-progress. It has some limitations, most of which will be resolved over time.</p><p>(1) Nonblocking mode:</p><p>GraphBLAS has a 'non-blocking' mode, in which operations can be left pending and completed later.  SuiteSparse:GraphBLAS uses the non-blocking mode to speed up a sequence of assignment operations, such as C(I,J)=A.  However, in its MATLAB interface, this would require a MATLAB mexFunction to modify its inputs.  That breaks the MATLAB API standard, so it cannot be safely done.  As a result, using GraphBLAS via its MATLAB interface can be slower than when using its C API.  This restriction would not be a limitation if GraphBLAS were to be incorporated into MATLAB itself, but there is likely no way to do this in a mexFunction interface to GraphBLAS.</p><p>(2) Complex matrices:</p><p>GraphBLAS can operate on matrices with arbitrary user-defined types and operators.  The only constraint is that the type be a fixed sized typedef that can be copied with the ANSI C memcpy; variable-sized types are not yet supported.  However, in this MATLAB interface, SuiteSparse:GraphBLAS has access to only predefined types, operators, and semirings.  Complex types and operators will be added to this MATLAB interface in the future.  They already appear in the C version of GraphBLAS, with user-defined operators in GraphBLAS/Demo/Source/usercomplex.c.</p><p>(3) Integer element-wise operations:</p><p>Integer operations in MATLAB saturate, so that uint8(255)+1 is 255.  To allow for integer monoids, GraphBLAS uses modular arithmetic instead. This is the only way that C=A*B can be defined for integer semirings. 
However, saturating integer operators could be added in the future, so that element-wise integer operations on GraphBLAS sparse integer matrices could work just the same as their MATLAB counterparts.</p><p>So in the future, you could perhaps write this, for both sparse and dense integer matrices A and B:</p><pre>     C = GrB.eadd ('+saturate.int8', A, B)</pre><p>to compute the same thing as C=A+B in MATLAB for its full int8 matrices.  Note that MATLAB can do this only for dense integer matrices, since it doesn't support sparse integer matrices.</p><p>(4) Faster methods:</p><p>Most methods in this MATLAB interface are based on efficient parallel C functions in GraphBLAS itself, and are typically as fast or faster than the equivalent built-in operators and functions in MATLAB.</p><p>There are a few notable exceptions; these will be addressed in the future. Dense matrices and vectors held as GraphBLAS objects are slower than their MATLAB counterparts.  horzcat and vertcat, for [A B] and [A;B] when either A or B is a GraphBLAS matrix, are also slow, as illustrated below in the next example.</p><p>Other methods that will be faster in the future include bandwidth, istriu, istril, eps, ceil, floor, round, fix, isfinite, isinf, isnan, spfun, and A.^B.  These methods are currently implemented in m-files, not in efficient parallel C functions.</p><p>Here is an example that illustrates the performance of C = [A B]</p><pre class="codeinput">clear
+A = sparse (rand (2000)) ;
+B = sparse (rand (2000)) ;
+tic
+C1 = [A B] ;
+matlab_time = toc ;
+
+A = GrB (A) ;
+B = GrB (B) ;
+tic
+C2 = [A B] ;
+gb_time = toc ;
+
+err = norm (C1-C2,1)
+fprintf (<span class="string">'\nMATLAB: %g sec, GraphBLAS: %g sec\n'</span>, <span class="keyword">...</span>
+    matlab_time, gb_time) ;
+<span class="keyword">if</span> (gb_time &gt; matlab_time)
+    fprintf (<span class="string">'GraphBLAS is slower by a factor of %g\n'</span>, <span class="keyword">...</span>
+        gb_time / matlab_time) ;
+<span class="keyword">end</span>
+</pre><pre class="codeoutput">err =
+     0
+
+MATLAB: 0.039108 sec, GraphBLAS: 0.117829 sec
+GraphBLAS is slower by a factor of 3.01291
+</pre><p>(5) Linear indexing:</p><p>If A is an m-by-n 2D MATLAB matrix, with n &gt; 1, A(:) is a column vector of length m*n.  The index operation A(i) accesses the ith entry in the vector A(:).  This is called linear indexing in MATLAB.  It is not yet available for GraphBLAS matrices in this MATLAB interface to GraphBLAS, but it could be added in the future.</p><p>(6) Implicit singleton dimension expansion:</p><p>In MATLAB, C=A+B, where A is m-by-n and B is a 1-by-n row vector, implicitly expands B to a matrix, computing C(i,j)=A(i,j)+B(j).  This implicit expansion is not yet supported in GraphBLAS with C=A+B. However, it can be done with C = GrB.mxm ('+.+', A, diag(GrB(B))). That's a nice example of the power of semirings, but it's not immediately obvious, and not as clear a syntax as C=A+B.  The GraphBLAS/@GrB/dnn.m function uses this 'plus.plus' semiring to apply the bias to each neuron.</p><pre class="codeinput">A = magic (3)
+B = 1000:1000:3000
+C1 = A + B
+C2 = GrB.mxm (<span class="string">'+.+'</span>, A, diag (GrB (B)))
+err = norm (C1-C2,1)
+</pre><pre class="codeoutput">A =
+     8     1     6
+     3     5     7
+     4     9     2
+B =
+        1000        2000        3000
+C1 =
+        1008        2001        3006
+        1003        2005        3007
+        1004        2009        3002
+
+C2 =
+
+  3x3 GraphBLAS double matrix, sparse by col:
+  9 nonzeros, 9 entries
+
+    (1,1)    1008
+    (2,1)    1003
+    (3,1)    1004
+    (1,2)    2001
+    (2,2)    2005
+    (3,2)    2009
+    (1,3)    3006
+    (2,3)    3007
+    (3,3)    3002
+
+err =
+     0
+</pre><h2 id="70">GraphBLAS operations</h2><p>In addition to the overloaded operators (such as C=A*B) and overloaded functions (such as L=tril(A)), GraphBLAS also has methods of the form GrB.method, listed on the next page.  Most of them take an optional input matrix Cin, which is the initial value of the matrix C for the expression below, an optional mask matrix M, and an optional accumulator operator.</p><pre>    C&lt;#M,replace&gt; = accum (C, T)</pre><p>In the above expression, #M is either empty (no mask), M (with a mask matrix) or ~M (with a complemented mask matrix), as determined by the descriptor.  'replace' can be used to clear C after it is used in accum(C,T) but before it is assigned with C&lt;...&gt; = Z, where Z=accum(C,T).  The matrix T is the result of some operation, such as T=A*B for GrB.mxm, or T=op(A,B) for GrB.eadd.</p><p>A summary of these GrB.methods is on the next pages.</p><h2 id="71">Methods for the GrB class:</h2><pre class="language-matlab">These <span class="string">methods</span> <span class="string">operate</span> <span class="string">on</span> <span class="string">GraphBLAS</span> <span class="string">matrices</span> <span class="string">only</span>, and <span class="string">they</span> <span class="string">overload</span>
+the <span class="string">existing</span> <span class="string">MATLAB</span> <span class="string">functions</span> <span class="string">of</span> <span class="string">the</span> <span class="string">same</span> <span class="string">name.</span>
+</pre><pre class="language-matlab">C = GrB (<span class="keyword">...</span><span class="comment">)           construct a GraphBLAS matrix</span>
+C = sparse (G)          makes a copy of a GrB matrix
+C = full (G, <span class="keyword">...</span><span class="comment">)       adds explicit zeros or id values to a GrB matrix</span>
+C = double (G)          cast GrB matrix to MATLAB sparse double matrix
+C = logical (G)         cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">sparse</span> <span class="string">logical</span> <span class="string">matrix</span>
+C = complex (G)         cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">sparse</span> <span class="string">complex</span>
+C = single (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">single</span> <span class="string">matrix</span>
+C = int8 (G)            cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int8</span> <span class="string">matrix</span>
+C = int16 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int16</span> <span class="string">matrix</span>
+C = int32 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int32</span> <span class="string">matrix</span>
+C = int64 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">int64</span> <span class="string">matrix</span>
+C = uint8 (G)           cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint8</span> <span class="string">matrix</span>
+C = uint16 (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint16</span> <span class="string">matrix</span>
+C = uint32 (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint32</span> <span class="string">matrix</span>
+C = uint64 (G)          cast <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">MATLAB</span> <span class="string">full</span> <span class="string">uint64</span> <span class="string">matrix</span>
+C = cast (G,<span class="keyword">...</span><span class="comment">)        cast GrB matrix to MATLAB matrix (as above)</span>
+</pre><pre class="language-matlab">X = nonzeros (G)        extract <span class="string">all</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span>
+[I,J,X] = find (G)      extract <span class="string">all</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span>
+C = spones (G)          <span class="keyword">return</span> pattern <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span>
+disp (G, level)         display <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+display (G)             display <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>; same <span class="string">as</span> <span class="string">disp(G,2)</span>
+mn = numel (G)          m*n <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+e = nnz (G)             number <span class="string">of</span> <span class="string">entries</span> <span class="string">in</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+e = nzmax (G)           number <span class="string">of</span> <span class="string">entries</span> <span class="string">in</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+[m n] = size (G)        size <span class="string">of</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+n = length (G)          length <span class="string">of</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">vector</span>
+s = isempty (G)         true <span class="string">if</span> <span class="string">any</span> <span class="string">dimension</span> <span class="string">of</span> <span class="string">G</span> <span class="string">is</span> <span class="string">zero</span>
+s = issparse (G)        true <span class="string">for</span> <span class="string">any</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+s = ismatrix (G)        true <span class="string">for</span> <span class="string">any</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+s = isvector (G)        true <span class="string">if</span> <span class="string">m=1</span> <span class="string">or</span> <span class="string">n=1</span>, <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+s = iscolumn (G)        true <span class="string">if</span> <span class="string">n=1</span>, <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+s = isrow (G)           true <span class="string">if</span> <span class="string">m=1</span>, <span class="keyword">for</span> an m-by-n GrB <span class="string">matrix</span> <span class="string">G</span>
+s = isscalar (G)        true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">a</span> <span class="string">1-by-1</span> <span class="string">GrB</span> <span class="string">matrix</span>
+s = isnumeric (G)       true <span class="string">for</span> <span class="string">any</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span> <span class="string">(even logical)</span>
+s = isfloat (G)         true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">double</span>, single, complex
+s = isreal (G)          true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">not</span> <span class="string">complex</span>
+s = isinteger (G)       true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">int8</span>, int16, <span class="keyword">...</span><span class="comment">, uint64</span>
+s = islogical (G)       true <span class="string">if</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">logical</span>
+s = isa (G, classname)  check <span class="string">if</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">is</span> <span class="string">of</span> <span class="string">a</span> <span class="string">specific</span> <span class="string">class</span>
+</pre><pre class="language-matlab">C = diag (G,k)          diagonal <span class="string">matrices</span> <span class="string">and</span> <span class="string">diagonals</span> <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+L = tril (G,k)          lower <span class="string">triangular</span> <span class="string">part</span> <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+U = triu (G,k)          upper <span class="string">triangular</span> <span class="string">part</span> <span class="string">of</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">G</span>
+C = kron (A,B)          Kronecker <span class="string">product</span>
+C = repmat (G, <span class="keyword">...</span><span class="comment">)     replicate and tile a GraphBLAS matrix</span>
+C = reshape (G, <span class="keyword">...</span><span class="comment">)    reshape a GraphBLAS matrix</span>
+C = abs (G)             absolute value
+C = sign (G)            signum <span class="string">function</span>
+s = istril (G)          true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">lower</span> <span class="string">triangular</span>
+s = istriu (G)          true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">upper</span> <span class="string">triangular</span>
+s = isbanded (G,<span class="keyword">...</span><span class="comment">)    true if G is banded</span>
+s = isdiag (G)          true <span class="keyword">if</span> G is <span class="string">diagonal</span>
+s = ishermitian (G)     true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">Hermitian</span>
+s = issymmetric (G)     true <span class="string">if</span> <span class="string">G</span> <span class="string">is</span> <span class="string">symmetric</span>
+[lo,hi] = bandwidth (G) determine <span class="string">the</span> <span class="string">lower</span> <span class="string">&amp;</span> <span class="string">upper</span> <span class="string">bandwidth</span> <span class="string">of</span> <span class="string">G</span>
+C = sum (G, option)     reduce <span class="string">via</span> <span class="string">sum</span>, to <span class="string">vector</span> <span class="string">or</span> <span class="string">scalar</span>
+C = prod (G, option)    reduce <span class="string">via</span> <span class="string">product</span>, to <span class="string">vector</span> <span class="string">or</span> <span class="string">scalar</span>
+s = norm (G, kind)      1-norm or <span class="string">inf-norm</span> <span class="string">of</span> <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span>
+C = max (G, <span class="keyword">...</span><span class="comment">)        reduce via max, to vector or scalar</span>
+C = min (G, <span class="keyword">...</span><span class="comment">)        reduce via min, to vector or scalar</span>
+C = any (G, <span class="keyword">...</span><span class="comment">)        reduce via '|', to vector or scalar</span>
+C = all (G, <span class="keyword">...</span><span class="comment">)        reduce via '&amp;', to vector or scalar</span>
+</pre><pre class="language-matlab">C = sqrt (G)            element-wise square <span class="string">root</span>
+C = eps (G)             floating-point spacing
+C = ceil (G)            round <span class="string">towards</span> <span class="string">infinity</span>
+C = floor (G)           round <span class="string">towards</span> <span class="string">-infinity</span>
+C = round (G)           round <span class="string">towards</span> <span class="string">nearest</span>
+C = fix (G)             round <span class="string">towards</span> <span class="string">zero</span>
+C = isfinite (G)        test <span class="string">if</span> <span class="string">finite</span>
+C = isinf (G)           test <span class="string">if</span> <span class="string">infinite</span>
+C = isnan (G)           test <span class="string">if</span> <span class="string">NaN</span>
+C = spfun (fun, G)      evaluate <span class="string">a</span> <span class="string">function</span> <span class="string">on</span> <span class="string">the</span> <span class="string">entries</span> <span class="string">of</span> <span class="string">G</span>
+p = amd (G)             approximate <span class="string">minimum</span> <span class="string">degree</span> <span class="string">ordering</span>
+p = colamd (G)          column <span class="string">approximate</span> <span class="string">minimum</span> <span class="string">degree</span> <span class="string">ordering</span>
+p = symamd (G)          approximate <span class="string">minimum</span> <span class="string">degree</span> <span class="string">ordering</span>
+p = symrcm (G)          reverse <span class="string">Cuthill-McKee</span> <span class="string">ordering</span>
+[<span class="keyword">...</span><span class="comment">] = dmperm (G)      Dulmage-Mendelsohn permutation</span>
+parent = etree (G)      elimination tree
+C = conj (G)            complex conjugate
+C = real (G)            real part of a complex GraphBLAS matrix
+[V, <span class="keyword">...</span><span class="comment">] = eig (G,...)  eigenvalues and eigenvectors</span>
+assert (G)              generate an error <span class="keyword">if</span> G is <span class="string">false</span>
+C = zeros (<span class="keyword">...</span><span class="comment">,'like',G)   all-zero matrix, same type as G</span>
+C = false (<span class="keyword">...</span><span class="comment">,'like',G)   all-false logical matrix</span>
+C = ones (<span class="keyword">...</span><span class="comment">,'like',G)    matrix with all ones, same type as G</span>
+</pre><h2 id="75">Operator overloading:</h2><pre class="language-matlab">C = plus (A, B)         C = A + B
+C = minus (A, B)        C = A - B
+C = uminus (G)          C = -G
+C = uplus (G)           C = +G
+C = times (A, B)        C = A .* B
+C = mtimes (A, B)       C = A * B
+C = rdivide (A, B)      C = A ./ B
+C = ldivide (A, B)      C = A .\ B
+C = mrdivide (A, B)     C = A / B
+C = mldivide (A, B)     C = A \ B
+C = power (A, B)        C = A .^ B
+C = mpower (A, B)       C = A ^ B
+C = lt (A, B)           C = A &lt; B
+C = gt (A, B)           C = A &gt; B
+C = le (A, B)           C = A &lt;= B
+C = ge (A, B)           C = A &gt;= B
+C = ne (A, B)           C = A ~= B
+C = eq (A, B)           C = A == B
+C = and (A, B)          C = A &amp; B
+C = or (A, B)           C = A | B
+C = not (G)             C = ~G
+C = ctranspose (G)      C = G'
+C = transpose (G)       C = G.'
+C = horzcat (A, B)      C = [A , B]
+C = vertcat (A, B)      C = [A ; B]
+C = subsref (A, I, J)   C = A (I,J) or <span class="string">C</span> <span class="string">=</span> <span class="string">A</span> <span class="string">(M)</span>
+C = subsasgn (A, I, J)  C (I,J) = A
+index = end (A, k, n)   <span class="keyword">for</span> object indexing, A(1:end,1:end)
+</pre><h2 id="76">Static Methods:</h2><pre class="language-matlab">The <span class="string">Static</span> <span class="string">Methods</span> <span class="string">for</span> <span class="string">the</span> <span class="string">GrB</span> <span class="string">class</span> <span class="string">can</span> <span class="string">be</span> <span class="string">used</span> <span class="string">on</span> <span class="string">input</span> <span class="string">matrices</span> <span class="string">of</span>
+any <span class="string">kind:</span> <span class="string">GraphBLAS</span> <span class="string">sparse</span> <span class="string">matrices</span>, MATLAB <span class="string">sparse</span> <span class="string">matrices</span>, or
+MATLAB <span class="string">dense</span> <span class="string">matrices</span>, in <span class="string">any</span> <span class="string">combination.</span>  <span class="string">The</span> <span class="string">output</span> <span class="string">matrix</span> <span class="string">Cout</span> <span class="string">is</span>
+a <span class="string">GraphBLAS</span> <span class="string">matrix</span>, by <span class="string">default</span>, but <span class="string">can</span> <span class="string">be</span> <span class="string">optionally</span> <span class="string">returned</span> <span class="string">as</span> <span class="string">a</span>
+MATLAB <span class="string">sparse</span> <span class="string">or</span> <span class="string">dense</span> <span class="string">matrix.</span>  <span class="string">The</span> <span class="string">static</span> <span class="string">methods</span> <span class="string">divide</span> <span class="string">into</span> <span class="string">two</span>
+categories: those that <span class="string">perform</span> <span class="string">basic</span> <span class="string">functions</span>, and <span class="string">the</span> <span class="string">GraphBLAS</span>
+operations <span class="string">that</span> <span class="string">use</span> <span class="string">the</span> <span class="string">mask/accum.</span>
+</pre><h2 id="77">GraphBLAS basic functions:</h2><pre class="language-matlab">GrB.clear                    clear <span class="string">GraphBLAS</span> <span class="string">workspace</span> <span class="string">and</span> <span class="string">settings</span>
+GrB.descriptorinfo (d)       list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">descriptor</span>
+GrB.unopinfo (op, type)      list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">unary</span> <span class="string">operator</span>
+GrB.binopinfo (op, type)     list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">binary</span> <span class="string">operator</span>
+GrB.monoidinfo (op, type)    list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">monoid</span>
+GrB.semiringinfo (s, type)   list <span class="string">properties</span> <span class="string">of</span> <span class="string">a</span> <span class="string">semiring</span>
+t = GrB.threads (t)          set/get # of <span class="string">threads</span> <span class="string">to</span> <span class="string">use</span> <span class="string">in</span> <span class="string">GraphBLAS</span>
+c = GrB.chunk (c)            set/get chunk <span class="string">size</span> <span class="string">to</span> <span class="string">use</span> <span class="string">in</span> <span class="string">GraphBLAS</span>
+b = GrB.burble (b)           set/get burble (diagnostic output)
+result = GrB.entries (G,<span class="keyword">...</span><span class="comment">) count or query entries in a matrix</span>
+result = GrB.nonz (G,<span class="keyword">...</span><span class="comment">)    count or query nonzeros in a matrix</span>
+C = GrB.prune (A, id)        prune entries equal to id
+C = GrB.offdiag (A)          prune <span class="string">diagonal</span> <span class="string">entries</span>
+s = GrB.isfull (A)           true <span class="string">if</span> <span class="string">all</span> <span class="string">entries</span> <span class="string">present</span>
+[C,I,J] = GrB.compact (A,id) remove <span class="string">empty</span> <span class="string">rows</span> <span class="string">and</span> <span class="string">columns</span>
+G = GrB.empty (m, n)         <span class="keyword">return</span> an <span class="string">empty</span> <span class="string">GraphBLAS</span> <span class="string">matrix</span>
+s = GrB.type (A)             get <span class="string">the</span> <span class="string">type</span> <span class="string">of</span> <span class="string">a</span> <span class="string">MATLAB</span> <span class="string">or</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">A</span>
+s = GrB.issigned (type)      true <span class="string">if</span> <span class="string">type</span> <span class="string">is</span> <span class="string">signed</span>
+f = GrB.format (f)           set/get matrix <span class="string">format</span> <span class="string">to</span> <span class="string">use</span> <span class="string">in</span> <span class="string">GraphBLAS</span>
+s = GrB.isbyrow (A)          true <span class="string">if</span> <span class="string">format</span> <span class="string">of</span> <span class="string">A</span> <span class="string">is</span> <span class="string">'by row'</span>
+s = GrB.isbycol (A)          true <span class="string">if</span> <span class="string">format</span> <span class="string">of</span> <span class="string">A</span> <span class="string">is</span> <span class="string">'by col'</span>
+C = GrB.expand (scalar, A)   expand <span class="string">a</span> <span class="string">scalar</span> <span class="string">(C = scalar*spones(A))</span>
+C = GrB.eye                  identity <span class="string">matrix</span> <span class="string">of</span> <span class="string">any</span> <span class="string">type</span>
+C = GrB.speye                identity <span class="string">matrix</span> <span class="string">(of type 'double')</span>
+C = GrB.build (I, J, X, m, n, dup, type, desc)
+                             build <span class="string">a</span> <span class="string">GrB</span> <span class="string">matrix</span> <span class="string">from</span> <span class="string">list</span> <span class="string">of</span> <span class="string">entries</span>
+[I,J,X] = GrB.extracttuples (A, desc)
+                             extract <span class="string">all</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">matrix</span>
+s = GrB.normdiff (A, B, kind)   norm (A-B,kind)
+</pre><h2 id="78">GraphBLAS operations with Cout, mask M, and accum.</h2><pre class="language-matlab">Cout = GrB.mxm (Cin, M, accum, semiring, A, B, desc)
+                sparse <span class="string">matrix-matrix</span> <span class="string">multiplication</span> <span class="string">over</span> <span class="string">a</span> <span class="string">semiring</span>
+Cout = GrB.select (Cin, M, accum, op, A, b, desc)
+                select <span class="string">a</span> <span class="string">subset</span> <span class="string">of</span> <span class="string">entries</span> <span class="string">from</span> <span class="string">a</span> <span class="string">matrix</span>
+Cout = GrB.assign (Cin, M, accum, A, I, J, desc)
+                sparse <span class="string">matrix</span> <span class="string">assignment</span>, such <span class="string">as</span> <span class="string">C(I,J)=A</span>
+Cout = GrB.subassign (Cin, M, accum, A, I, J, desc)
+                sparse <span class="string">matrix</span> <span class="string">assignment</span>, such <span class="string">as</span> <span class="string">C(I,J)=A</span>
+Cout = GrB.vreduce (Cin, M, accum, op, A, desc)
+                reduce <span class="string">a</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">a</span> <span class="string">vector</span>
+Cout = GrB.reduce (Cin, accum, op, A, desc)
+                reduce <span class="string">a</span> <span class="string">matrix</span> <span class="string">to</span> <span class="string">a</span> <span class="string">scalar</span>
+Cout = GrB.kronecker (Cin, M, accum, op, A, B, desc)
+                Kronecker <span class="string">product</span>
+Cout = GrB.trans (Cin, M, accum, A, desc)
+                transpose <span class="string">a</span> <span class="string">matrix</span>
+Cout = GrB.eadd (Cin, M, accum, op, A, B, desc)
+                element-wise addition
+Cout = GrB.emult (Cin, M, accum, op, A, B, desc)
+                element-wise multiplication
+Cout = GrB.apply (Cin, M, accum, op, A, desc)
+                apply <span class="string">a</span> <span class="string">unary</span> <span class="string">operator</span>
+Cout = GrB.extract (Cin, M, accum, A, I, J, desc)
+                extract <span class="string">submatrix</span>, like <span class="string">C=A(I,J)</span> <span class="string">in</span> <span class="string">MATLAB</span>
+</pre><p>GraphBLAS operations (with Cout, Cin arguments) take the following form:</p><pre class="language-matlab">C&lt;#M,replace&gt; = accum (C, operation (A or A', B or B'))
+</pre><pre class="language-matlab">C <span class="string">is</span> <span class="string">both</span> <span class="string">an</span> <span class="string">input</span> <span class="string">and</span> <span class="string">output</span> <span class="string">matrix.</span>  <span class="string">In</span> <span class="string">this</span> <span class="string">MATLAB</span> <span class="string">interface</span> <span class="string">to</span>
+GraphBLAS, it <span class="string">is</span> <span class="string">split</span> <span class="string">into</span> <span class="string">Cin</span> <span class="string">(the value of C on input)</span> <span class="string">and</span> <span class="string">Cout</span>
+(the value of C on output).  M is <span class="string">the</span> <span class="string">optional</span> <span class="string">mask</span> <span class="string">matrix</span>, and <span class="string">#M</span> <span class="string">is</span>
+either <span class="string">M</span> <span class="string">or</span> <span class="string">!M</span> <span class="string">depending</span> <span class="string">on</span> <span class="string">whether</span> <span class="string">or</span> <span class="string">not</span> <span class="string">the</span> <span class="string">mask</span> <span class="string">is</span> <span class="string">complemented</span>
+via <span class="string">the</span> <span class="string">desc.mask</span> <span class="string">option.</span>  <span class="string">The</span> <span class="string">replace</span> <span class="string">option</span> <span class="string">is</span> <span class="string">determined</span> <span class="string">by</span>
+desc.out; <span class="keyword">if</span> present, C <span class="string">is</span> <span class="string">cleared</span> <span class="string">after</span> <span class="string">it</span> <span class="string">is</span> <span class="string">used</span> <span class="string">in</span> <span class="string">the</span> <span class="string">accum</span>
+operation <span class="string">but</span> <span class="string">before</span> <span class="string">the</span> <span class="string">final</span> <span class="string">assignment.</span>  <span class="string">A</span> <span class="string">and/or</span> <span class="string">B</span> <span class="string">may</span> <span class="string">optionally</span>
+be <span class="string">transposed</span> <span class="string">via</span> <span class="string">the</span> <span class="string">descriptor</span> <span class="string">fields</span> <span class="string">desc.in0</span> <span class="string">and</span> <span class="string">desc.in1</span>,
+respectively.  To select <span class="string">the</span> <span class="string">format</span> <span class="string">of</span> <span class="string">Cout</span>, use <span class="string">desc.format.</span>  <span class="string">See</span>
+GrB.descriptorinfo <span class="keyword">for</span> more details.
+</pre><pre class="language-matlab">accum <span class="string">is</span> <span class="string">optional</span>; <span class="keyword">if</span> it is <span class="string">not</span> <span class="string">present</span>, then <span class="string">the</span> <span class="string">operation</span> <span class="string">becomes</span>
+C&lt;<span class="keyword">...</span><span class="comment">&gt; = operation(A,B).  Otherwise, C = C + operation(A,B) is</span>
+computed where <span class="string">'+'</span> <span class="string">is</span> <span class="string">the</span> <span class="string">accum</span> <span class="string">operator.</span>  <span class="string">It</span> <span class="string">acts</span> <span class="string">like</span> <span class="string">a</span> <span class="string">sparse</span>
+matrix <span class="string">addition</span> <span class="string">(see GrB.eadd)</span>, in <span class="string">terms</span> <span class="string">of</span> <span class="string">the</span> <span class="string">structure</span> <span class="string">of</span> <span class="string">the</span>
+result <span class="string">C</span>, but <span class="string">any</span> <span class="string">binary</span> <span class="string">operator</span> <span class="string">can</span> <span class="string">be</span> <span class="string">used.</span>
+</pre><pre class="language-matlab">The <span class="string">mask</span> <span class="string">M</span> <span class="string">acts</span> <span class="string">like</span> <span class="string">MATLAB</span> <span class="string">logical</span> <span class="string">indexing.</span>  <span class="string">If</span> <span class="string">M(i,j)=1</span> <span class="string">then</span>
+C(i,j) can <span class="string">be</span> <span class="string">modified</span>; <span class="keyword">if</span> zero, it <span class="string">cannot</span> <span class="string">be</span> <span class="string">modified</span> <span class="string">by</span> <span class="string">the</span>
+operation.
+</pre><h2 id="80">Static Methods for graph algorithms:</h2><pre class="language-matlab">r = GrB.pagerank (A, opts) ;            <span class="comment">% PageRank of a matrix</span>
+C = GrB.ktruss (A, k, check) ;          <span class="comment">% k-truss</span>
+s = GrB.tricount (A, check) ;           <span class="comment">% triangle count</span>
+L = GrB.laplacian (A, type, check) ;    <span class="comment">% Laplacian graph</span>
+C = GrB.incidence (A, <span class="keyword">...</span><span class="comment">) ;            % incidence matrix</span>
+[v, parent] = GrB.bfs (A, s, <span class="keyword">...</span><span class="comment">) ;     % breadth-first search</span>
+iset = GrB.mis (A, check) ;             <span class="comment">% maximal independent set</span>
+Y = GrB.dnn (W, bias, Y0) ;             <span class="comment">% deep neural network</span>
+</pre><pre class="language-matlab">More <span class="string">graph</span> <span class="string">algorithms</span> <span class="string">will</span> <span class="string">be</span> <span class="string">added</span> <span class="string">in</span> <span class="string">the</span> <span class="string">future.</span>
+</pre><p>Thanks for watching!</p><p>Tim Davis, Texas A&amp;M University, <a href="http://faculty.cse.tamu.edu/davis">http://faculty.cse.tamu.edu/davis</a> See also sparse, doc sparse, and <a href="https://twitter.com/DocSparse">https://twitter.com/DocSparse</a></p><p class="footer"><br><a href="https://www.mathworks.com/products/matlab/">Published with MATLAB&reg; R2019b</a><br></p></div><!--
+##### SOURCE BEGIN #####
+%% GraphBLAS: graph algorithms in the language of linear algebra
+% GraphBLAS is a library for creating graph algorithms based on sparse
+% linear algebraic operations over semirings.  Visit http://graphblas.org
+% for more details and resources.  See also the SuiteSparse:GraphBLAS
+% User Guide in this package.
+%
+% SuiteSparse:GraphBLAS, (c) 2017-2020, Tim Davis, Texas A&M University,
+% http://faculty.cse.tamu.edu/davis
+
+%% GraphBLAS: faster and more general sparse matrices for MATLAB
+% GraphBLAS is not only useful for creating graph algorithms; it also
+% supports a wide range of sparse matrix data types and operations.
+% MATLAB can compute C=A*B with just two semirings: 'plus.times.double'
+% and 'plus.times.complex' for complex matrices.  GraphBLAS has 1,040
+% unique built-in semirings, such as 'max.plus'
+% (https://en.wikipedia.org/wiki/Tropical_semiring).  These semirings can
+% be used to construct a wide variety of graph algorithms, based on
+% operations on sparse adjacency matrices.
+%
+% GraphBLAS supports sparse double and single precision matrices,
+% logical, and sparse integer matrices: int8, int16, int32, int64, uint8,
+% uint16, uint32, and uint64.  Complex matrices will be added in the
+% future.
+
+clear all
+format compact
+rng ('default') ;
+X = 100 * rand (2) ;
+G = GrB (X)              % GraphBLAS copy of a matrix X, same type
+
+%% Sparse integer matrices
+% Here's an int8 version of the same matrix:
+
+S = int8 (G)            % convert G to a full MATLAB int8 matrix
+G = GrB (X, 'int8')      % a GraphBLAS sparse int8 matrix
+
+%% Sparse single-precision matrices
+% Matrix operations in GraphBLAS are typically as fast, or faster than
+% MATLAB.  Here's an unfair comparison: computing X^2 with MATLAB in
+% double precision and with GraphBLAS in single precision.  You would
+% naturally expect GraphBLAS to be faster. 
+%
+% Please wait ...
+
+n = 1e5 ;
+X = spdiags (rand (n, 201), -100:100, n, n) ;
+G = GrB (X, 'single') ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+tic
+X2 = X^2 ;
+matlab_time = toc ;
+fprintf ('\nGraphBLAS time: %g sec (in single)\n', gb_time) ;
+fprintf ('MATLAB time:    %g sec (in double)\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+%% Mixing MATLAB and GraphBLAS matrices
+% The error in the last computation is about eps('single') since
+% GraphBLAS did its computation in single precision, while MATLAB used
+% double precision.  MATLAB and GraphBLAS matrices can be easily
+% combined, as in X2-G2.  The sparse single precision matrices take less
+% memory space.
+
+err = norm (X2 - G2, 1) / norm (X2,1)
+eps ('single')
+whos G G2 X X2
+
+%% Faster matrix operations
+% But even with standard double precision sparse matrices, GraphBLAS is
+% typically faster than the built-in MATLAB methods.  Here's a fair
+% comparison:
+
+G = GrB (X) ;
+tic
+G2 = G^2 ;
+gb_time = toc ;
+err = norm (X2 - G2, 1) / norm (X2,1)
+fprintf ('\nGraphBLAS time: %g sec (in double)\n', gb_time) ;
+fprintf ('MATLAB time:    %g sec (in double)\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+%% A wide range of semirings
+% MATLAB can only compute C=A*B using the standard '+.*.double' and
+% '+.*.complex' semirings.  A semiring is defined in terms of a string,
+% 'add.mult.type', where 'add' is a monoid that takes the place of the
+% additive operator, 'mult' is the multiplicative operator, and 'type' is
+% the data type for the two inputs to the mult operator (the type
+% defaults to the type of A for C=A*B).
+%
+% In the standard semiring, C=A*B is defined as:
+%
+%   C(i,j) = sum (A(i,:).' .* B(:,j))
+%
+% using 'plus' as the monoid and 'times' as the multiplicative operator.
+% But in a more general semiring, 'sum' can be any monoid, which is an
+% associative and commutative operator that has an identity value.  For
+% example, in the 'max.plus' tropical algebra, C(i,j) for C=A*B is
+% defined as:
+%
+%   C(i,j) = max (A(i,:).' + B(:,j))
+
+%%
+% This can be computed in GraphBLAS with:
+%
+%   C = GrB.mxm ('max.+', A, B)
+
+n = 3 ;
+A = rand (n) ;
+B = rand (n) ;
+C = zeros (n) ;
+for i = 1:n
+    for j = 1:n
+        C(i,j) = max (A (i,:).' + B (:,j)) ;
+    end
+end
+C2 = GrB.mxm ('max.+', A, B) ;
+fprintf ('\nerr = norm (C-C2,1) = %g\n', norm (C-C2,1)) ;
+
+%% The max.plus tropical semiring
+% Here are details of the "max.plus" tropical semiring.  The identity
+% value is -inf since max(x,-inf) = max (-inf,x) = x for any x.
+
+GrB.semiringinfo ('max.+.double') ;
+
+%% A boolean semiring
+% MATLAB cannot multiply two logical matrices.  MATLAB R2019a converts
+% them to double and uses the conventional +.*.double semiring instead.
+% In GraphBLAS, this is the common Boolean 'or.and.logical' semiring,
+% which is widely used in linear algebraic graph algorithms.
+
+GrB.semiringinfo ('|.&.logical') ;
+
+%%
+clear
+A = sparse (rand (3) > 0.5)
+B = sparse (rand (3) > 0.2)
+
+%%
+try
+    % MATLAB R2019a does this by casting A and B to double
+    C1 = A*B
+catch
+    % MATLAB R2018a throws an error
+    fprintf ('MATLAB R2019a required for C=A*B with logical\n') ;
+    fprintf ('matrices.  Explicitly converting to double:\n') ;
+    C1 = double (A) * double (B)
+end
+C2 = GrB (A) * GrB (B)
+
+%%
+% Note that C1 is a MATLAB sparse double matrix, and contains non-binary
+% values.  C2 is a GraphBLAS logical matrix.
+whos
+GrB.type (C2)
+
+%% GraphBLAS operators, monoids, and semirings
+% The C interface for SuiteSparse:GraphBLAS allows for arbitrary types
+% and operators to be constructed.  However, the MATLAB interface to
+% SuiteSparse:GraphBLAS is restricted to pre-defined types and operators:
+% a mere 11 types, 66 unary operators, 275 binary operators, 44 monoids,
+% 16 select operators, and 1,865 semirings (1,040 of which are unique,
+% since some binary operators are equivalent: 'min.logical' and
+% '&.logical' are the same thing, for example).  The complex type and
+% its binary operators, monoids, and semirings will be added in the
+% near future.
+%
+% That gives you a lot of tools to create all kinds of interesting
+% graph algorithms.  For example:
+%
+%   GrB.bfs    % breadth-first search
+%   GrB.dnn    % sparse deep neural network (http://graphchallenge.org)
+%   GrB.mis    % maximal independent set
+%
+% See 'help GrB.binopinfo' for a list of the binary operators, and
+% 'help GrB.monoidinfo' for the ones that can be used as the additive
+% monoid in a semiring.
+
+%% 
+help GrB.binopinfo
+
+%% 
+help GrB.monoidinfo
+
+%% Element-wise operations
+% Binary operators can be used in element-wise matrix operations, like
+% C=A+B and C=A.*B.  For the matrix addition C=A+B, the pattern of C is
+% the set union of A and B, and the '+' operator is applied for entries
+% in the intersection.  Entries in A but not B, or in B but not A, are
+% assigned to C without using the operator.  The '+' operator is used for
+% C=A+B but any operator can be used with GrB.eadd.
+
+%%
+A = GrB (sprand (3, 3, 0.5)) ;
+B = GrB (sprand (3, 3, 0.5)) ;
+C1 = A + B
+C2 = GrB.eadd ('+', A, B)
+err = norm (C1-C2,1)
+
+%% Subtracting two matrices
+% A-B and GrB.eadd ('-', A, B) are not the same thing, since the '-'
+% operator is not applied to an entry that is in B but not A.
+
+C1 = A-B 
+C2 = GrB.eadd ('-', A, B)
+
+%% 
+% But these give the same result
+
+C1 = A-B 
+C2 = GrB.eadd ('+', A, GrB.apply ('-', B))
+err = norm (C1-C2,1)
+
+%% Element-wise 'multiplication'
+% For C = A.*B, the result C is the set intersection of the pattern of A
+% and B.  The operator is applied to entries in both A and B.  Entries in
+% A but not B, or B but not A, do not appear in the result C.
+
+C1 = A.*B
+C2 = GrB.emult ('*', A, B) 
+C3 = double (A) .* double (B)
+
+%%
+% Just as in GrB.eadd, any operator can be used in GrB.emult:
+
+A
+B
+C2 = GrB.emult ('max', A, B) 
+
+%% Overloaded operators
+% The following operators all work as you would expect for any matrix.
+% The matrices A and B can be GraphBLAS matrices, or MATLAB sparse or
+% dense matrices, in any combination, or scalars where appropriate:
+%
+%    A+B   A-B  A*B   A.*B  A./B  A.\B  A.^b   A/b   C=A(I,J)
+%    -A    +A   ~A    A'    A.'   A&B   A|B    b\A   C(I,J)=A
+%    A~=B  A>B  A==B  A<=B  A>=B  A<B   [A,B]  [A;B]
+%    A(1:end,1:end)
+%
+% For A^b, b must be a non-negative integer.
+
+C1 = [A B] ;
+C2 = [double(A) double(B)] ;
+assert (isequal (double (C1), C2))
+
+%%
+C1 = A^2
+C2 = double (A)^2 ;
+err = norm (C1 - C2, 1)
+assert (err < 1e-12)
+
+%%
+C1 = A (1:2,2:end)
+A = double (A) ;
+C2 = A (1:2,2:end) ;
+assert (isequal (double (C1), C2))
+
+%% Overloaded functions
+% Many MATLAB built-in functions can be used with GraphBLAS matrices:
+%
+% A few differences with the built-in functions:
+%
+%   S = sparse (G)        % makes a copy of a GrB matrix
+%   F = full (G)          % adds explicit zeros, so numel(F)==nnz(F)
+%   F = full (G,type,id)  % adds explicit identity values to a GrB matrix
+%   disp (G, level)       % display a GrB matrix G; level=2 is the default.
+%
+% In the list below, the first set of Methods are overloaded built-in
+% methods.  They are used as-is on GraphBLAS matrices, such as C=abs(G).
+% The Static methods are prefixed with "GrB.", as in C = GrB.apply ( ... ).
+
+%%
+
+methods GrB
+
+%% Zeros are handled differently
+% Explicit zeros cannot be automatically dropped from a GraphBLAS matrix,
+% like they are in MATLAB sparse matrices.  In a shortest-path problem,
+% for example, an edge A(i,j) that is missing has an infinite weight,
+% (the monoid identity of min(x,y) is +inf).  A zero edge weight A(i,j)=0
+% is very different from an entry that is not present in A.  However, if
+% a GraphBLAS matrix is converted into a MATLAB sparse matrix, explicit
+% zeros are dropped, which is the convention for a MATLAB sparse matrix.
+% They can also be dropped from a GraphBLAS matrix using the GrB.select
+% method.
+
+%%
+
+G = GrB (magic (2)) ;
+G (1,1) = 0      % G(1,1) still appears as an explicit entry
+A = double (G)   % but it's dropped when converted to MATLAB sparse
+H = GrB.select ('nonzero', G)  % drops the explicit zeros from G
+fprintf ('nnz (G): %d  nnz (A): %g nnz (H): %g\n', ...
+    nnz (G), nnz (A), nnz (H)) ;
+fprintf ('num entries in G: %d\n', GrB.entries (G)) ;
+
+%% Displaying contents of a GraphBLAS matrix
+% Unlike MATLAB, the default is to display just a few entries of a GrB matrix.
+% Here are all 100 entries of a 10-by-10 matrix, using a non-default disp(G,3):
+
+%%
+G = GrB (rand (10)) ;
+% display everything:
+disp (G,3)
+
+%%
+% That was disp(G,3), so every entry was printed.  It's a little long, so
+% the default is not to print everything.
+
+%%
+% With the default display (level = 2):
+G
+
+%%
+% That was disp(G,2) or just display(G), which is what is printed by a
+% MATLAB statement that doesn't have a trailing semicolon.  With
+% level = 1, disp(G,1) gives just a terse summary:
+disp (G,1)
+
+%% Storing a matrix by row or by column
+% MATLAB stores its sparse matrices by column, referred to as 'standard
+% CSC' in SuiteSparse:GraphBLAS.  In the CSC (compressed sparse column)
+% format, each column of the matrix is stored as a list of entries, with
+% their value and row index.  In the CSR (compressed sparse row) format,
+% each row is stored as a list of values and their column indices.
+% GraphBLAS uses both CSC and CSR, and the two formats can be intermixed
+% arbitrarily.  In its C interface, the default format is CSR.  However,
+% for better compatibility with MATLAB, this MATLAB interface for
+% SuiteSparse:GraphBLAS uses CSC by default instead. 
+
+%%
+rng ('default') ;
+GrB.clear ;                      % clear all prior GraphBLAS settings
+fprintf ('the default format is: %s\n', GrB.format) ;
+C = sparse (rand (2))
+G = GrB (C)
+GrB.format (G)
+
+%%
+% Many graph algorithms work better in CSR format, with matrices stored
+% by row.  For example, it is common to use A(i,j) for the edge (i,j),
+% and many graph algorithms need to access the out-adjacencies of nodes,
+% which is the row A(i,:) for node i.  If the CSR format is desired,
+% GrB.format ('by row') tells GraphBLAS to create all subsequent matrices
+% in the CSR format.  Converting from a MATLAB sparse matrix (in standard
+% CSC format) takes a little more time (requiring a transpose), but
+% subsequent graph algorithms can be faster.
+
+%%
+G = GrB (C, 'by row')
+fprintf ('the format of G is:    %s\n', GrB.format (G)) ;
+H = GrB (C)
+fprintf ('the format of H is:    %s\n', GrB.format (H)) ;
+err = norm (H-G,1)
+
+%% Hypersparse matrices
+% SuiteSparse:GraphBLAS can use two kinds of sparse matrix data
+% structures: standard and hypersparse, for both CSC and CSR formats.  In
+% the standard CSC format used in MATLAB, an m-by-n matrix A takes
+% O(n+nnz(A)) space.  MATLAB can create huge column vectors, but not huge
+% matrices (when n is huge).
+
+clear all
+[c, huge] = computer ;
+C = sparse (huge, 1)    % MATLAB can create a huge-by-1 sparse column
+try
+    C = sparse (huge, huge)     % but this fails
+catch me
+    error_expected = me
+end
+
+%%
+% In a GraphBLAS hypersparse matrix, an m-by-n matrix A takes only
+% O(nnz(A)) space.  The difference can be huge if nnz (A) << n.
+
+clear
+[c, huge] = computer ;
+G = GrB (huge, 1)            % no problem for GraphBLAS
+H = GrB (huge, huge)         % this works in GraphBLAS too
+
+%%
+% Operations on huge hypersparse matrices are very fast; no component of
+% the time or space complexity is Omega(n).
+
+I = randperm (huge, 2) ;
+J = randperm (huge, 2) ;
+H (I,J) = magic (2) ;        % add 4 nonzeros to random locations in H
+H (I,I) = 10 * [1 2 ; 3 4] ; % so H^2 is not all zero
+H = H^2 ;                    % square H
+H = (H' * 2) ;               % transpose H and double the entries
+K = pi * spones (H) ;
+H = H + K                    % add pi to each entry in H
+
+%% numel uses vpa if the matrix is really huge
+e1 = numel (G)               % this is huge, but still a flint
+e2 = numel (H)               % this is huge^2, which needs vpa
+whos e1 e2
+
+%%
+% All of these matrices take very little memory space:
+whos C G H K
+
+%% The mask and accumulator
+% When not used in overloaded operators or built-in functions, many
+% GraphBLAS methods of the form GrB.method ( ... ) can optionally use a
+% mask and/or an accumulator operator.  If the accumulator is '+' in
+% GrB.mxm, for example, then C = C + A*B is computed.  The mask acts much
+% like logical indexing in MATLAB.  With a logical mask matrix M,
+% C<M>=A*B allows only part of C to be assigned.  If M(i,j) is true, then
+% C(i,j) can be modified.  If false, then C(i,j) is not modified.
+%
+% For example, to set all values in C that are greater than 0.5 to 3:
+
+%%
+A = rand (3) 
+C = GrB.assign (A, A > 0.5, 3) ;     % in GraphBLAS
+C1 = GrB (A) ; C1 (A > .5) = 3       % also in GraphBLAS
+C2 = A      ; C2 (A > .5) = 3       % in MATLAB
+err = norm (C - C1, 1)
+err = norm (C - C2, 1)
+
+%% The descriptor
+% Most GraphBLAS functions of the form GrB.method ( ... ) take an optional
+% last argument, called the descriptor.  It is a MATLAB struct that can
+% modify the computations performed by the method.  'help
+% GrB.descriptorinfo' gives all the details.  The following is a short
+% summary of the primary settings:
+%
+% d.out  = 'default' or 'replace', clears C after the accum op is used.
+%
+% d.mask = 'default' or 'complement', to use M or ~M as the mask matrix;
+%          'structural', or 'structural complement', to use the pattern
+%           of M or ~M.
+%
+% d.in0  = 'default' or 'transpose', to transpose A for C=A*B, C=A+B, etc.
+%
+% d.in1  = 'default' or 'transpose', to transpose B for C=A*B, C=A+B, etc.
+%
+% d.kind = 'default', 'GrB', 'sparse', or 'full'; the output of GrB.method.
+
+A = sparse (rand (2)) ;
+B = sparse (rand (2)) ;
+C1 = A'*B ;
+C2 = GrB.mxm ('+.*', A, B, struct ('in0', 'transpose')) ;
+err = norm (C1-C2,1)
+
+%% Integer arithmetic is different in GraphBLAS
+% MATLAB supports integer arithmetic on its full matrices, using int8,
+% int16, int32, int64, uint8, uint16, uint32, or uint64 data types.  None
+% of these integer data types can be used to construct a MATLAB sparse
+% matrix, which can only be double, double complex, or logical.
+% Furthermore, C=A*B is not defined for integer types in MATLAB, except
+% when A and/or B are scalars.
+%
+% GraphBLAS supports all of those types for its sparse matrices (except
+% for complex, which will be added in the future).  All operations are
+% supported, including C=A*B when A or B are any integer type, for all
+% 1,865 semirings (1,040 of which are unique).
+%
+% However, integer arithmetic differs in GraphBLAS and MATLAB.  In
+% MATLAB, integer values saturate if they exceed their maximum value.  In
+% GraphBLAS, integer operators act in a modular fashion.  The latter is
+% essential when computing C=A*B over a semiring.  A saturating integer
+% operator cannot be used as a monoid since it is not associative.
+%
+% The C API for GraphBLAS allows for the creation of arbitrary
+% user-defined types, so it would be possible to create different binary
+% operators to allow element-wise integer operations to saturate,
+% perhaps:
+%
+%   C = GrB.eadd('+saturate',A,B)
+%
+% This would require an extension to this MATLAB interface.
+
+%%
+C = uint8 (magic (3)) ;
+G = GrB (C) ;
+C1 = C * 40
+C2 = G * 40
+C3 = double (G) * 40 ;
+S = double (C1 < 255) ;
+assert (isequal (double (C1).*S, double (C2).*S))
+assert (isequal (nonzeros (C2), double (mod (nonzeros (C3), 256))))
+
+%% An example graph algorithm: breadth-first search
+% The breadth-first search of a graph finds all nodes reachable from the
+% source node, and their level, v.  v=GrB.bfs(A,s) or v=bfs_matlab(A,s)
+% compute the same thing, but GrB.bfs uses GraphBLAS matrices and
+% operations, while bfs_matlab uses pure MATLAB operations.  v is defined
+% as v(s) = 1 for the source node, v(i) = 2 for nodes adjacent to the
+% source, and so on.
+
+clear all
+rng ('default') ;
+n = 1e5 ;
+A = logical (sprandn (n, n, 1e-3)) ;
+
+tic
+v1 = GrB.bfs (A, 1) ;
+gb_time = toc ;
+
+tic
+v2 = bfs_matlab (A, 1) ;
+matlab_time = toc ;
+
+assert (isequal (double (v1'), v2))
+fprintf ('\nnodes reached: %d of %d\n', nnz (v2), n) ;
+fprintf ('GraphBLAS time: %g sec\n', gb_time) ;
+fprintf ('MATLAB time:    %g sec\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+%% Example graph algorithm: Luby's method in GraphBLAS
+% The GrB.mis.m function is a variant of Luby's randomized algorithm [Luby
+% 1985].  It is a parallel method for finding a maximal independent set
+% of nodes, where no two nodes are adjacent.  See the
+% GraphBLAS/@GrB/GrB.mis.m function for details.  The graph must be
+% symmetric with a zero-free diagonal, so A is symmetrized first and any
+% diagonal entries are removed.
+
+A = GrB (A) ;
+A = GrB.offdiag (A|A') ;
+
+tic
+s = GrB.mis (A) ;
+toc
+fprintf ('# nodes in the graph: %g\n', size (A,1)) ;
+fprintf ('# edges: : %g\n', GrB.entries (A) / 2) ;
+fprintf ('size of maximal independent set found: %g\n', ...
+    full (double (sum (s)))) ;
+
+% make sure it's independent
+p = find (s) ;
+S = A (p,p) ;
+assert (GrB.entries (S) == 0)
+
+% make sure it's maximal
+notp = find (s == 0) ;
+S = A (notp, p) ;
+deg = GrB.vreduce ('+.int64', S) ;
+assert (logical (all (deg > 0)))
+
+%% Sparse deep neural network
+% The 2019 MIT GraphChallenge (see http://graphchallenge.org) is to solve
+% a set of large sparse deep neural network problems.  In this demo, the
+% MATLAB reference solution is compared with a solution using GraphBLAS,
+% for a randomly constructed neural network.  See the GrB.dnn and
+% dnn_matlab.m functions for details.
+
+clear all
+rng ('default') ;
+nlayers = 16 ;
+nneurons = 4096 ;
+nfeatures = 30000 ;
+fprintf ('# layers:   %d\n', nlayers) ;
+fprintf ('# neurons:  %d\n', nneurons) ;
+fprintf ('# features: %d\n', nfeatures) ;
+
+tic
+Y0 = sprand (nfeatures, nneurons, 0.1) ;
+for layer = 1:nlayers
+    W {layer} = sprand (nneurons, nneurons, 0.01) * 0.2 ;
+    bias {layer} = -0.2 * ones (1, nneurons) ;
+end
+t_setup = toc ;
+fprintf ('construct problem time: %g sec\n', t_setup) ;
+
+% convert the problem from MATLAB to GraphBLAS
+t = tic ;
+[W_gb, bias_gb, Y0_gb] = dnn_mat2gb (W, bias, Y0) ;
+t = toc (t) ;
+fprintf ('setup time: %g sec\n', t) ;
+
+%% Solving the sparse deep neural network problem with GraphBLAS
+% Please wait ...
+
+tic
+Y1 = GrB.dnn (W_gb, bias_gb, Y0_gb) ;
+gb_time = toc ;
+fprintf ('total time in GraphBLAS: %g sec\n', gb_time) ;
+
+%% Solving the sparse deep neural network problem with MATLAB
+% Please wait ...
+
+tic
+Y2 = dnn_matlab (W, bias, Y0) ;
+matlab_time = toc ;
+fprintf ('total time in MATLAB:    %g sec\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time) ;
+
+err = norm (Y1-Y2,1)
+
+%% For objects, GraphBLAS has better colon notation than MATLAB
+% The MATLAB notation C = A (start:inc:fini) is very handy, and
+% it works great if A is a MATLAB matrix.  But for objects like
+% the GraphBLAS matrix, MATLAB starts by creating the explicit
+% index vector I = start:inc:fini.  That's fine if the matrix is
+% modest in size, but GraphBLAS can construct huge matrices.
+% The problem is that 1:n cannot be explicitly constructed when n
+% is huge.
+%
+% The C API for GraphBLAS can represent the colon notation 
+% start:inc:fini in an implicit manner, so it can do the indexing
+% without actually forming the explicit list I = start:inc:fini.
+% But there is no access to this method using the MATLAB notation
+% start:inc:fini.
+%
+% Thus, to compute C = A (start:inc:fini) for very huge matrices,
+% you need to use a cell array to represent the colon notation,
+% as { start, inc, fini }, instead of start:inc:fini. See
+% 'help GrB.extract', and 'help GrB.subassign' for C(I,J)=A.  The
+% syntax isn't conventional, but it is far faster than the MATLAB
+% colon notation for objects, and takes far less memory when I is huge.
+
+%%
+n = 1e14 ;
+H = GrB (n, n) ;            % a huge empty matrix
+I = [1 1e9 1e12 1e14] ;
+M = magic (4)
+H (I,I) = M ;
+J = {1, 1e13} ;            % represents 1:1e13 colon notation
+C1 = H (J, J)              % computes C1 = H (1:1e13,1:1e13)
+c = nonzeros (C1) ;
+m = nonzeros (M (1:3, 1:3)) ;
+assert (isequal (c, m)) ;
+
+%%
+try
+    % try to compute the same thing with colon
+    % notation (1:1e13), but this fails:
+    C2 = H (1:1e13, 1:1e13)
+catch me
+    error_expected = me
+end
+
+%% Iterative solvers work as-is
+% Many built-in functions work with GraphBLAS matrices unmodified.
+
+A = sparse (rand (4)) ;
+b = sparse (rand (4,1)) ;
+x = gmres (A,b)
+norm (A*x-b)
+x = gmres (GrB(A), GrB(b))
+norm (A*x-b)
+
+%% ... even in single precision
+x = gmres (GrB(A,'single'), GrB(b,'single'))
+norm (A*x-b)
+
+%%
+% Both of the following uses of minres (A,b) fail to converge because A
+% is not symmetric, as the method requires.  Both failures are correctly
+% reported, and both the MATLAB version and the GraphBLAS version return
+% the same incorrect vector x.
+
+x = minres (A, b)
+x = minres (GrB(A), GrB(b))
+
+%%
+% With a proper symmetric matrix
+
+A = A+A' ;
+x = minres (A, b)
+norm (A*x-b)
+x = minres (GrB(A), GrB(b))
+norm (A*x-b)
+
+%% Extreme performance differences between GraphBLAS and MATLAB.
+% The GraphBLAS operations used so far are perhaps 2x to 50x faster than
+% the corresponding MATLAB operations, depending on how many cores your
+% computer has.  To run a demo illustrating a 500x or more speedup versus
+% MATLAB, run this demo:
+%
+%    gbdemo2
+%
+% It will illustrate an assignment C(I,J)=A that can take under a second
+% in GraphBLAS but several minutes in MATLAB.  To make the comparison
+% even more dramatic, try:
+%
+%    gbdemo2 (20000)
+%
+% assuming you have enough memory.  The gbdemo2 is not part of this demo
+% since it can take a long time; it tries a range of problem sizes,
+% and each one takes several minutes in MATLAB.
+
+%% Sparse logical indexing is much, much faster in GraphBLAS
+% The mask in GraphBLAS acts much like logical indexing in MATLAB, but it
+% is not quite the same.  MATLAB logical indexing takes the form:
+%
+%       C (M) = A (M)
+%
+% which computes the same thing as the GraphBLAS statement:
+%
+%       C = GrB.assign (C, M, A)
+%
+% The GrB.assign statement computes C(M)=A(M), and it is vastly faster
+% than C(M)=A(M), even if the time to convert the GrB matrix back to a
+% MATLAB sparse matrix is included.
+%
+% GraphBLAS can also compute C (M) = A (M) using overloaded operators
+% for subsref and subsasgn, but C = GrB.assign (C, M, A) is a bit faster.
+%
+% First, both methods in GraphBLAS (both are very fast):
+
+clear
+n = 4000 ;
+tic
+C = sprand (n, n, 0.1) ;
+A = 100 * sprand (n, n, 0.1) ;
+M = (C > 0.5) ;
+t_setup = toc ;
+fprintf ('nnz(C): %g, nnz(M): %g, nnz(A): %g\n', ...
+    nnz(C), nnz(M), nnz(A)) ;
+fprintf ('\nsetup time:     %g sec\n', t_setup) ;
+
+% include the time to convert C1 from a GraphBLAS
+% matrix to a MATLAB sparse matrix:
+tic
+C1 = GrB.assign (C, M, A) ;
+C1 = double (C1) ;
+gb_time = toc ;
+fprintf ('\nGraphBLAS time: %g sec for GrB.assign\n', gb_time) ;
+
+% now using overloaded operators, also include the time to
+% convert back to a MATLAB sparse matrix, for good measure:
+A2 = GrB (A) ;
+C2 = GrB (C) ;
+tic
+C2 (M) = A2 (M) ;
+C2 = double (C2) ;
+gb_time2 = toc ;
+fprintf ('\nGraphBLAS time: %g sec for C(M)=A(M)\n', gb_time2) ;
+
+%%
+% Please wait, this will take about 10 minutes or so ...
+
+tic
+C (M) = A (M) ;
+matlab_time = toc ;
+
+fprintf ('\nGraphBLAS time: %g sec (GrB.assign)\n', gb_time) ;
+fprintf ('\nGraphBLAS time: %g sec (overloading)\n', gb_time2) ;
+fprintf ('MATLAB time:    %g sec\n', matlab_time) ;
+fprintf ('Speedup of GraphBLAS over MATLAB: %g\n', ...
+    matlab_time / gb_time2) ;
+
+% GraphBLAS computes the exact same result with both methods:
+assert (isequal (C1, C))
+assert (isequal (C2, C))
+C1 - C
+C2 - C
+
+%% Limitations and their future solutions
+% The MATLAB interface for SuiteSparse:GraphBLAS is a work-in-progress.
+% It has some limitations, most of which will be resolved over time.
+%
+% (1) Nonblocking mode:
+%
+% GraphBLAS has a 'non-blocking' mode, in which operations can be left
+% pending and completed later.  SuiteSparse:GraphBLAS uses the
+% non-blocking mode to speed up a sequence of assignment operations, such
+% as C(I,J)=A.  However, in its MATLAB interface, this would require a
+% MATLAB mexFunction to modify its inputs.  That breaks the MATLAB API
+% standard, so it cannot be safely done.  As a result, using GraphBLAS
+% via its MATLAB interface can be slower than when using its C API.  This
+% restriction would not be a limitation if GraphBLAS were to be
+% incorporated into MATLAB itself, but there is likely no way to do this
+% in a mexFunction interface to GraphBLAS.
+
+%%
+% (2) Complex matrices:
+%
+% GraphBLAS can operate on matrices with arbitrary user-defined types and
+% operators.  The only constraint is that the type be a fixed sized
+% typedef that can be copied with the ANSI C memcpy; variable-sized types
+% are not yet supported.  However, in this MATLAB interface,
+% SuiteSparse:GraphBLAS has access to only predefined types, operators,
+% and semirings.  Complex types and operators will be added to this
+% MATLAB interface in the future.  They already appear in the C version
+% of GraphBLAS, with user-defined operators in
+% GraphBLAS/Demo/Source/usercomplex.c.
+
+%%
+% (3) Integer element-wise operations:
+%
+% Integer operations in MATLAB saturate, so that uint8(255)+1 is 255.  To
+% allow for integer monoids, GraphBLAS uses modular arithmetic instead.
+% This is the only way that C=A*B can be defined for integer semirings.
+% However, saturating integer operators could be added in the future, so
+% that element-wise integer operations on GraphBLAS sparse integer
+% matrices could work just the same as their MATLAB counterparts.
+%
+% So in the future, you could perhaps write this, for both sparse and
+% dense integer matrices A and B:
+%
+%       C = GrB.eadd ('+saturate.int8', A, B)
+%
+% to compute the same thing as C=A+B in MATLAB for its full int8
+% matrices.  Note that MATLAB can do this only for dense integer
+% matrices, since it doesn't support sparse integer matrices.
+
+%%
+% (4) Faster methods:
+%
+% Most methods in this MATLAB interface are based on efficient parallel C
+% functions in GraphBLAS itself, and are typically as fast or faster than
+% the equivalent built-in operators and functions in MATLAB.
+%
+% There are a few notable exceptions; these will be addressed in the future.
+% Dense matrices and vectors held as GraphBLAS objects are slower than
+% their MATLAB counterparts.  horzcat and vertcat, for [A B] and [A;B]
+% when either A or B are GraphBLAS matrices, are also slow, as
+% illustrated below in the next example.
+%
+% Other methods that will be faster in the future include bandwidth,
+% istriu, istril, eps, ceil, floor, round, fix, isfinite, isinf, isnan,
+% spfun, and A.^B.  These methods are currently implemented in
+% m-files, not in efficient parallel C functions.
+
+%%
+% Here is an example that illustrates the performance of C = [A B]
+clear
+A = sparse (rand (2000)) ;
+B = sparse (rand (2000)) ;
+tic
+C1 = [A B] ;
+matlab_time = toc ;
+
+A = GrB (A) ;
+B = GrB (B) ;
+tic
+C2 = [A B] ;
+gb_time = toc ;
+
+err = norm (C1-C2,1)
+fprintf ('\nMATLAB: %g sec, GraphBLAS: %g sec\n', ...
+    matlab_time, gb_time) ;
+if (gb_time > matlab_time)
+    fprintf ('GraphBLAS is slower by a factor of %g\n', ...
+        gb_time / matlab_time) ;
+end
+
+%%
+% (5) Linear indexing:
+%
+% If A is an m-by-n 2D MATLAB matrix, with n > 1, A(:) is a column vector
+% of length m*n.  The index operation A(i) accesses the ith entry in the
+% vector A(:).  This is called linear indexing in MATLAB.  It is not yet
+% available for GraphBLAS matrices in this MATLAB interface to GraphBLAS,
+% but it could be added in the future.
+
+%%
+% (6) Implicit singleton dimension expansion:
+%
+% In MATLAB C=A+B where A is m-by-n and B is a 1-by-n row vector
+% implicitly expands B to a matrix, computing C(i,j)=A(i,j)+B(j).  This
+% implicit expansion is not yet supported in GraphBLAS with C=A+B.
+% However, it can be done with C = GrB.mxm ('+.+', A, diag(GrB(B))).
+% That's a nice example of the power of semirings, but it's not
+% immediately obvious, and not as clear a syntax as C=A+B.  The
+% GraphBLAS/@GrB/dnn.m function uses this 'plus.plus' semiring to
+% apply the bias to each neuron.
+
+A = magic (3)
+B = 1000:1000:3000
+C1 = A + B
+C2 = GrB.mxm ('+.+', A, diag (GrB (B)))
+err = norm (C1-C2,1)
+
+%% GraphBLAS operations
+% In addition to the overloaded operators (such as C=A*B) and overloaded
+% functions (such as L=tril(A)), GraphBLAS also has methods of the form
+% GrB.method, listed on the next page.  Most of them take an optional
+% input matrix Cin, which is the initial value of the matrix C for the
+% expression below, an optional mask matrix M, and an optional
+% accumulator operator.
+%
+%      C<#M,replace> = accum (C, T)
+%
+% In the above expression, #M is either empty (no mask), M (with a mask
+% matrix) or ~M (with a complemented mask matrix), as determined by the
+% descriptor.  'replace' can be used to clear C after it is used in
+% accum(C,T) but before it is assigned with C<...> = Z, where
+% Z=accum(C,T).  The matrix T is the result of some operation, such as
+% T=A*B for GrB.mxm, or T=op(A,B) for GrB.eadd.
+%
+% A summary of these GrB.methods is on the next pages.
+
+%% Methods for the GrB class:
+%
+%   These methods operate on GraphBLAS matrices only, and they overload
+%   the existing MATLAB functions of the same name.
+%
+%   C = GrB (...)           construct a GraphBLAS matrix
+%   C = sparse (G)          makes a copy of a GrB matrix
+%   C = full (G, ...)       adds explicit zeros or id values to a GrB matrix
+%   C = double (G)          cast GrB matrix to MATLAB sparse double matrix
+%   C = logical (G)         cast GrB matrix to MATLAB sparse logical matrix
+%   C = complex (G)         cast GrB matrix to MATLAB sparse complex
+%   C = single (G)          cast GrB matrix to MATLAB full single matrix
+%   C = int8 (G)            cast GrB matrix to MATLAB full int8 matrix
+%   C = int16 (G)           cast GrB matrix to MATLAB full int16 matrix
+%   C = int32 (G)           cast GrB matrix to MATLAB full int32 matrix
+%   C = int64 (G)           cast GrB matrix to MATLAB full int64 matrix
+%   C = uint8 (G)           cast GrB matrix to MATLAB full uint8 matrix
+%   C = uint16 (G)          cast GrB matrix to MATLAB full uint16 matrix
+%   C = uint32 (G)          cast GrB matrix to MATLAB full uint32 matrix
+%   C = uint64 (G)          cast GrB matrix to MATLAB full uint64 matrix
+%   C = cast (G,...)        cast GrB matrix to MATLAB matrix (as above)
+
+%%
+%   X = nonzeros (G)        extract all entries from a GrB matrix
+%   [I,J,X] = find (G)      extract all entries from a GrB matrix
+%   C = spones (G)          return pattern of GrB matrix
+%   disp (G, level)         display a GrB matrix G
+%   display (G)             display a GrB matrix G; same as disp(G,2)
+%   mn = numel (G)          m*n for an m-by-n GrB matrix G
+%   e = nnz (G)             number of entries in a GrB matrix G
+%   e = nzmax (G)           number of entries in a GrB matrix G
+%   [m n] = size (G)        size of a GrB matrix G
+%   n = length (G)          length of a GrB vector
+%   s = isempty (G)         true if any dimension of G is zero
+%   s = issparse (G)        true for any GrB matrix G
+%   s = ismatrix (G)        true for any GrB matrix G
+%   s = isvector (G)        true if m=1 or n=1, for an m-by-n GrB matrix G
+%   s = iscolumn (G)        true if n=1, for an m-by-n GrB matrix G
+%   s = isrow (G)           true if m=1, for an m-by-n GrB matrix G
+%   s = isscalar (G)        true if G is a 1-by-1 GrB matrix
+%   s = isnumeric (G)       true for any GrB matrix G (even logical)
+%   s = isfloat (G)         true if GrB matrix is double, single, complex
+%   s = isreal (G)          true if GrB matrix is not complex
+%   s = isinteger (G)       true if GrB matrix is int8, int16, ..., uint64
+%   s = islogical (G)       true if GrB matrix is logical
+%   s = isa (G, classname)  check if a GrB matrix is of a specific class
+
+%%
+%   C = diag (G,k)          diagonal matrices and diagonals of GrB matrix G
+%   L = tril (G,k)          lower triangular part of GrB matrix G
+%   U = triu (G,k)          upper triangular part of GrB matrix G
+%   C = kron (A,B)          Kronecker product
+%   C = repmat (G, ...)     replicate and tile a GraphBLAS matrix
+%   C = reshape (G, ...)    reshape a GraphBLAS matrix
+%   C = abs (G)             absolute value
+%   C = sign (G)            signum function
+%   s = istril (G)          true if G is lower triangular
+%   s = istriu (G)          true if G is upper triangular
+%   s = isbanded (G,...)    true if G is banded
+%   s = isdiag (G)          true if G is diagonal
+%   s = ishermitian (G)     true if G is Hermitian
+%   s = issymmetric (G)     true if G is symmetric
+%   [lo,hi] = bandwidth (G) determine the lower & upper bandwidth of G
+%   C = sum (G, option)     reduce via sum, to vector or scalar
+%   C = prod (G, option)    reduce via product, to vector or scalar
+%   s = norm (G, kind)      1-norm or inf-norm of a GrB matrix
+%   C = max (G, ...)        reduce via max, to vector or scalar
+%   C = min (G, ...)        reduce via min, to vector or scalar
+%   C = any (G, ...)        reduce via '|', to vector or scalar
+%   C = all (G, ...)        reduce via '&', to vector or scalar
+
+%%
+%   C = sqrt (G)            element-wise square root
+%   C = eps (G)             floating-point spacing
+%   C = ceil (G)            round towards infinity
+%   C = floor (G)           round towards -infinity
+%   C = round (G)           round towards nearest
+%   C = fix (G)             round towards zero
+%   C = isfinite (G)        test if finite
+%   C = isinf (G)           test if infinite
+%   C = isnan (G)           test if NaN
+%   C = spfun (fun, G)      evaluate a function on the entries of G
+%   p = amd (G)             approximate minimum degree ordering
+%   p = colamd (G)          column approximate minimum degree ordering
+%   p = symamd (G)          symmetric approximate minimum degree ordering
+%   p = symrcm (G)          reverse Cuthill-McKee ordering
+%   [...] = dmperm (G)      Dulmage-Mendelsohn permutation
+%   parent = etree (G)      elimination tree
+%   C = conj (G)            complex conjugate
+%   C = real (G)            real part of a complex GraphBLAS matrix
+%   [V, ...] = eig (G,...)  eigenvalues and eigenvectors
+%   assert (G)              generate an error if G is false
+%   C = zeros (...,'like',G)   all-zero matrix, same type as G
+%   C = false (...,'like',G)   all-false logical matrix
+%   C = ones (...,'like',G)    matrix with all ones, same type as G
+
+%% Operator overloading:
+%
+%   C = plus (A, B)         C = A + B
+%   C = minus (A, B)        C = A - B
+%   C = uminus (G)          C = -G
+%   C = uplus (G)           C = +G
+%   C = times (A, B)        C = A .* B
+%   C = mtimes (A, B)       C = A * B
+%   C = rdivide (A, B)      C = A ./ B
+%   C = ldivide (A, B)      C = A .\ B
+%   C = mrdivide (A, B)     C = A / B
+%   C = mldivide (A, B)     C = A \ B
+%   C = power (A, B)        C = A .^ B
+%   C = mpower (A, B)       C = A ^ B
+%   C = lt (A, B)           C = A < B
+%   C = gt (A, B)           C = A > B
+%   C = le (A, B)           C = A <= B
+%   C = ge (A, B)           C = A >= B
+%   C = ne (A, B)           C = A ~= B
+%   C = eq (A, B)           C = A == B
+%   C = and (A, B)          C = A & B
+%   C = or (A, B)           C = A | B
+%   C = not (G)             C = ~G
+%   C = ctranspose (G)      C = G'
+%   C = transpose (G)       C = G.'
+%   C = horzcat (A, B)      C = [A , B]
+%   C = vertcat (A, B)      C = [A ; B]
+%   C = subsref (A, I, J)   C = A (I,J) or C = A (M)
+%   C = subsasgn (A, I, J)  C (I,J) = A
+%   index = end (A, k, n)   for object indexing, A(1:end,1:end)
+
+%% Static Methods:
+%
+%   The Static Methods for the GrB class can be used on input matrices of
+%   any kind: GraphBLAS sparse matrices, MATLAB sparse matrices, or
+%   MATLAB dense matrices, in any combination.  The output matrix Cout is
+%   a GraphBLAS matrix, by default, but can be optionally returned as a
+%   MATLAB sparse or dense matrix.  The static methods divide into two
+%   categories: those that perform basic functions, and the GraphBLAS
+%   operations that use the mask/accum.
+
+%% GraphBLAS basic functions:
+%
+%   GrB.clear                    clear GraphBLAS workspace and settings
+%   GrB.descriptorinfo (d)       list properties of a descriptor
+%   GrB.unopinfo (op, type)      list properties of a unary operator
+%   GrB.binopinfo (op, type)     list properties of a binary operator
+%   GrB.monoidinfo (op, type)    list properties of a monoid
+%   GrB.semiringinfo (s, type)   list properties of a semiring
+%   t = GrB.threads (t)          set/get # of threads to use in GraphBLAS
+%   c = GrB.chunk (c)            set/get chunk size to use in GraphBLAS
+%   b = GrB.burble (b)           set/get burble (diagnostic output)
+%   result = GrB.entries (G,...) count or query entries in a matrix
+%   result = GrB.nonz (G,...)    count or query nonzeros in a matrix
+%   C = GrB.prune (A, id)        prune entries equal to id
+%   C = GrB.offdiag (A)          prune diagonal entries
+%   s = GrB.isfull (A)           true if all entries present
+%   [C,I,J] = GrB.compact (A,id) remove empty rows and columns
+%   G = GrB.empty (m, n)         return an empty GraphBLAS matrix
+%   s = GrB.type (A)             get the type of a MATLAB or GrB matrix A
+%   s = GrB.issigned (type)      true if type is signed
+%   f = GrB.format (f)           set/get matrix format to use in GraphBLAS
+%   s = GrB.isbyrow (A)          true if format of A is 'by row'
+%   s = GrB.isbycol (A)          true if format of A is 'by col'
+%   C = GrB.expand (scalar, A)   expand a scalar (C = scalar*spones(A))
+%   C = GrB.eye                  identity matrix of any type
+%   C = GrB.speye                identity matrix (of type 'double')
+%   C = GrB.build (I, J, X, m, n, dup, type, desc)
+%                                build a GrB matrix from list of entries
+%   [I,J,X] = GrB.extracttuples (A, desc)
+%                                extract all entries from a matrix
+%   s = GrB.normdiff (A, B, kind)   norm (A-B,kind)
+
+%% GraphBLAS operations with Cout, mask M, and accum.
+%
+%   Cout = GrB.mxm (Cin, M, accum, semiring, A, B, desc)
+%                   sparse matrix-matrix multiplication over a semiring
+%   Cout = GrB.select (Cin, M, accum, op, A, b, desc)
+%                   select a subset of entries from a matrix
+%   Cout = GrB.assign (Cin, M, accum, A, I, J, desc)
+%                   sparse matrix assignment, such as C(I,J)=A
+%   Cout = GrB.subassign (Cin, M, accum, A, I, J, desc)
+%                   sparse matrix assignment, such as C(I,J)=A
+%   Cout = GrB.vreduce (Cin, M, accum, op, A, desc)
+%                   reduce a matrix to a vector
+%   Cout = GrB.reduce (Cin, accum, op, A, desc)
+%                   reduce a matrix to a scalar
+%   Cout = GrB.kronecker (Cin, M, accum, op, A, B, desc)
+%                   Kronecker product
+%   Cout = GrB.trans (Cin, M, accum, A, desc)
+%                   transpose a matrix
+%   Cout = GrB.eadd (Cin, M, accum, op, A, B, desc)
+%                   element-wise addition
+%   Cout = GrB.emult (Cin, M, accum, op, A, B, desc)
+%                   element-wise multiplication
+%   Cout = GrB.apply (Cin, M, accum, op, A, desc)
+%                   apply a unary operator
+%   Cout = GrB.extract (Cin, M, accum, A, I, J, desc)
+%                   extract submatrix, like C=A(I,J) in MATLAB
+%
+%%
+% GraphBLAS operations (with Cout, Cin arguments) take the following form:
+%
+%   C<#M,replace> = accum (C, operation (A or A', B or B'))
+%
+%   C is both an input and output matrix.  In this MATLAB interface to
+%   GraphBLAS, it is split into Cin (the value of C on input) and Cout
+%   (the value of C on output).  M is the optional mask matrix, and #M is
+%   either M or !M depending on whether or not the mask is complemented
+%   via the desc.mask option.  The replace option is determined by
+%   desc.out; if present, C is cleared after it is used in the accum
+%   operation but before the final assignment.  A and/or B may optionally
+%   be transposed via the descriptor fields desc.in0 and desc.in1,
+%   respectively.  To select the format of Cout, use desc.format.  See
+%   GrB.descriptorinfo for more details.
+%
+%   accum is optional; if it is not present, then the operation becomes
+%   C<...> = operation(A,B).  Otherwise, C = C + operation(A,B) is
+%   computed where '+' is the accum operator.  It acts like a sparse
+%   matrix addition (see GrB.eadd), in terms of the structure of the
+%   result C, but any binary operator can be used.
+%
+%   The mask M acts like MATLAB logical indexing.  If M(i,j)=1 then
+%   C(i,j) can be modified; if zero, it cannot be modified by the
+%   operation.
+
+%% Static Methods for graph algorithms:
+%
+%   r = GrB.pagerank (A, opts) ;            % PageRank of a matrix
+%   C = GrB.ktruss (A, k, check) ;          % k-truss
+%   s = GrB.tricount (A, check) ;           % triangle count
+%   L = GrB.laplacian (A, type, check) ;    % graph Laplacian
+%   C = GrB.incidence (A, ...) ;            % incidence matrix
+%   [v, parent] = GrB.bfs (A, s, ...) ;     % breadth-first search
+%   iset = GrB.mis (A, check) ;             % maximal independent set
+%   Y = GrB.dnn (W, bias, Y0) ;             % deep neural network
+%
+%   More graph algorithms will be added in the future.
+%
+% Thanks for watching!
+%
+% Tim Davis, Texas A&M University, http://faculty.cse.tamu.edu/davis
+% See also sparse, doc sparse, and https://twitter.com/DocSparse
+
+
+##### SOURCE END #####
+--></body></html>
diff --git a/GraphBLAS/test/Contents.m b/GraphBLAS/test/Contents.m
index fd8e6a1e98..f1dce9f141 100644
--- a/GraphBLAS/test/Contents.m
+++ b/GraphBLAS/test/Contents.m
@@ -74,6 +74,10 @@
 %  gbtest69  - test flip
 %  gbtest70  - test GrB.random
 %  gbtest71  - test GrB.selectopinfo
+%  gbtest72  - test any-pair semiring
+%  gbtest73  - test GrB.normdiff
+%  gbtest97  - test A*x performance
+%  gbtest98  - test A'*x performance
 %  gbtest99  - test GrB.bfs and plot (graph (G))
 %
 % Utilities:
@@ -82,6 +86,6 @@
 %  gbtest_types  - return a cell array of strings, listing all types
 %  gbtest_eq     - tests if A and B are equal, after dropping zeros.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
diff --git a/GraphBLAS/test/README.md b/GraphBLAS/test/README.md
index d08f590a2c..4da469f125 100644
--- a/GraphBLAS/test/README.md
+++ b/GraphBLAS/test/README.md
@@ -37,6 +37,6 @@ GraphBLAS resides in your file system.
 See the tcov subfolder to run the test with statement coverage of the
 C mexFunctions and utility routines.
 
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
diff --git a/GraphBLAS/test/gbtest.m b/GraphBLAS/test/gbtest.m
index 98143e592f..1ee81313b8 100644
--- a/GraphBLAS/test/gbtest.m
+++ b/GraphBLAS/test/gbtest.m
@@ -24,92 +24,95 @@
 %
 % See also GrB.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % gbtest3 requires ../demo/dnn_matlab.m and ../demo/dnn_mat2gb.m.
 demo_folder = fullfile (fileparts (mfilename ('fullpath')), '../demo') ;
 addpath (demo_folder) ;
+rng ('default') ;
 
 try
     GrB.init
 catch
 end
 
-gbtest0
-gbtest1
-gbtest2
-gbtest3
-gbtest4
-gbtest5
-gbtest6
-gbtest7
-gbtest8
-gbtest9
-gbtest10
-gbtest11
-gbtest12
-gbtest13
-gbtest14
-gbtest15
-gbtest16
-gbtest17
-gbtest18
-gbtest19
-gbtest20
-gbtest21
-gbtest22
-gbtest23
-gbtest24
-gbtest25
-gbtest26
-gbtest27
-gbtest28
-gbtest29
-gbtest30
-gbtest31
-gbtest32
-gbtest33
-gbtest34
-gbtest35
-gbtest36
-gbtest37
-gbtest38
-gbtest39
-gbtest40
-gbtest41
-gbtest42
-gbtest43
-gbtest44
-gbtest45
-gbtest46
-gbtest47
-gbtest48
-gbtest49
-gbtest50
-gbtest51
-gbtest52
-gbtest53
-gbtest54
-gbtest55
-gbtest56
-gbtest57
-gbtest58
-gbtest59
-gbtest60
-gbtest61
-gbtest62
-gbtest63
-gbtest64
-gbtest65
-gbtest66
-gbtest67
-gbtest68
-gbtest69
-gbtest70
-gbtest71
+gbtest0   % test GrB.clear
+gbtest1   % test GrB
+gbtest2   % list all binary operators
+gbtest3   % test dnn
+gbtest4   % list all possible semirings
+gbtest5   % test GrB.descriptorinfo
+gbtest6   % test GrB.mxm
+gbtest7   % test GrB.build
+gbtest8   % test GrB.select
+gbtest9   % test eye and speye
+gbtest10  % test GrB.assign
+gbtest11  % test GrB, sparse
+gbtest12  % test GrB.eadd, GrB.emult
+gbtest13  % test find and GrB.extracttuples
+gbtest14  % test kron and GrB.kronecker
+gbtest15  % list all unary operators
+gbtest16  % test GrB.extract
+gbtest17  % test GrB.trans
+gbtest18  % test comparators (and, or, >, ...)
+gbtest19  % test mpower
+gbtest20  % test bandwidth, isdiag, ceil, floor, round, fix
+gbtest21  % test isfinite, isinf, isnan
+gbtest22  % test reduce to scalar
+gbtest23  % test min and max
+gbtest24  % test any, all
+gbtest25  % test diag, tril, triu
+gbtest26  % test typecasting
+gbtest27  % test conversion to full
+gbtest28  % test GrB.build
+gbtest29  % test subsref and subsasgn with logical indexing
+gbtest30  % test colon notation
+gbtest31  % test GrB and casting
+gbtest32  % test nonzeros
+gbtest33  % test spones, numel, nzmax, size, length, isempty, issparse, ...
+gbtest34  % test repmat
+gbtest35  % test reshape
+gbtest36  % test abs, sign
+gbtest37  % test istril, istriu, isbanded, isdiag, ishermitian, ...
+gbtest38  % test sqrt, eps, ceil, floor, round, fix, real, conj, ...
+gbtest39  % test amd, colamd, symamd, symrcm, dmperm, etree
+gbtest40  % test sum, prod, max, min, any, all, norm
+gbtest41  % test ones, zeros, false
+gbtest42  % test for nan
+gbtest43  % test error handling
+gbtest44  % test subsasgn, mtimes, plus, false, ...
+gbtest45  % test GrB.vreduce
+gbtest46  % test GrB.subassign and GrB.assign
+gbtest47  % test GrB.entries, GrB.nonz, numel
+gbtest48  % test GrB.apply
+gbtest49  % test GrB.prune
+gbtest50  % test GrB.ktruss and GrB.tricount
+gbtest51  % test GrB.tricount
+gbtest52  % test GrB.format
+gbtest53  % test GrB.monoidinfo
+gbtest54  % test GrB.compact
+gbtest55  % test disp
+gbtest56  % test GrB.empty
+gbtest57  % test fprintf and sprintf
+gbtest58  % test uplus
+gbtest59  % test end
+gbtest60  % test issigned
+gbtest61  % test GrB.laplacian
+gbtest62  % test ldivide, rdivide, mldivide, mrdivide
+gbtest63  % test GrB.incidence
+gbtest64  % test GrB.pagerank
+gbtest65  % test GrB.mis
+gbtest66  % test graph
+gbtest67  % test digraph
+gbtest68  % test isequal
+gbtest69  % test flip
+gbtest70  % test GrB.random
+gbtest71  % test GrB.selectopinfo
+gbtest72  % test any-pair semiring
+gbtest73  % test GrB.normdiff
 
-gbtest99    % last test since it creates a figure
+gbtest99  % test GrB.bfs and plot (graph (G))
 
 fprintf ('\ngbtest: all tests passed\n') ;
 
diff --git a/GraphBLAS/test/gbtest0.m b/GraphBLAS/test/gbtest0.m
index 2dbbfa7850..280d91046b 100644
--- a/GraphBLAS/test/gbtest0.m
+++ b/GraphBLAS/test/gbtest0.m
@@ -4,7 +4,11 @@
 GrB.clear
 
 assert (isequal (GrB.format, 'by col')) ;
-assert (isequal (GrB.chunk, 4096)) ;
+assert (isequal (GrB.chunk, 64*1024)) ;
+
+GrB.burble (1) ;
+GrB.burble (0) ;
+assert (~GrB.burble) ;
 
 fprintf ('default # of threads: %d\n', GrB.threads) ;
 
diff --git a/GraphBLAS/test/gbtest1.m b/GraphBLAS/test/gbtest1.m
index 624ee833f2..4766ce99b9 100644
--- a/GraphBLAS/test/gbtest1.m
+++ b/GraphBLAS/test/gbtest1.m
@@ -1,11 +1,11 @@
 function gbtest1
 %GBTEST1 test GrB
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
-X = 100 * sprand (3, 4, 0.4)
+X = 100 * sprand (3, 4, 0.4) %#ok<*NOPRT>
 
 % types = { 'double' } ;
 
diff --git a/GraphBLAS/test/gbtest10.m b/GraphBLAS/test/gbtest10.m
index c6b72365cc..517823b386 100644
--- a/GraphBLAS/test/gbtest10.m
+++ b/GraphBLAS/test/gbtest10.m
@@ -1,7 +1,7 @@
 function gbtest10
 %GBTEST10 test GrB.assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest11.m b/GraphBLAS/test/gbtest11.m
index e6c23d70a7..bd36f37e15 100644
--- a/GraphBLAS/test/gbtest11.m
+++ b/GraphBLAS/test/gbtest11.m
@@ -1,12 +1,12 @@
 function gbtest11
 %GBTEST11 test GrB, sparse
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
 A = 100 * rand (4) ;
-A (1,1) = 0
+A (1,1) = 0 %#ok<*NOPRT>
 S = sparse (A)
 assert (gbtest_eq (S, double (full (GrB (S)))))
 assert (gbtest_eq (S, double (full (full (GrB (S))))))
diff --git a/GraphBLAS/test/gbtest12.m b/GraphBLAS/test/gbtest12.m
index 62ba73f6ec..9ab7df3a02 100644
--- a/GraphBLAS/test/gbtest12.m
+++ b/GraphBLAS/test/gbtest12.m
@@ -1,7 +1,7 @@
 function gbtest12
 %GBTEST12 test GrB.eadd, GrB.emult
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -12,7 +12,7 @@
 D = A.*B ;
 
 G = GrB.eadd ('+', A, B) ;
-err = norm (C-G, 1)
+err = norm (C-G, 1) %#ok<*NOPRT>
 assert (logical (err < 1e-12))
 
 H = GrB.emult ('*', A, B) ;
@@ -35,7 +35,7 @@
 
 d.kind = 'GrB' ;
 G = GrB.eadd ('+', A, B, d) ;
-err = norm (C-G, 1)
+err = norm (C-G, 1) %#ok<*NASGU>
 
 H = GrB.emult ('*', A, B, d) ;
 err = norm (D-H, 1)
@@ -43,7 +43,7 @@
 E = sparse (rand (2)) ;
 C = E + A+B ;
 G = GrB.eadd (E, '+', '+', A, B) ;
-C-G
+C-G %#ok<*MNEFF>
 
 F = sparse (rand (2)) ;
 D = F + A.*B ;
diff --git a/GraphBLAS/test/gbtest13.m b/GraphBLAS/test/gbtest13.m
index 43d864f527..a37fa01372 100644
--- a/GraphBLAS/test/gbtest13.m
+++ b/GraphBLAS/test/gbtest13.m
@@ -1,13 +1,13 @@
 function gbtest13
 %GBTEST13 test find and GrB.extracttuples
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 list = gbtest_types ;
 
 A = 100 * rand (3) ;
-[I, J, X] = find (A) ;
+[I, J, X] = find (A) ; %#ok<*ASGLU>
 I_0 = int64 (I) - 1 ;
 J_0 = int64 (J) - 1 ;
 A (1,1) = 0 ;
diff --git a/GraphBLAS/test/gbtest14.m b/GraphBLAS/test/gbtest14.m
index 81a69b53eb..c44cffc703 100644
--- a/GraphBLAS/test/gbtest14.m
+++ b/GraphBLAS/test/gbtest14.m
@@ -1,7 +1,7 @@
 function gbtest14
 %GBTEST14 test kron and GrB.kronecker
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -33,6 +33,7 @@
 d.kind = 'GrB' ;
 G = GrB.kronecker ('*', A, B, d) ;
 err = norm (C-G, 1) ;
+assert (err < 1e-12) ;
 
 d2 = d ;
 d2.in1 = 'transpose' ;
diff --git a/GraphBLAS/test/gbtest15.m b/GraphBLAS/test/gbtest15.m
index 8efef3efe6..3526a4c4e7 100644
--- a/GraphBLAS/test/gbtest15.m
+++ b/GraphBLAS/test/gbtest15.m
@@ -1,11 +1,11 @@
 function gbtest15
 %GBTEST15 list all unary operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 types = gbtest_types ;
-ops = { 'identity', '~', '-', '1', 'minv', 'abs' }
+ops = { 'identity', '~', '-', '1', 'minv', 'abs' } ;
 
 for k1 = 1:length (ops)
     for k2 = 1:length (types)
diff --git a/GraphBLAS/test/gbtest16.m b/GraphBLAS/test/gbtest16.m
index 55f8c903b1..ade9443e6a 100644
--- a/GraphBLAS/test/gbtest16.m
+++ b/GraphBLAS/test/gbtest16.m
@@ -1,7 +1,7 @@
 function gbtest16
 %GBTEST16 test GrB.extract
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest17.m b/GraphBLAS/test/gbtest17.m
index b7c8252e48..4a32540fd3 100644
--- a/GraphBLAS/test/gbtest17.m
+++ b/GraphBLAS/test/gbtest17.m
@@ -1,7 +1,7 @@
 function gbtest17
 %GBTEST17 test GrB.trans
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest18.m b/GraphBLAS/test/gbtest18.m
index f2da986f54..e00a9edbc5 100644
--- a/GraphBLAS/test/gbtest18.m
+++ b/GraphBLAS/test/gbtest18.m
@@ -1,7 +1,7 @@
 function gbtest18
 %GBTEST18 test comparators (and, or, >, ...)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -17,7 +17,7 @@
         n = 5 ;
     end
 
-    MA = sprand (m,n, 0.5) ;    A (2,2) = 2 ;
+    MA = sprand (m,n, 0.5) ;    A (2,2) = 2 ; %#ok<*NASGU>
     MB = sprand (m,n, 0.5) ;    B (2,2) = 2 ;
 
     if (rand < 0.1)
diff --git a/GraphBLAS/test/gbtest19.m b/GraphBLAS/test/gbtest19.m
index 6f5a0b8ce3..fd132099f7 100644
--- a/GraphBLAS/test/gbtest19.m
+++ b/GraphBLAS/test/gbtest19.m
@@ -1,7 +1,7 @@
 function gbtest19
 %GBTEST19 test mpower
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest2.m b/GraphBLAS/test/gbtest2.m
index 85686987bc..d275a31682 100644
--- a/GraphBLAS/test/gbtest2.m
+++ b/GraphBLAS/test/gbtest2.m
@@ -1,7 +1,7 @@
 function gbtest2
 %GBTEST2 list all binary operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 optype = gbtest_types ;
@@ -16,7 +16,7 @@
 
         op = opname ;
         if (k2 > 0)
-            op = [op '.' optype{k2}] ;
+            op = [op '.' optype{k2}] ; %#ok<*AGROW>
         end
         fprintf ('\nop: [%s]\n', op) ;
         if (k2 > 0)
diff --git a/GraphBLAS/test/gbtest20.m b/GraphBLAS/test/gbtest20.m
index 1e80573863..2716773934 100644
--- a/GraphBLAS/test/gbtest20.m
+++ b/GraphBLAS/test/gbtest20.m
@@ -1,7 +1,7 @@
 function gbtest20
 %GBTEST20 test bandwidth, isdiag, ceil, floor, round, fix
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest21.m b/GraphBLAS/test/gbtest21.m
index b044a88c7b..ac27ae2fcb 100644
--- a/GraphBLAS/test/gbtest21.m
+++ b/GraphBLAS/test/gbtest21.m
@@ -1,7 +1,7 @@
 function gbtest21
 %GBTEST21 test isfinite, isinf, isnan
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -13,7 +13,7 @@
             if (rand < 0.1)
                 A = int32 (full (A)) ;
             else
-                A (1,1) = nan ;
+                A (1,1) = nan ; %#ok<*SPRIX>
                 A (2,2) = inf ;
             end
             G = GrB (A) ;
diff --git a/GraphBLAS/test/gbtest22.m b/GraphBLAS/test/gbtest22.m
index 6aaf7d9661..ceb85562a6 100644
--- a/GraphBLAS/test/gbtest22.m
+++ b/GraphBLAS/test/gbtest22.m
@@ -1,7 +1,7 @@
 function gbtest22
 %GBTEST22 test reduce to scalar
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -13,7 +13,7 @@
     type = types {k} ;
     if (isequal (type, 'logical'))
         c = false ;
-        c = GrB.reduce (c, '|', '|', cast (A, 'logical')) ;
+        c = GrB.reduce (c, '|', '|', cast (A, 'logical')) ; %#ok<*NASGU>
     else
         c = ones (1, 1, type) ;
         c = GrB.reduce (c, '+', '+', cast (A, type)) ;
@@ -28,7 +28,7 @@
         for n = 0:5
             A = 100 * sprand (m, n, 0.5) ;
             G = GrB (A) ;
-            [i j x] = find (A) ;
+            [i, j, x] = find (A) ; %#ok<*ASGLU>
 
             % c1 = sum (A, 'all') ;
             c1 = sum (sum (A)) ;
@@ -97,7 +97,7 @@
             c3 = all (G, 'all') ;
             assert (c1 == logical (c3)) ;
 
-            [i j x] = find (A) ;
+            [i, j, x] = find (A) ;
             % c1 = all (x, 'all') ;
             c1 = all (x) ;
             c2 = GrB.reduce ('&.logical', A) ;
diff --git a/GraphBLAS/test/gbtest23.m b/GraphBLAS/test/gbtest23.m
index f86aac62f0..1c9f3b96db 100644
--- a/GraphBLAS/test/gbtest23.m
+++ b/GraphBLAS/test/gbtest23.m
@@ -1,7 +1,7 @@
 function gbtest23
 %GBTEST23 test min and max
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest24.m b/GraphBLAS/test/gbtest24.m
index 3a75f489c24..034b3efca2 100644
--- a/GraphBLAS/test/gbtest24.m
+++ b/GraphBLAS/test/gbtest24.m
@@ -1,7 +1,7 @@
 function gbtest24
 %GBTEST24 test any, all
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -19,7 +19,7 @@
             MB = MB .* S ;
 
             GA = GrB (MA) ;
-            GB = GrB (MB) ;
+            GB = GrB (MB) ; %#ok<*NASGU>
 
             c1 = all (MA) ;
             c2 = all (GA) ;
diff --git a/GraphBLAS/test/gbtest25.m b/GraphBLAS/test/gbtest25.m
index 25cb4e8524..9e99abe5de 100644
--- a/GraphBLAS/test/gbtest25.m
+++ b/GraphBLAS/test/gbtest25.m
@@ -1,7 +1,7 @@
 function gbtest25
 %GBTEST25 test diag, tril, triu
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest26.m b/GraphBLAS/test/gbtest26.m
index bc248de2ab..1655c706db 100644
--- a/GraphBLAS/test/gbtest26.m
+++ b/GraphBLAS/test/gbtest26.m
@@ -8,7 +8,7 @@
 
     atype = types {k1} ;
     fprintf ('\n================================================ %s\n', atype) ;
-    A = cast (100 * rand (3), atype)
+    A = cast (100 * rand (3), atype) %#ok<*NOPRT>
     H = GrB (A) ;
     B = cast (H, atype) ;
     assert (gbtest_eq (A, B)) ;
@@ -18,7 +18,7 @@
         gtype = types {k2} ;
         fprintf ('\n------------ %s:\n', gtype) ;
         G = GrB (H, gtype)
-        K = GrB (G, atype)
+        K = GrB (G, atype) %#ok<*NASGU>
         C = cast (G, atype)
     end
 end
diff --git a/GraphBLAS/test/gbtest27.m b/GraphBLAS/test/gbtest27.m
index 36d6000185..bc54980770 100644
--- a/GraphBLAS/test/gbtest27.m
+++ b/GraphBLAS/test/gbtest27.m
@@ -9,8 +9,8 @@
     atype = types {k1} ;
     fprintf ('\n================================================ %s\n', atype) ;
     A = 100 * sprand (3, 3, 0.5) ;
-    H = GrB (A, atype)
-    G = full (H)
+    H = GrB (A, atype) %#ok<*NOPRT>
+    G = full (H) %#ok<*NASGU>
 
     for k2 = 1:length (types)
 
diff --git a/GraphBLAS/test/gbtest28.m b/GraphBLAS/test/gbtest28.m
index f720d7840d..d8710fe5ca 100644
--- a/GraphBLAS/test/gbtest28.m
+++ b/GraphBLAS/test/gbtest28.m
@@ -1,7 +1,7 @@
 function gbtest28
 %GBTEST28 test GrB.build
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ngbtest28: testing GrB.build and compare with A=sparse(i,j,x)\n') ;
@@ -13,7 +13,7 @@
 n = 5 ;
 A = sprand (m, n, 0.5) ;
 
-[i j x] = find (A) ;
+[i, j, x] = find (A) ;
 
 C = GrB.build (i, j, x, m, n) ;
 
@@ -32,8 +32,8 @@
 n = size (A, 1) ;
 fprintf ('%12.4f sec : A n-by-n, whth n: %g nnz: %g\n', t, n, nnz (A)) ;
 
-[i j x] = find (A) ;
-[m n] = size (A) ;
+[i, j, x] = find (A) ;
+[m, n] = size (A) ;
 
 i0 = uint64 (i) - 1 ;
 j0 = uint64 (j) - 1 ;
diff --git a/GraphBLAS/test/gbtest29.m b/GraphBLAS/test/gbtest29.m
index 4b639cdfa0..ebd64a1a86 100644
--- a/GraphBLAS/test/gbtest29.m
+++ b/GraphBLAS/test/gbtest29.m
@@ -43,7 +43,7 @@
             % and GraphBLAS, and all these uses work:
 
             C1 = C ;
-            C1 (M) = A (M) ;        % C1(M) is MATLAB, A(M) is MATLAB
+            C1 (M) = A (M) ;%#ok<*SPRIX> % C1(M) is MATLAB, A(M) is MATLAB
 
             C2 = GrB (C) ;
             C2 (M) = A (M) ;        % C2(M) is GrB, A(M) is MATLAB
diff --git a/GraphBLAS/test/gbtest3.m b/GraphBLAS/test/gbtest3.m
index 75d232d514..5dbb9d7fee 100644
--- a/GraphBLAS/test/gbtest3.m
+++ b/GraphBLAS/test/gbtest3.m
@@ -1,7 +1,7 @@
 function gbtest3
 %GBTEST3 test dnn
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('gbtest3: testing sparse deep neural network\n') ;
@@ -13,7 +13,7 @@
 nneurons = 16 ;
 
 for level = 1:levels
-    W {level} = sprand (nneurons, nneurons, 0.5) ;
+    W {level} = sprand (nneurons, nneurons, 0.5) ; %#ok<*AGROW>
     bias {level} = -0.3 * ones (1, nneurons) ;
 end
 
diff --git a/GraphBLAS/test/gbtest30.m b/GraphBLAS/test/gbtest30.m
index 126e99ba5b..2f1d3a2878 100644
--- a/GraphBLAS/test/gbtest30.m
+++ b/GraphBLAS/test/gbtest30.m
@@ -2,7 +2,7 @@
 %GBTEST30 test colon notation
 
 rng ('default') ;
-n = 1e9
+n = 1e9 %#ok<*NOPRT>
 A = sparse (n, 1) ;
 
 k = n/2 - 1 ;
@@ -36,7 +36,7 @@
 
 % GraphBLAS can construct huge-by-huge matrices
 tic
-[c huge] = computer
+[c, huge] = computer %#ok<*ASGLU>
 H = GrB (huge, huge)
 I = sort (randperm (huge, 4)) ;
 M = magic (4)
@@ -52,7 +52,7 @@
 fprintf ('GraphBLAS colon notation:\nmiddle = %g\n\n', middle) ;
 fprintf ('H2 = H ({1, middle}, {1, middle}) works, and is very fast:\n') ;
 tic
-H2 = H ({1, middle}, {1, middle})
+H2 = H ({1, middle}, {1, middle}) %#ok<*NASGU>
 toc
 
 % This is not possible, because 1:middle is too big:
diff --git a/GraphBLAS/test/gbtest33.m b/GraphBLAS/test/gbtest33.m
index f00e83c985..7888353250 100644
--- a/GraphBLAS/test/gbtest33.m
+++ b/GraphBLAS/test/gbtest33.m
@@ -42,10 +42,10 @@
                 assert (numel (G) == m*n) ;
                 assert (nzmax (G) == max (nnz (G), 1))
                 assert (isequal (size (G), [m n])) ;
-                [m1 n1]  = size (G) ;
+                [m1, n1]  = size (G) ;
                 assert (isequal ([m1 n1], [m n])) ;
-                if (m == 0 | n == 0)
-                    assert (length (G) == 0) ;
+                if (m == 0 || n == 0)
+                    assert (isempty (G)) ;
                 else
                     assert (length (G) == max (m, n)) ;
                 end
diff --git a/GraphBLAS/test/gbtest34.m b/GraphBLAS/test/gbtest34.m
index 4e480df1c1..73cb6d931c 100644
--- a/GraphBLAS/test/gbtest34.m
+++ b/GraphBLAS/test/gbtest34.m
@@ -1,7 +1,7 @@
 function gbtest34
 %GBTEST34 test repmat
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest35.m b/GraphBLAS/test/gbtest35.m
index bc20d961b6..b9eada4210 100644
--- a/GraphBLAS/test/gbtest35.m
+++ b/GraphBLAS/test/gbtest35.m
@@ -1,7 +1,7 @@
 function gbtest35
 %GBTEST35 test reshape
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default')
diff --git a/GraphBLAS/test/gbtest36.m b/GraphBLAS/test/gbtest36.m
index b3f7afc775..8e5ecc04e2 100644
--- a/GraphBLAS/test/gbtest36.m
+++ b/GraphBLAS/test/gbtest36.m
@@ -1,7 +1,7 @@
 function gbtest36
 %GBTEST36 test abs, sign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest37.m b/GraphBLAS/test/gbtest37.m
index 54dc7f000b..04cae588ad 100644
--- a/GraphBLAS/test/gbtest37.m
+++ b/GraphBLAS/test/gbtest37.m
@@ -2,7 +2,7 @@
 %GBTEST37 test istril, istriu, isbanded, isdiag, ishermitian, ...
 % issymmetric, bandwith
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest38.m b/GraphBLAS/test/gbtest38.m
index eca68155fe..d77dc48055 100644
--- a/GraphBLAS/test/gbtest38.m
+++ b/GraphBLAS/test/gbtest38.m
@@ -2,7 +2,7 @@
 %GBTEST38 test sqrt, eps, ceil, floor, round, fix, real, conj, ...
 % isfinite, isinf, isnan, spfun, eig
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest39.m b/GraphBLAS/test/gbtest39.m
index b8f5251e13..5d082dfab9 100644
--- a/GraphBLAS/test/gbtest39.m
+++ b/GraphBLAS/test/gbtest39.m
@@ -1,7 +1,7 @@
 function gbtest39
 %GBTEST39 test amd, colamd, symamd, symrcm, dmperm, etree
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 for trial = 1:40
@@ -28,21 +28,21 @@
     assert (isequal (etree (A), etree (G))) ;
     assert (isequal (etree (S), etree (H))) ;
 
-    [p1 post1] = dmperm (A) ;
-    [p2 post2] = dmperm (G) ;
+    [p1, post1] = dmperm (A) ;
+    [p2, post2] = dmperm (G) ;
     assert (isequal (p1, p2)) ;
     assert (isequal (post1, post2)) ;
 
-    [p1 post1] = dmperm (S) ;
-    [p2 post2] = dmperm (H) ;
+    [p1, post1] = dmperm (S) ;
+    [p2, post2] = dmperm (H) ;
     assert (isequal (p1, p2)) ;
     assert (isequal (post1, post2)) ;
 
     assert (isequal (dmperm (A), dmperm (G))) ;
     assert (isequal (dmperm (S), dmperm (H))) ;
 
-    [p1 q1 r1 s1 cc1 rr1] = dmperm (A) ;
-    [p2 q2 r2 s2 cc2 rr2] = dmperm (G) ;
+    [p1, q1, r1, s1, cc1, rr1] = dmperm (A) ;
+    [p2, q2, r2, s2, cc2, rr2] = dmperm (G) ;
     assert (isequal (p1, p2)) ;
     assert (isequal (q1, q2)) ;
     assert (isequal (r1, r2)) ;
@@ -50,8 +50,8 @@
     assert (isequal (cc1, cc2)) ;
     assert (isequal (rr1, rr2)) ;
 
-    [p1 q1 r1 s1 cc1 rr1] = dmperm (S) ;
-    [p2 q2 r2 s2 cc2 rr2] = dmperm (H) ;
+    [p1, q1, r1, s1, cc1, rr1] = dmperm (S) ;
+    [p2, q2, r2, s2, cc2, rr2] = dmperm (H) ;
     assert (isequal (p1, p2)) ;
     assert (isequal (q1, q2)) ;
     assert (isequal (r1, r2)) ;
diff --git a/GraphBLAS/test/gbtest4.m b/GraphBLAS/test/gbtest4.m
index 70c457b659..48b3226b57 100644
--- a/GraphBLAS/test/gbtest4.m
+++ b/GraphBLAS/test/gbtest4.m
@@ -1,8 +1,11 @@
 function gbtest4
-%GBTEST4 list all 1865 possible semirings
-% This count excludes operator synonyms
+%GBTEST4 list all 2204 possible semirings
+% This count excludes operator synonyms ('1st' and 'first', for example), but
+% it does include identical semirings with operators of different names.  For
+% example, the spec has many boolean operators with different names but they
+% compute the same thing.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 types = gbtest_types ;
@@ -34,7 +37,8 @@
     end
 end
 
-assert (nsemirings == 1865)
+nsemirings %#ok<*NOPRT>
+assert (nsemirings == 2204) ;
 GrB.semiringinfo
 
 fprintf ('\ngbtest4: all tests passed\n') ;
diff --git a/GraphBLAS/test/gbtest41.m b/GraphBLAS/test/gbtest41.m
index 7c8af86ece..1538af87f4 100644
--- a/GraphBLAS/test/gbtest41.m
+++ b/GraphBLAS/test/gbtest41.m
@@ -1,7 +1,7 @@
 function gbtest41
 %GBTEST41 test ones, zeros, false
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 types = gbtest_types ;
diff --git a/GraphBLAS/test/gbtest42.m b/GraphBLAS/test/gbtest42.m
index c79f796f7a..445214d9da 100644
--- a/GraphBLAS/test/gbtest42.m
+++ b/GraphBLAS/test/gbtest42.m
@@ -1,7 +1,7 @@
 function gbtest42
 %GBTEST42 test for nan
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest43.m b/GraphBLAS/test/gbtest43.m
index fab6ae3072..18559d6611 100644
--- a/GraphBLAS/test/gbtest43.m
+++ b/GraphBLAS/test/gbtest43.m
@@ -3,7 +3,7 @@
 %
 % All errors generated by this test are expected.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -11,8 +11,8 @@
 G = GrB (magic (5)) ;
 
 try
-    x = prod (G, 'gunk') ;
-    ok = false
+    x = prod (G, 'gunk') ; %#ok<*NASGU>
+    ok = false %#ok<*NOPRT>
 catch expected_error
     expected_error
     disp (expected_error.stack (end-1))
@@ -83,7 +83,7 @@
 assert (ok) ;
 
 try
-    [x, y] = bandwidth (G, 'lower') ;
+    [x, y] = bandwidth (G, 'lower') ; %#ok<*ASGLU>
     ok = false
 catch expected_error
     expected_error
@@ -311,7 +311,7 @@
 assert (ok) ;
 
 try
-    C = zeros (3, 3, 'crud', G) ;
+    C = zeros (3, 3, 'crud', G) ; %#ok<*PREALL>
     ok = false
 catch expected_error
     expected_error
diff --git a/GraphBLAS/test/gbtest44.m b/GraphBLAS/test/gbtest44.m
index 4d23c8b8be..be6b5cf90e 100644
--- a/GraphBLAS/test/gbtest44.m
+++ b/GraphBLAS/test/gbtest44.m
@@ -1,7 +1,7 @@
 function gbtest44
 %GBTEST44 test subsasgn, mtimes, plus, false, ...
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest45.m b/GraphBLAS/test/gbtest45.m
index ce4fff6298..7005fa4616 100644
--- a/GraphBLAS/test/gbtest45.m
+++ b/GraphBLAS/test/gbtest45.m
@@ -1,7 +1,7 @@
 function gbtest45
 %GBTEST45 test GrB.vreduce
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest46.m b/GraphBLAS/test/gbtest46.m
index 54313ffef7..3dcb1e79e2 100644
--- a/GraphBLAS/test/gbtest46.m
+++ b/GraphBLAS/test/gbtest46.m
@@ -1,7 +1,7 @@
 function gbtest46
 %GBTEST46 test GrB.subassign and GrB.assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest47.m b/GraphBLAS/test/gbtest47.m
index 7c11b12604..19ded4a802 100644
--- a/GraphBLAS/test/gbtest47.m
+++ b/GraphBLAS/test/gbtest47.m
@@ -1,7 +1,7 @@
 function gbtest47
 %GBTEST47 test GrB.entries, GrB.nonz, numel
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -51,7 +51,7 @@
 assert (c1 == c2) ;
 
 try
-    x = vpa (1) ;
+    x = vpa (1) ; %#ok<*NASGU>
     have_symbolic = true ;
 catch
     % symbolic toolbox not available
diff --git a/GraphBLAS/test/gbtest48.m b/GraphBLAS/test/gbtest48.m
index d068e53cae..157b110643 100644
--- a/GraphBLAS/test/gbtest48.m
+++ b/GraphBLAS/test/gbtest48.m
@@ -1,7 +1,7 @@
 function gbtest48
 %GBTEST48 test GrB.apply
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest49.m b/GraphBLAS/test/gbtest49.m
index 68de2e1b91..c20ba28711 100644
--- a/GraphBLAS/test/gbtest49.m
+++ b/GraphBLAS/test/gbtest49.m
@@ -1,7 +1,7 @@
 function gbtest49
 %GBTEST49 test GrB.prune
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -20,7 +20,7 @@
     assert (isequal (C0, C2)) ;
 
     C0 = sparse (A) ;
-    C0 (1,1) = 0 ;
+    C0 (1,1) = 0 ; %#ok<*SPRIX>
     C1 = GrB.prune (A, 1) ;
     C2 = GrB.prune (G, 1) ;
     assert (isequal (C0, double (C1))) ;
diff --git a/GraphBLAS/test/gbtest5.m b/GraphBLAS/test/gbtest5.m
index 33bd0910d8..10b1f8fb7e 100644
--- a/GraphBLAS/test/gbtest5.m
+++ b/GraphBLAS/test/gbtest5.m
@@ -1,17 +1,19 @@
 function gbtest5
 %GBTEST5 test GrB.descriptorinfo
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 list_out  = { [ ], 'default', 'replace' } ;
 list_in   = { [ ], 'default', 'transpose' } ;
-list_mask = { [ ], 'default', 'complement' } ;
-list_axb  = { [ ], 'default', 'gustavson', 'heap', 'dot' } ;
+list_mask = { [ ], 'default', 'complement', ...
+                   'structural complement', 'structure' } ;
+list_axb  = { [ ], 'default', 'gustavson', 'heap', 'dot', 'hash', 'saxpy' } ;
+list_kind = { [ ], 'sparse', 'full', 'grb', 'default' } ;
 
 ntrials = 0;
 
-d = struct
+d = struct %#ok<*NASGU,*NOPRT>
 
 for k1 = 1:length (list_out)
     out = list_out {k1} ;
@@ -23,44 +25,51 @@
                 mask = list_mask {k4} ;
                 for k5 = 1:length (list_axb)
                     axb = list_axb {k5} ;
+                    for k6 = 1:length (list_kind)
+                        kind = list_kind {k6} ;
 
-                    for nthreads = [0 2]
-                        for chunk = [0 10000]
+                        for nthreads = [0 2]
+                            for chunk = [0 10000]
 
-                            clear d
-                            d = struct ;
+                                clear d
+                                d = struct ;
 
-                            if (~isempty (out))
-                                d.out = out ;
-                            end
+                                if (~isempty (out))
+                                    d.out = out ;
+                                end
 
-                            if (~isempty (mask))
-                                d.mask = mask ;
-                            end
+                                if (~isempty (mask))
+                                    d.mask = mask ;
+                                end
 
-                            if (~isempty (in0))
-                                d.in0 = in0 ;
-                            end
+                                if (~isempty (in0))
+                                    d.in0 = in0 ;
+                                end
 
-                            if (~isempty (in1))
-                                d.in1 = in1 ;
-                            end
+                                if (~isempty (in1))
+                                    d.in1 = in1 ;
+                                end
 
-                            if (~isempty (axb))
-                                d.axb = axb ;
-                            end
+                                if (~isempty (axb))
+                                    d.axb = axb ;
+                                end
 
-                            if (nthreads > 0)
-                                d.nthreads = nthreads ;
-                            end
+                                if (~isempty (kind))
+                                    d.kind = kind ;
+                                end
 
-                            if (chunk > 0)
-                                d.chunk = chunk ;
-                            end
+                                if (nthreads > 0)
+                                    d.nthreads = nthreads ;
+                                end
 
-                            d
-                            GrB.descriptorinfo (d) ;
-                            ntrials = ntrials + 1 ;
+                                if (chunk > 0)
+                                    d.chunk = chunk ;
+                                end
+
+                                d
+                                GrB.descriptorinfo (d) ;
+                                ntrials = ntrials + 1 ;
+                            end
                         end
                     end
                 end
diff --git a/GraphBLAS/test/gbtest50.m b/GraphBLAS/test/gbtest50.m
index a4a0800073..c876b88e49 100644
--- a/GraphBLAS/test/gbtest50.m
+++ b/GraphBLAS/test/gbtest50.m
@@ -1,12 +1,12 @@
 function gbtest50
 %GBTEST50 test GrB.ktruss and GrB.tricount
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
 
-load west0479 ;
+load west0479 ; %#ok<*LOAD>
 A = GrB.offdiag (west0479) ;
 A = A+A' ;
 C3a  = GrB.ktruss (A) ;
diff --git a/GraphBLAS/test/gbtest51.m b/GraphBLAS/test/gbtest51.m
index fa3c9f76de..c024fe6899 100644
--- a/GraphBLAS/test/gbtest51.m
+++ b/GraphBLAS/test/gbtest51.m
@@ -1,7 +1,7 @@
 function gbtest51
 %GBTEST51 test GrB.tricount
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 files =  {
@@ -39,7 +39,7 @@
            0
          120 ] ;
 
-[filepath, name, ext] = fileparts (mfilename ('fullpath')) ;
+[filepath, name, ext] = fileparts (mfilename ('fullpath')) ; %#ok<*ASGLU>
 
 for k = 1:nfiles
     filename = files {k} ;
@@ -47,7 +47,7 @@
     G = GrB.build (int64 (T (:,1)), int64 (T (:,2)), T (:,3), desc) ;
     [m, n] = size (G) ;
     if (m ~= n)
-        G = [GrB(m,m) G ; G' GrB(n,n)] ;
+        G = [GrB(m,m) G ; G' GrB(n,n)] ; %#ok<*AGROW>
     elseif (~issymmetric (G))
         G = G + G' ;
     end
diff --git a/GraphBLAS/test/gbtest52.m b/GraphBLAS/test/gbtest52.m
index da3eae61b2..39d80ab24d 100644
--- a/GraphBLAS/test/gbtest52.m
+++ b/GraphBLAS/test/gbtest52.m
@@ -1,17 +1,17 @@
 function gbtest52
 %GBTEST52 test GrB.format
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 GrB.format
 GrB.format ('by col') ;
-f = GrB.format
+f = GrB.format %#ok<*NOPRT>
 A = magic (4)
 G = GrB (A)
 assert (isequal (f, GrB.format (G))) ;
 GrB.format ('by row')
-f = GrB.format
+f = GrB.format %#ok<*NASGU>
 
 H = GrB (5,5)
 assert (isequal ('by row', GrB.format (H))) ;
diff --git a/GraphBLAS/test/gbtest53.m b/GraphBLAS/test/gbtest53.m
index df3025eb88..ae4ede2c23 100644
--- a/GraphBLAS/test/gbtest53.m
+++ b/GraphBLAS/test/gbtest53.m
@@ -1,7 +1,7 @@
 function gbtest53
 %GBTEST53 test GrB.monoidinfo
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 types10 = {
diff --git a/GraphBLAS/test/gbtest54.m b/GraphBLAS/test/gbtest54.m
index 0dfc5e0dbf..7b72db30ba 100644
--- a/GraphBLAS/test/gbtest54.m
+++ b/GraphBLAS/test/gbtest54.m
@@ -1,7 +1,7 @@
 function gbtest54
 %GBTEST54 test GrB.compact
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -11,7 +11,7 @@
 J = sort (randperm (n, 4)) ;
 A = magic (4) ;
 H (I,J) = A ;
-[C, I, J] = GrB.compact (H) ;
+[C, I, J] = GrB.compact (H) ; %#ok<*ASGLU>
 H (I, J(1)) = 0 ;
 [C, I, J] = GrB.compact (H, 0) ;
 assert (isequal (C, A (:,2:end))) ;
diff --git a/GraphBLAS/test/gbtest55.m b/GraphBLAS/test/gbtest55.m
index ce2fb85889..8b4b80af7b 100644
--- a/GraphBLAS/test/gbtest55.m
+++ b/GraphBLAS/test/gbtest55.m
@@ -1,12 +1,12 @@
 function gbtest55
 %GBTEST55 test disp
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
 
-H = GrB (rand (6))
+H = GrB (rand (6)) %#ok<*NOPRT>
 
 fprintf ('default:\n') ;
 disp (H) ;
diff --git a/GraphBLAS/test/gbtest56.m b/GraphBLAS/test/gbtest56.m
index 62e46558cb..13e53a1d92 100644
--- a/GraphBLAS/test/gbtest56.m
+++ b/GraphBLAS/test/gbtest56.m
@@ -1,7 +1,7 @@
 function gbtest56
 %GBTEST56 test GrB.empty
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 for m1 = -1:5
diff --git a/GraphBLAS/test/gbtest57.m b/GraphBLAS/test/gbtest57.m
index ef5210d074..0d0455499b 100644
--- a/GraphBLAS/test/gbtest57.m
+++ b/GraphBLAS/test/gbtest57.m
@@ -1,7 +1,7 @@
 function gbtest57
 %GBTEST57 test fprintf and sprintf
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 c1 = fprintf ('pi: %g\n', pi) ;
diff --git a/GraphBLAS/test/gbtest58.m b/GraphBLAS/test/gbtest58.m
index 3eeb24b46f..fd3b1347fd 100644
--- a/GraphBLAS/test/gbtest58.m
+++ b/GraphBLAS/test/gbtest58.m
@@ -1,7 +1,7 @@
 function gbtest58
 %GBTEST58 test uplus
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 A = 1 - 2 * rand (3) ;
diff --git a/GraphBLAS/test/gbtest59.m b/GraphBLAS/test/gbtest59.m
index 301303882a..f58434e689 100644
--- a/GraphBLAS/test/gbtest59.m
+++ b/GraphBLAS/test/gbtest59.m
@@ -1,7 +1,7 @@
 function gbtest59
 %GBTEST59 test end
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 A = rand (4,7) ;
diff --git a/GraphBLAS/test/gbtest6.m b/GraphBLAS/test/gbtest6.m
index 38a7e4a866..2ec62fe7da 100644
--- a/GraphBLAS/test/gbtest6.m
+++ b/GraphBLAS/test/gbtest6.m
@@ -1,7 +1,7 @@
 function gbtest6
 %GBTEST6 test GrB.mxm
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest60.m b/GraphBLAS/test/gbtest60.m
index f51f387b67..d01d7e916f 100644
--- a/GraphBLAS/test/gbtest60.m
+++ b/GraphBLAS/test/gbtest60.m
@@ -1,7 +1,7 @@
 function gbtest60
 %GBTEST60 test GrB.issigned
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 signed_types   = { 'double', 'single', 'int8', 'int16', 'int32', 'int64' } ;
diff --git a/GraphBLAS/test/gbtest61.m b/GraphBLAS/test/gbtest61.m
index d6c69aba28..d395df3460 100644
--- a/GraphBLAS/test/gbtest61.m
+++ b/GraphBLAS/test/gbtest61.m
@@ -1,7 +1,7 @@
 function gbtest61
 %GBTEST61 test GrB.laplacian
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest62.m b/GraphBLAS/test/gbtest62.m
index 2d882515aa..df47c41109 100644
--- a/GraphBLAS/test/gbtest62.m
+++ b/GraphBLAS/test/gbtest62.m
@@ -1,7 +1,7 @@
 function gbtest62
 %GBTEST62 test ldivide, rdivide, mldivide, mrdivide
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest63.m b/GraphBLAS/test/gbtest63.m
index 4f94a8cfbb..0efce10e32 100644
--- a/GraphBLAS/test/gbtest63.m
+++ b/GraphBLAS/test/gbtest63.m
@@ -1,7 +1,7 @@
 function gbtest63
 %GBTEST63 test GrB.incidence
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -24,7 +24,7 @@
         2 7 ] ;
         W = sparse (ij (:,1), ij (:,2), ones (12,1), 8, 8) ;
     else
-        load west0479 ;
+        load west0479 ; %#ok<*LOAD>
         W = west0479 ;
     end
 
diff --git a/GraphBLAS/test/gbtest64.m b/GraphBLAS/test/gbtest64.m
index c686d3c22b..5c51ec70b7 100644
--- a/GraphBLAS/test/gbtest64.m
+++ b/GraphBLAS/test/gbtest64.m
@@ -1,10 +1,10 @@
 function gbtest64
 %GBTEST64 test GrB.pagerank
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-load west0479 ;
+load west0479 ; %#ok<*LOAD>
 W = abs (west0479) ;
 W (1,:) = 0 ;
 
@@ -29,7 +29,7 @@
 warning ('off', 'GrB:pagerank') ;
 
 r1 = centrality (A, 'pagerank', 'MaxIterations', 2) ;
-[msg, id] = lastwarn ;
+[msg, id] = lastwarn ; %#ok<*ASGLU>
 assert (isequal (id, 'MATLAB:graphfun:centrality:PageRankNoConv')) ;
 
 r2 = GrB.pagerank (G, struct ('maxit', 2)) ;
diff --git a/GraphBLAS/test/gbtest65.m b/GraphBLAS/test/gbtest65.m
index 32c9770973..f9bd16c45c 100644
--- a/GraphBLAS/test/gbtest65.m
+++ b/GraphBLAS/test/gbtest65.m
@@ -1,7 +1,7 @@
 % function gbtest65
 %GBTEST65 test GrB.mis
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest66.m b/GraphBLAS/test/gbtest66.m
index c9322ec054..2e16f5ca54 100644
--- a/GraphBLAS/test/gbtest66.m
+++ b/GraphBLAS/test/gbtest66.m
@@ -1,7 +1,7 @@
 function gbtest66
 %GBTEST66 test graph
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest67.m b/GraphBLAS/test/gbtest67.m
index 73dcfdff70..f6893a997e 100644
--- a/GraphBLAS/test/gbtest67.m
+++ b/GraphBLAS/test/gbtest67.m
@@ -1,7 +1,7 @@
 function gbtest67
 %GBTEST67 test digraph
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest68.m b/GraphBLAS/test/gbtest68.m
index 2aa897c502..e69aaf271e 100644
--- a/GraphBLAS/test/gbtest68.m
+++ b/GraphBLAS/test/gbtest68.m
@@ -1,7 +1,7 @@
 function gbtest68
 %GBTEST68 test isequal
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest69.m b/GraphBLAS/test/gbtest69.m
index 015863bf48..3c0d847acf 100644
--- a/GraphBLAS/test/gbtest69.m
+++ b/GraphBLAS/test/gbtest69.m
@@ -1,7 +1,7 @@
 function gbtest69
 %GBTEST69 test flip
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/GraphBLAS/test/gbtest7.m b/GraphBLAS/test/gbtest7.m
index 07456c69cf..3dfbf5ce76 100644
--- a/GraphBLAS/test/gbtest7.m
+++ b/GraphBLAS/test/gbtest7.m
@@ -1,7 +1,7 @@
 function gbtest7
 %GBTEST7 test GrB.build
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -10,8 +10,8 @@
 A = sprand (n, n, 0.5) ;
 A (n,n) = 5 ;
 
-[i j x] = find (A) ;
-[m n] = size (A) ;
+[i, j, x] = find (A) ;
+[m, n] = size (A) ;
 
 G = GrB.build (i, j, x, m, n) ;
 S = sparse   (i, j, x, m, n) ;
diff --git a/GraphBLAS/test/gbtest70.m b/GraphBLAS/test/gbtest70.m
index dcc5c185be..4671f3895e 100644
--- a/GraphBLAS/test/gbtest70.m
+++ b/GraphBLAS/test/gbtest70.m
@@ -1,7 +1,7 @@
 function gbtest70
 %GBTEST70 test GrB.random
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 
@@ -18,14 +18,14 @@
     type = types {k} ;
 
     rng ('default') ;
-    G = GrB.random (30, 40, 0.6) ;
+    G = GrB.random (30, 40, 0.6) ; %#ok<*NASGU>
 
     r = cast ([3 40], type) ;
     G = GrB.random (300, 400, 0.6, 'range', r) ;
     assert (isequal (GrB.type (G), type)) ;
 
     if (~isequal (type, 'logical'))
-        [i,j,x] = find (G) ;
+        [i,j,x] = find (G) ; %#ok<*ASGLU>
         if (isinteger (r))
             assert (min (r) == min (r)) ;
             assert (max (r) == max (r)) ;
@@ -40,7 +40,7 @@
 
     G = GrB.random (30, 40, inf, 'normal') ;
     assert (isequal (GrB.type (G), 'double')) ;
-    assert (nnz (G) == prod (size (G))) ;
+    assert (nnz (G) == prod (size (G))) ; %#ok<*PSIZE>
 
     G = GrB.random (30, 40, 0.6, 'normal', 'range', r) ;
     assert (isequal (GrB.type (G), type)) ;
diff --git a/GraphBLAS/test/gbtest71.m b/GraphBLAS/test/gbtest71.m
index 6f12e271e0..334f1acefb 100644
--- a/GraphBLAS/test/gbtest71.m
+++ b/GraphBLAS/test/gbtest71.m
@@ -1,7 +1,7 @@
 function gbtest71
 %GBTEST71 test GrB.selectopinfo
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 
diff --git a/GraphBLAS/test/gbtest72.m b/GraphBLAS/test/gbtest72.m
new file mode 100644
index 0000000000..51d882366d
--- /dev/null
+++ b/GraphBLAS/test/gbtest72.m
@@ -0,0 +1,35 @@
+function gbtest72
+%GBTEST72 test any-pair semiring
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+rng ('default') ;
+dt = struct ('in0', 'transpose') ;
+ntrials = 1000 ;
+
+for n = [1 5 10 100 1000]
+    nfound = 0 ;
+    for trial = 1:ntrials
+        x = GrB.random (n, 1, 0.1, 'range', uint32 ([1 255])) ;
+        y = GrB.random (n, 1, 0.1, 'range', uint32 ([1 255])) ;
+        c1 = x'*y ;
+
+        c3 = GrB.mxm ('+.*', x, y, dt) ;
+        assert (isequal (c1, c3)) ;
+
+        c2 = GrB.mxm ('any.pair', x, y, dt) ;
+
+        c1_present = (GrB.entries (c1) == 1) ;
+        c2_present = (c2 == 1) ;
+        if (c1_present)
+            nfound = nfound + 1 ;
+        end
+        assert (c1_present == c2_present) ;
+        assert (c1_present == c2) ;
+    end
+    fprintf ('n: %4d trials: %4d found: %4d\n', n, ntrials, nfound) ;
+end
+
+fprintf ('gbtest72: all tests passed\n') ;
+
diff --git a/GraphBLAS/test/gbtest73.m b/GraphBLAS/test/gbtest73.m
new file mode 100644
index 0000000000..60910eadbc
--- /dev/null
+++ b/GraphBLAS/test/gbtest73.m
@@ -0,0 +1,16 @@
+function gbtest73
+%GBTEST73 test GrB.normdiff
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+rng ('default') ;
+
+x = rand (5, 1) ;
+y = rand (5, 1) ;
+e1 = GrB.normdiff (x, y) ;
+e2 = norm (x-y) ;
+assert (abs (e1 - e2) < 1e-12) ;
+
+fprintf ('gbtest73: all tests passed\n') ;
+
diff --git a/GraphBLAS/test/gbtest8.m b/GraphBLAS/test/gbtest8.m
index ab9815e06f..8f2e0bcde6 100644
--- a/GraphBLAS/test/gbtest8.m
+++ b/GraphBLAS/test/gbtest8.m
@@ -1,7 +1,7 @@
 function gbtest8
 %GBTEST8 test GrB.select
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %   tril
diff --git a/GraphBLAS/test/gbtest97.m b/GraphBLAS/test/gbtest97.m
new file mode 100644
index 0000000000..4269f5cb22
--- /dev/null
+++ b/GraphBLAS/test/gbtest97.m
@@ -0,0 +1,65 @@
+function gbtest97
+%GBTEST97 test A*x performance
+
+max_nthreads = GrB.threads ;
+threads = [1 2 4 8 16 20 32 40 64] ;
+
+n = 1e5 ;
+nz = 2e6 ;
+d = nz / n^2 ;
+G = GrB.random (n,n,d) ;
+A = double (G) ;
+% warmup to make sure the GrB library is loaded
+y = GrB (rand (2)) * GrB (rand (2)) ;
+
+ntrials = 10 ;
+
+for test = 1:4
+
+    if (test == 1)
+        X = 'sparse (rand (n,1))' ;
+        x =  sparse (rand (n,1)) ;
+    elseif (test == 2)
+        X = 'rand (n,1)' ;
+        x =  rand (n,1) ;
+    elseif (test == 3)
+        X = 'sprand (n,1,0.5)' ;
+        x =  sprand (n,1,0.5) ;
+    else
+        X = 'sprand (n,1,0.05)' ;
+        x =  sprand (n,1,0.05) ;
+    end
+
+    fprintf ('\n\n========================\n') ;
+    fprintf ('in MATLAB: y = A*x where x = %s\n', X) ;
+
+    tic
+    for trial = 1:ntrials
+        y = A*x ;
+    end
+    tmatlab = toc ;
+    fprintf ('MATLAB time: %8.4f sec\n', tmatlab) ;
+    ymatlab = y ;
+
+    fprintf ('\nGrB: y = A*x where x = %s\n', X) ;
+
+    for nthreads = threads
+        if (nthreads > max_nthreads)
+            break ;
+        end
+        GrB.threads (nthreads) ;
+        tic
+        for trial = 1:ntrials
+            y = G*x ;
+        end
+        t = toc ;
+        if (nthreads == 1)
+            t1 = t ;
+        end
+        fprintf (...
+            'threads: %2d GrB time: %8.4f speedup vs MATLAB: %8.2f  vs: GrB(1 thread) %8.2f\n', ...
+            nthreads, t, tmatlab / t, t1 / t) ;
+        assert (norm (y-ymatlab, 1) / norm (ymatlab,1) < 1e-12)
+    end
+
+end
diff --git a/GraphBLAS/test/gbtest98.m b/GraphBLAS/test/gbtest98.m
new file mode 100644
index 0000000000..f4d18c2186
--- /dev/null
+++ b/GraphBLAS/test/gbtest98.m
@@ -0,0 +1,98 @@
+function gbtest98
+%GBTEST98 test A'*x performance
+
+max_nthreads = GrB.threads ;
+threads = [1 2 4 8 16 20 32 40 64] ;
+desc = struct ('in0', 'transpose') ;
+rng ('default') ;
+
+n = 1e6 ; nz = 20e6 ;
+% n = 1e5 ; nz = 1e6 ;
+d = nz / n^2 ;
+% same as A = sprand (n,n,d), but faster:
+G = GrB.random (n,n,d) ;
+A = double (G) ;
+% warmup to make sure the GrB library is loaded
+y = GrB (rand (2)) * GrB (rand (2)) ;
+
+degree = sum (spones (G)) ;
+nempty = length (find (degree == 0)) ;
+fprintf ('matrix: n: %d nnz: %d  # empty columns: %d\n', n, nnz (A), nempty) ;
+
+ntrials = 1 ;
+
+for test = 1:4
+
+    if (test == 1)
+        X = 'sparse (rand (n,1))' ;
+        x =  sparse (rand (n,1)) ;
+    elseif (test == 2)
+        X = 'rand (n,1)' ;
+        x =  rand (n,1) ;
+    elseif (test == 3)
+        X = 'sprand (n,1,0.5)' ;
+        x =  sprand (n,1,0.5) ;
+    else
+        X = 'sprand (n,1,0.05)' ;
+        x =  sprand (n,1,0.05) ;
+    end
+
+    fprintf ('\n\n========================\n') ;
+    fprintf ('in MATLAB: y = A''*x where x = %s\n', X) ;
+
+    tic
+    for trial = 1:ntrials
+        y = A'*x ;
+    end
+    tmatlab = toc ;
+    fprintf ('MATLAB time: %8.4f sec\n', tmatlab) ;
+    ymatlab = y ;
+
+    fprintf ('\nGrB: y = A''*x where x = %s\n', X) ;
+
+    for nthreads = threads
+        if (nthreads > max_nthreads)
+            break ;
+        end
+        GrB.threads (nthreads) ;
+        tic
+        for trial = 1:ntrials
+            % y = G'*x ;
+            y = GrB.mxm (G, '+.*', x, desc) ;
+        end
+        t = toc ;
+        if (nthreads == 1)
+            t1 = t ;
+        end
+        fprintf (...
+            'threads: %2d GrB time: %8.4f speedup vs MATLAB: %8.2f  vs: GrB(1 thread) %8.2f\n', ...
+            nthreads, t, tmatlab / t, t1 / t) ;
+        assert (norm (y-ymatlab, 1) / norm (ymatlab,1) < 1e-12)
+    end
+
+    fprintf ('\nGrB: y = zeros(n,1) + A''*x where x = %s\n', X) ;
+
+    for nthreads = threads
+        if (nthreads > max_nthreads)
+            break ;
+        end
+        GrB.threads (nthreads) ;
+        tic
+        for trial = 1:ntrials
+            y = zeros (n,1) ;
+            % y = y + G'*x
+            y = GrB.mxm (y, '+', G, '+.*', x, desc) ;
+        end
+        t = toc ;
+        if (nthreads == 1)
+            t1 = t ;
+        end
+        fprintf (...
+            'threads: %2d GrB time: %8.4f speedup vs MATLAB: %8.2f  vs: GrB(1 thread) %8.2f\n', ...
+            nthreads, t, tmatlab / t, t1 / t) ;
+        assert (norm (y-ymatlab, 1) / norm (ymatlab,1) < 1e-12)
+    end
+
+end
+
+GrB.burble (0) ;
diff --git a/GraphBLAS/test/gbtest99.m b/GraphBLAS/test/gbtest99.m
index fabcead20f..091e688033 100644
--- a/GraphBLAS/test/gbtest99.m
+++ b/GraphBLAS/test/gbtest99.m
@@ -1,9 +1,13 @@
-function gbtest99
+function gbtest99 (doplots)
 %GBTEST99 test GrB.bfs
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
+if (nargin < 1)
+    doplots = true ;
+end
+
 save_threads = GrB.threads ;
 save_chunk   = GrB.chunk ;
 GrB.threads (4) ;
@@ -33,30 +37,41 @@
 A = sparse (ij (:,1), ij (:,2), ones (12,1), 8, 8) ;
 
 formats = { 'by row', 'by col' } ;
-figure (1) ;
-clf ;
+if (doplots)
+    figure (1) ;
+    clf ;
+end
 
 for k1 = 1:2
     fmt = formats {k1} ;
 
     A = GrB (A, fmt) ;
     H = GrB (A, 'logical', fmt) ;
-    if (k1 == 1)
+    if (k1 == 1 && doplots)
         subplot (1,2,1) ;
         plot (digraph (A)) ;
     end
 
     v1 = GrB.bfs (H, source) ;
-    [v pi] = GrB.bfs (H, source) ;
+    [v, pi] = GrB.bfs (H, source) ;
     assert (isequal (v, v1)) ;
 
-    v
     vok = [1 2 3 2 3 4 3 0] ;
     assert (isequal (full (double (v)), vok)) ;
 
-    pi
-    piok = [1 1 4 1 2 3 2 0] ;
-    assert (isequal (full (double (pi)), piok)) ;
+    % there are 2 valid trees, and GrB.bfs can return either one
+    piok1 = [1 1 4 1 2 3 2 0] ;
+    piok2 = [1 1 4 1 2 5 2 0] ;
+    ok1 = isequal (full (double (pi)), piok1) ;
+    ok2 = isequal (full (double (pi)), piok2) ;
+    if (ok1)
+        % this tree is more commonly found
+        % fprintf ('.') ;
+    end
+    if (ok2)
+        % fprintf ('#') ;
+    end
+    assert (ok1 || ok2) ;
 
     G = digraph (H) ;
     v2 = bfsearch (G, source) ;
@@ -64,26 +79,60 @@
     levels = full (double (v (v2))) ;
     assert (isequal (levels, sort (levels))) ;
 
-    [v pi] = GrB.bfs (H, source, 'directed') ;
+    [v, pi] = GrB.bfs (H, source, 'directed') ;
     assert (isequal (full (double (v)), vok)) ;
-    assert (isequal (full (double (pi)), piok)) ;
 
-    [v pi] = GrB.bfs (H, source, 'directed', 'check') ;
+    ok1 = isequal (full (double (pi)), piok1) ;
+    ok2 = isequal (full (double (pi)), piok2) ;
+    if (ok1)
+        % this tree is more commonly found
+        % fprintf ('+') ;
+    end
+    if (ok2)
+        % this is also valid
+        % fprintf ('-') ;
+    end
+    assert (ok1 || ok2) ;
+
+    [v, pi] = GrB.bfs (H, source, 'directed', 'check') ;
     assert (isequal (full (double (v)), vok)) ;
-    assert (isequal (full (double (pi)), piok)) ;
+
+    ok1 = isequal (full (double (pi)), piok1) ;
+    ok2 = isequal (full (double (pi)), piok2) ;
+    if (ok1)
+        % this tree is more commonly found
+        % fprintf ('\\') ;
+    end
+    if (ok2)
+        % this is also valid
+        % fprintf ('/') ;
+    end
+    assert (ok1 || ok2) ;
 
 end
 
 A = A+A' ;
-[v pi] = GrB.bfs (A, 2, 'undirected') ;
-subplot (1,2,2) ;
-plot (graph (A))
-v
+[v, pi] = GrB.bfs (A, 2, 'undirected') ;
+if (doplots)
+    subplot (1,2,2) ;
+    plot (graph (A))
+end
 vok = [2 1 3 3 2 3 2 0] ;
 assert (isequal (full (double (v)), vok)) ;
-pi
-piok = [2 2 7 1 2 5 2 0] ;
-assert (isequal (full (double (pi)), piok)) ;
+% two valid trees:
+piok1 = [2 2 7 1 2 5 2 0] ;
+piok2 = [2 2 7 7 2 5 2 0] ;
+
+    ok1 = isequal (full (double (pi)), piok1) ;
+    ok2 = isequal (full (double (pi)), piok2) ;
+    if (ok1)
+        % this tree is more commonly found
+        % fprintf ('@') ;
+    end
+    if (ok2)
+        % fprintf ('_') ;
+    end
+    assert (ok1 || ok2) ;
 
 GrB.threads (save_threads) ;
 GrB.chunk (save_chunk) ;
diff --git a/GraphBLAS/test/gbtest_binops.m b/GraphBLAS/test/gbtest_binops.m
index 835ceb03ad..0ea2673d80 100644
--- a/GraphBLAS/test/gbtest_binops.m
+++ b/GraphBLAS/test/gbtest_binops.m
@@ -1,20 +1,22 @@
-function [binops synonyms] = gbtest_binops
+function [binops, synonyms] = gbtest_binops
 %GBTEST_BINOPS return a cell array of strings, listing all binary operators
 % Types are not included; see gbtest_types.
 %
 % [binops synonyms] = gbtest_binops ;
 %
-% returns a list of the names of the 25 operators in binops, and a list of
+% returns a list of the names of the operators in binops, and a list of
 % their synonyms in the 2nd output.
 %
 % See also GrB.binopinfo.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 binops = {
     '1st'
     '2nd'
+    'pair'
+    'any'
     'min'
     'max'
     '+'
diff --git a/GraphBLAS/test/gbtest_eq.m b/GraphBLAS/test/gbtest_eq.m
index 8ec184d86e..df7c31c7ca 100644
--- a/GraphBLAS/test/gbtest_eq.m
+++ b/GraphBLAS/test/gbtest_eq.m
@@ -1,7 +1,7 @@
 function s = gbtest_eq (A, B)
 %GBTEST_EQ tests if A and B are equal, after dropping zeros.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 s = isequal (GrB.prune (A), GrB.prune (B)) ;
diff --git a/GraphBLAS/test/gbtest_types.m b/GraphBLAS/test/gbtest_types.m
index 1e34d572a6..d6e38ebb22 100644
--- a/GraphBLAS/test/gbtest_types.m
+++ b/GraphBLAS/test/gbtest_types.m
@@ -3,7 +3,7 @@
 %
 % See also gbtest_binops.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 types = {
diff --git a/GraphBLAS/test/tcov/Contents.m b/GraphBLAS/test/tcov/Contents.m
index d9945dc276..ac64b3a790 100644
--- a/GraphBLAS/test/tcov/Contents.m
+++ b/GraphBLAS/test/tcov/Contents.m
@@ -6,6 +6,9 @@
 % all temporary files, use 'make distclean' or remove the tmp/* files and
 % folders.
 %
+% To run these tests, if GraphBLAS/@GrB is initialized, first use
+% GrB.finalize.  Then gbcov can load the modified MATLAB interface.
+%
 %   gbcov      - run all GraphBLAS tests, with statement coverage
 %
 % Utilities:
@@ -14,6 +17,6 @@
 %   gbcovshow  - report GraphBLAS statement coverage
 %   gbcov_edit - create a version of GraphBLAS for statement coverage tests
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
diff --git a/GraphBLAS/test/tcov/Makefile b/GraphBLAS/test/tcov/Makefile
index 89715bd16c..c718bfd801 100644
--- a/GraphBLAS/test/tcov/Makefile
+++ b/GraphBLAS/test/tcov/Makefile
@@ -2,7 +2,7 @@
 # GraphBLAS/GraphBLAS/test/tcov/Makefile
 #-------------------------------------------------------------------------------
 
-#  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+#  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 #  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 #-------------------------------------------------------------------------------
diff --git a/GraphBLAS/test/tcov/gbcov.m b/GraphBLAS/test/tcov/gbcov.m
index ee41513611..e6af6cf077 100644
--- a/GraphBLAS/test/tcov/gbcov.m
+++ b/GraphBLAS/test/tcov/gbcov.m
@@ -1,12 +1,18 @@
 function gbcov
 %GBCOV run all GraphBLAS tests, with statement coverage
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % compile the coverage-test version of the @GrB mexFunctions
-clear all
-global gbcov_global
+clear all %#ok<*CLALL>
+global gbcov_global %#ok<*NUSED>
+
+try
+    % clear the default GrB library
+    GrB.finalize ;
+catch
+end
 
 gbcovmake
 addpath ('..') ;            % add the test folder to the path
@@ -16,9 +22,20 @@
 % run the tests
 gbtest ;
 
+try
+    % clear the test coverage version of the GrB library
+    GrB.finalize ;
+catch
+end
+
 addpath ('../..') ;         % add back the regular @GrB class
 rmpath ('tmp') ;            % remove the modified @GrB class
 
 % report the coverage
 gbcovshow ;
 
+try
+    % reload the default GrB library
+    GrB.init ;
+catch
+end
diff --git a/GraphBLAS/test/tcov/gbcov_edit.m b/GraphBLAS/test/tcov/gbcov_edit.m
index cab14781aa..306e290be4 100644
--- a/GraphBLAS/test/tcov/gbcov_edit.m
+++ b/GraphBLAS/test/tcov/gbcov_edit.m
@@ -13,7 +13,7 @@
 %
 %   { gbcov [count]++ ;
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 %  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % infiles can be a struct from dir, or a single string with one filename
diff --git a/GraphBLAS/test/tcov/gbcov_util.c b/GraphBLAS/test/tcov/gbcov_util.c
index 884e54b068..561182edca 100644
--- a/GraphBLAS/test/tcov/gbcov_util.c
+++ b/GraphBLAS/test/tcov/gbcov_util.c
@@ -2,7 +2,7 @@
 // gbcov_util.c: utilities for test coverage
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/GraphBLAS/test/tcov/gbcovmake.m b/GraphBLAS/test/tcov/gbcovmake.m
index df7c4857f0..89a542c9c6 100644
--- a/GraphBLAS/test/tcov/gbcovmake.m
+++ b/GraphBLAS/test/tcov/gbcovmake.m
@@ -3,7 +3,7 @@
 %
 % See also: gbcover, gbcov_edit
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if verLessThan ('matlab', '9.4')
@@ -71,6 +71,7 @@
             end
         end
     end
+catch
 end
 
 libraries = '-L../../../../../../build -L. -L/usr/local/lib -lgraphblas' ;
@@ -98,7 +99,7 @@
         % get the object file name
         ofile = cfiles(k).name ;
         objfile = [ ofile(1:end-2) '.o' ] ;
-        objlist = [ objlist ' ' objfile ] ;
+        objlist = [ objlist ' ' objfile ] ; %#ok<*AGROW>
         % compile the cfile
         mexcmd = sprintf ('mex -c %s -silent %s %s', flags, inc, cfile) ;
         fprintf ('%s\n', cfile) ;
diff --git a/GraphBLAS/test/tcov/gbcovshow.m b/GraphBLAS/test/tcov/gbcovshow.m
index 297c89c610..8c205a3416 100644
--- a/GraphBLAS/test/tcov/gbcovshow.m
+++ b/GraphBLAS/test/tcov/gbcovshow.m
@@ -1,7 +1,7 @@
 function gbcovshow
 %GBCOVSHOW report GraphBLAS statement coverage
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % report the coverage summary
@@ -45,12 +45,12 @@
         fprintf (f_output, '%s\n', cline) ;
 
         if (~isempty (strfind (cline, 'gbcov[')) && ...
-            ~isempty (strfind (cline, '++')))
+            ~isempty (strfind (cline, '++'))) %#ok<*STREMP>
             % got one; get the count
             k1 = strfind (cline, '[') ;
             k2 = strfind (cline, ']') ;
             s = cline (k1+1:k2-1) ;
-            i = str2num (s) + 1 ;
+            i = str2num (s) + 1 ; %#ok<*ST2NM>
             c = gbcov_global (i) ;
             if (c == 0)
                 fprintf (f_output, '// NOT COVERED:\n') ;
diff --git a/Include/GraphBLAS.h b/Include/GraphBLAS.h
index 6027e1320c..f84979eea3 100644
--- a/Include/GraphBLAS.h
+++ b/Include/GraphBLAS.h
@@ -2,7 +2,7 @@
 // GraphBLAS.h: definitions for the GraphBLAS package
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -107,10 +107,10 @@
 
 // The version of this implementation, and the GraphBLAS API version:
 #define GxB_IMPLEMENTATION_NAME "SuiteSparse:GraphBLAS"
-#define GxB_IMPLEMENTATION_DATE "Dec 16, 2019"
+#define GxB_IMPLEMENTATION_DATE "Feb 20, 2020"
 #define GxB_IMPLEMENTATION_MAJOR 3
-#define GxB_IMPLEMENTATION_MINOR 1
-#define GxB_IMPLEMENTATION_SUB   2
+#define GxB_IMPLEMENTATION_MINOR 2
+#define GxB_IMPLEMENTATION_SUB   0
 #define GxB_SPEC_DATE "May 18, 2018"
 #define GxB_SPEC_MAJOR 1
 #define GxB_SPEC_MINOR 2
@@ -123,13 +123,13 @@
 
 // The 'about' string the describes this particular implementation of GraphBLAS:
 #define GxB_IMPLEMENTATION_ABOUT \
-"SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, "                   \
+"SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, "                   \
 "All Rights Reserved.\n"                                                     \
 "http://suitesparse.com  Dept of Computer Sci. & Eng, Texas A&M University\n"
 
 // The GraphBLAS license for this particular implementation of GraphBLAS:
 #define GxB_IMPLEMENTATION_LICENSE \
-"SuiteSparse:GraphBLAS, Copyright 2017-2019, Timothy A. Davis\n"             \
+"SuiteSparse:GraphBLAS, Copyright 2017-2020, Timothy A. Davis\n"             \
 "\n"                                                                         \
 "Licensed under the Apache License, Version 2.0 (the \"License\");\n"        \
 "you may not use SuiteSparse:GraphBLAS except in compliance with the\n"      \
@@ -663,12 +663,12 @@ typedef struct GB_BinaryOp_opaque *GrB_BinaryOp ;
 //------------------------------------------------------------------------------
 
 // There are three sets of built-in binary operators.  For the first set of
-// 19 kinds of operators, x,y,z all have the same type, and they are available
-// for all 11 types, for a total of 19*11 = 209 operators.  All of them have
+// 21 kinds of operators, x,y,z all have the same type, and they are available
+// for all 11 types, for a total of 21*11 = 231 operators.  All of them have
 // a "_TYPE" suffix that denotes the type of x,y,z:
 
-//      10 general: FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES,
-//              DIV, RDIV
+//      12 general: FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES,
+//              DIV, RDIV, PAIR, ANY
 //      6 comparison: ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE
 //      3 logical: LOR, LAND, LXOR
 
@@ -683,12 +683,12 @@ typedef struct GB_BinaryOp_opaque *GrB_BinaryOp ;
 
 //      3 logical: LOR, LAND, LXOR
 
-// Thus there are 209+66+3 = 278 built-in binary operators.  Some are redundant
+// Thus there are 231+66+3 = 300 built-in binary operators.  Some are redundant
 // but are included to keep the name space of operators uniform.
 
-// For 10 binary operators z=f(x,y), x, y, and z are all the same type:
-// FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV, for all 11
-// types.
+// For 12 binary operators z=f(x,y), x, y, and z are all the same type:
+// FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV, PAIR, ANY,
+// for all 11 types.
 
 GB_PUBLIC GrB_BinaryOp
     // z = x            z = y               z = min(x,y)        z = max (x,y)
@@ -717,18 +717,18 @@ GB_PUBLIC GrB_BinaryOp
     GrB_PLUS_FP32,      GrB_MINUS_FP32,     GrB_TIMES_FP32,     GrB_DIV_FP32,
     GrB_PLUS_FP64,      GrB_MINUS_FP64,     GrB_TIMES_FP64,     GrB_DIV_FP64,
 
-    // z = y-x          z = y/x
-    GxB_RMINUS_BOOL,    GxB_RDIV_BOOL,      // ADDED in V3.0: RMINUS, RDIV
-    GxB_RMINUS_INT8,    GxB_RDIV_INT8,
-    GxB_RMINUS_UINT8,   GxB_RDIV_UINT8,
-    GxB_RMINUS_INT16,   GxB_RDIV_INT16,
-    GxB_RMINUS_UINT16,  GxB_RDIV_UINT16,
-    GxB_RMINUS_INT32,   GxB_RDIV_INT32,
-    GxB_RMINUS_UINT32,  GxB_RDIV_UINT32,
-    GxB_RMINUS_INT64,   GxB_RDIV_INT64,
-    GxB_RMINUS_UINT64,  GxB_RDIV_UINT64,
-    GxB_RMINUS_FP32,    GxB_RDIV_FP32,
-    GxB_RMINUS_FP64,    GxB_RDIV_FP64,
+    // z = y-x          z = y/x             z = 1               z = pick(x,y)
+    GxB_RMINUS_BOOL,    GxB_RDIV_BOOL,      GxB_PAIR_BOOL,      GxB_ANY_BOOL,
+    GxB_RMINUS_INT8,    GxB_RDIV_INT8,      GxB_PAIR_INT8,      GxB_ANY_INT8,
+    GxB_RMINUS_UINT8,   GxB_RDIV_UINT8,     GxB_PAIR_UINT8,     GxB_ANY_UINT8,
+    GxB_RMINUS_INT16,   GxB_RDIV_INT16,     GxB_PAIR_INT16,     GxB_ANY_INT16,
+    GxB_RMINUS_UINT16,  GxB_RDIV_UINT16,    GxB_PAIR_UINT16,    GxB_ANY_UINT16,
+    GxB_RMINUS_INT32,   GxB_RDIV_INT32,     GxB_PAIR_INT32,     GxB_ANY_INT32,
+    GxB_RMINUS_UINT32,  GxB_RDIV_UINT32,    GxB_PAIR_UINT32,    GxB_ANY_UINT32,
+    GxB_RMINUS_INT64,   GxB_RDIV_INT64,     GxB_PAIR_INT64,     GxB_ANY_INT64,
+    GxB_RMINUS_UINT64,  GxB_RDIV_UINT64,    GxB_PAIR_UINT64,    GxB_ANY_UINT64,
+    GxB_RMINUS_FP32,    GxB_RDIV_FP32,      GxB_PAIR_FP32,      GxB_ANY_FP32,
+    GxB_RMINUS_FP64,    GxB_RDIV_FP64,      GxB_PAIR_FP64,      GxB_ANY_FP64,
 
 // Six comparison operators z=f(x,y) return the same type as their inputs.
 // Each of them compute z = (x OP y), where x, y, and z all have the same type.
@@ -863,10 +863,11 @@ GB_PUBLIC GrB_BinaryOp
 // GrB_IDENTITY_BOOL, GrB_AINV_BOOL, and GrB_MINV_BOOL all give the same result
 // (z = x).
 
-// With this convention for boolean "division", there are 10 unique binary
+// With this convention for boolean "division", there are 11 unique binary
 // operators that are purely boolean; 13 *_BOOL operators are redundant but are
 // included in GraphBLAS so that the name space of operators is complete:
 
+//      z = 1           PAIR
 //      z = x           FIRST, DIV
 //      z = y           SECOND, RDIV
 //      z = (x && y)    AND, MIN, TIMES
@@ -3336,11 +3337,14 @@ GrB_Info GrB_Matrix_extractTuples           // [I,J,X] = find (A)
 //       In other words, C<Mask> = accum (C,T) is split into Z = accum(C,T) ;
 //       C=0 ; C<Mask> = Z.
 //
-// GrB_MASK: can be GxB_DEFAULT or GrB_SCMP.  If GxB_DEFAULT, the mask is used
+// GrB_MASK: can be GxB_DEFAULT, GrB_COMP, GrB_STRUCTURE, or set to both
+//      GrB_COMP and GrB_STRUCTURE.  If GxB_DEFAULT, the mask is used
 //      normally, where Mask(i,j)=1 means C(i,j) can be modified by C<Mask>=Z,
 //      and Mask(i,j)=0 means it cannot be modified even if Z(i,j) is has been
-//      computed and differs from C(i,j).  If GrB_SCMP, this is the same as
-//      taking the logical complement of the Mask.
+//      computed and differs from C(i,j).  If GrB_COMP, this is the same as
+//      taking the logical complement of the Mask.  If GrB_STRUCTURE is set,
+//      the value of the mask is not considered, just its pattern.  The
+//      GrB_COMP and GrB_STRUCTURE settings can be combined.
 //
 // GrB_INP0: can be GxB_DEFAULT or GrB_TRAN.  If GxB_DEFAULT, the first input
 //      is used as-is.  If GrB_TRAN, it is transposed.  Only matrices are
@@ -3369,22 +3373,34 @@ GrB_Info GrB_Matrix_extractTuples           // [I,J,X] = find (A)
 //      except that floating-point roundoff may differ when working on
 //      floating-point data types.
 //
-//      GxB_AxB_GUSTAVSON:  Gustavon's method, computing C(:,j)=A*B(,j) via
-//          a gather/scatter workspace of size equal to the number of rows of A.
-//          Very good general-purpose method, but sometimes the workspace can be
-//          too large when many threads are used..
+//      GxB_AxB_SAXPY:  C(:,j)=A*B(:,j) is computed using a mix of Gustavson,
+//          Hash, and (in the future) the Heap method.  Each task in the
+//          parallel computation makes its own decision, via a heuristic.
 //
-//      GxB_AxB_HEAP: a heap-based method, computing C(:,j)=A*B(:,j) via a heap
-//          of size equal to the maximum number of entries in any column of B.
-//          Very good for hypersparse matrices, particularly when nnz(B) is
-//          less than the number of rows of A.
+//      GxB_AxB_GUSTAVSON:  This is the same as GxB_AxB_SAXPY, except that
+//          every task uses Gustavson's method, computing C(:,j)=A*B(:,j) via a
+//          gather/scatter workspace of size equal to the number of rows of A.
+//          Very good general-purpose method, but sometimes the workspace can
+//          be too large when many threads are used.
+//
+//      GxB_AxB_HEAP: a heap-based saxpy-style method, computing
+//          C(:,j)=A*B(:,j) via a heap of size equal to the maximum number of
+//          entries in any column of B.  Very good for hypersparse matrices,
+//          particularly when nnz(B) is less than the number of rows of A.
+//          The Heap method is no longer available in v3.2, so it is silently
+//          replaced with GxB_AxB_HASH.  It may reappear in a future version.
+//
+//      GxB_AxB_HASH: This is the same as GxB_AxB_SAXPY, except that every
+//          task uses the Hash method.  Like the Heap method, it is very good
+//          for hypersparse matrices and uses very little workspace (but more
+//          workspace than the Heap method).
 //
 //      GxB_AxB_DOT: computes C(i,j) = A(:,i)'*B(:,j), for each entry C(i,j).
 //          A very specialized method that works well only if the mask is
-//          present, very sparse, and not complemented, or when C is tiny.
-//          It is impossibly slow if C is large and the mask is not present,
-//          since it takes Omega(m*n) time if C is m-by-n.  Uses a 2-phase
-//          method.  The first phase is symbolic, and the 2nd phase is numeric.
+//          present, very sparse, and not complemented, when C is a dense
+//          vector or matrix, or when C is tiny.  It is impossibly slow if C is
+//          large and the mask is not present, since it takes Omega(m*n) time
+//          if C is m-by-n.
 
 // GxB_NTHREADS and GxB_CHUNK are an enumerated value in both the
 // GrB_Desc_Field and the GxB_Option_Field.  They are defined with the same
@@ -3393,16 +3409,9 @@ GrB_Info GrB_Matrix_extractTuples           // [I,J,X] = find (A)
 #define GxB_NTHREADS 5
 #define GxB_CHUNK 7
 
-// GxB_NTHREADS_MAX is a compile-time constant that gives the upper bound on
-// the number of threads that GraphBLAS can use.  This thread count is the sum
-// of the maximum number of user threads and the number of internal OpenMP
-// threads created inside GraphBLAS by each user thread (nthreads_max, which
-// can be set by GxB_set (GxB_NTHREADS, nthreads_max)).  It is the maximum
-// permitted value of the run-time value nthreads_max.  This constant can be
-// changed at compile-time by using -DGxB_NTHREADS_MAX=4096, for example.  The
-// upper limit below should be large enough...
+// GxB_NTHREADS_MAX is no longer used, as of v3.2.0.
 #ifndef GxB_NTHREADS_MAX
-#define GxB_NTHREADS_MAX 2048
+#define GxB_NTHREADS_MAX INT32_MAX
 #endif
 
 typedef enum
@@ -3425,8 +3434,10 @@ typedef enum
 }
 GrB_Desc_Field ;
 
-// SPEC: GxB_DEFAULT, GxB_NTHREADS, GxB_CHUNK and GxB_AxB_* are extensionsi
-// to the spec.
+// SPEC: GxB_DEFAULT, GxB_NTHREADS, GxB_CHUNK and GxB_AxB_* are extensions to
+// the spec.  In the spec, setting both GrB_COMP and GrB_STRUCTURE can be done
+// with two calls to GrB_Descriptor_set.  As an extension to the spec, they can
+// also be set with a single call, using the setting GrB_COMP+GrB_STRUCTURE.
 
 typedef enum
 {
@@ -3436,8 +3447,10 @@ typedef enum
     // for GrB_OUTP only:
     GrB_REPLACE = 1,    // clear the output before assigning new values to it
 
-    // for GrB_MASK only: these two options are identical
-    GrB_SCMP = 2,       // use the structural complement of the input
+    // for GrB_MASK only:
+    GrB_COMP = 2,       // use the structural complement of the input
+    GrB_SCMP = 2,       // same as GrB_COMP (deprecated; use GrB_COMP instead)
+    GrB_STRUCTURE = 4,  // use only the pattern of the mask, not its values
 
     // for GrB_INP0 and GrB_INP1 only:
     GrB_TRAN = 3,       // use the transpose of the input
@@ -3446,7 +3459,8 @@ typedef enum
     GxB_AxB_GUSTAVSON = 1001,   // gather-scatter saxpy method
     GxB_AxB_HEAP      = 1002,   // heap-based saxpy method
     GxB_AxB_DOT       = 1003,   // dot product
-//  GxB_AxB_HASH      = 1004    // hash-based saxpy method (FUTURE)
+    GxB_AxB_HASH      = 1004,   // hash-based saxpy method
+    GxB_AxB_SAXPY     = 1005    // saxpy method (any kind)
 }
 GrB_Desc_Value ;
 
@@ -3520,6 +3534,63 @@ GrB_Info GrB_Descriptor_free    // free a descriptor
     GrB_Descriptor *descriptor  // handle of descriptor to free
 ) ;
 
+// Predefined descriptors and their values:
+
+GB_PUBLIC
+GrB_Descriptor     // OUTP         MASK           MASK       INP0      INP1
+                   //              structural     complement
+                   // ===========  ============== ========== ========  ========
+
+// GrB_NULL        // -            -              -          -         -
+GrB_DESC_T1      , // -            -              -          -         GrB_TRAN
+GrB_DESC_T0      , // -            -              -          GrB_TRAN  -       
+GrB_DESC_T0T1    , // -            -              -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_C       , // -            -              GrB_COMP   -         -       
+GrB_DESC_CT1     , // -            -              GrB_COMP   -         GrB_TRAN
+GrB_DESC_CT0     , // -            -              GrB_COMP   GrB_TRAN  -       
+GrB_DESC_CT0T1   , // -            -              GrB_COMP   GrB_TRAN  GrB_TRAN
+
+GrB_DESC_S       , // -            GrB_STRUCTURE  -          -         -       
+GrB_DESC_ST1     , // -            GrB_STRUCTURE  -          -         GrB_TRAN
+GrB_DESC_ST0     , // -            GrB_STRUCTURE  -          GrB_TRAN  -       
+GrB_DESC_ST0T1   , // -            GrB_STRUCTURE  -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_SC      , // -            GrB_STRUCTURE  GrB_COMP   -         -       
+GrB_DESC_SCT1    , // -            GrB_STRUCTURE  GrB_COMP   -         GrB_TRAN
+GrB_DESC_SCT0    , // -            GrB_STRUCTURE  GrB_COMP   GrB_TRAN  -       
+GrB_DESC_SCT0T1  , // -            GrB_STRUCTURE  GrB_COMP   GrB_TRAN  GrB_TRAN
+
+GrB_DESC_R       , // GrB_REPLACE  -              -          -         -       
+GrB_DESC_RT1     , // GrB_REPLACE  -              -          -         GrB_TRAN
+GrB_DESC_RT0     , // GrB_REPLACE  -              -          GrB_TRAN  -       
+GrB_DESC_RT0T1   , // GrB_REPLACE  -              -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_RC      , // GrB_REPLACE  -              GrB_COMP   -         -       
+GrB_DESC_RCT1    , // GrB_REPLACE  -              GrB_COMP   -         GrB_TRAN
+GrB_DESC_RCT0    , // GrB_REPLACE  -              GrB_COMP   GrB_TRAN  -       
+GrB_DESC_RCT0T1  , // GrB_REPLACE  -              GrB_COMP   GrB_TRAN  GrB_TRAN
+
+GrB_DESC_RS      , // GrB_REPLACE  GrB_STRUCTURE  -          -         -       
+GrB_DESC_RST1    , // GrB_REPLACE  GrB_STRUCTURE  -          -         GrB_TRAN
+GrB_DESC_RST0    , // GrB_REPLACE  GrB_STRUCTURE  -          GrB_TRAN  -       
+GrB_DESC_RST0T1  , // GrB_REPLACE  GrB_STRUCTURE  -          GrB_TRAN  GrB_TRAN
+
+GrB_DESC_RSC     , // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   -         -       
+GrB_DESC_RSCT1   , // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   -         GrB_TRAN
+GrB_DESC_RSCT0   , // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   GrB_TRAN  -       
+GrB_DESC_RSCT0T1 ; // GrB_REPLACE  GrB_STRUCTURE  GrB_COMP   GrB_TRAN  GrB_TRAN
+
+// GrB_NULL is the default descriptor, with all settings at their defaults:
+//
+//      OUTP: do not replace the output
+//      MASK: mask is valued and not complemented
+//      INP0: first input not transposed
+//      INP1: second input not transposed
+
+// Predefined descriptors may not be modified or freed.  Attempting to modify
+// them results in an error (GrB_INVALID_VALUE).  Attempts to free them are
+// silently ignored.
 
 //==============================================================================
 //=== SuiteSparse:GraphBLAS options ============================================
@@ -3586,7 +3657,8 @@ typedef enum            // for global options or matrix options
     GxB_API_VERSION = 16,           // API version (3 int's)
     GxB_API_DATE = 17,              // date of the API (char *)
     GxB_API_ABOUT = 18,             // about the API (char *)
-    GxB_API_URL = 19                // URL for the API (char *)
+    GxB_API_URL = 19,               // URL for the API (char *)
+    GxB_BURBLE = 20                 // development only (bool *)
 
 } GxB_Option_Field ;
 
@@ -3699,7 +3771,6 @@ GrB_Info GxB_Global_Option_get      // gets the current global default option
 //      GxB_set (GxB_FORMAT, GxB_BY_COL) ;
 //      GxB_get (GxB_FORMAT, GxB_Format_Value *s) ;
 //
-//      // see the GxB_NTHREADS_MAX discussion above
 //      GxB_set (GxB_NTHREADS, nthreads_max) ;
 //      GxB_get (GxB_NTHREADS, int *nthreads_max) ;
 //
@@ -3734,7 +3805,8 @@ GrB_Info GxB_Global_Option_get      // gets the current global default option
 //      GxB_get (GrB_Descriptor d, GrB_OUTP, GrB_Desc_Value *v) ;
 //
 //      GxB_set (GrB_Descriptor d, GrB_MASK, GxB_DEFAULT) ;
-//      GxB_set (GrB_Descriptor d, GrB_MASK, GrB_SCMP) ;
+//      GxB_set (GrB_Descriptor d, GrB_MASK, GrB_COMP) ;
+//      GxB_set (GrB_Descriptor d, GrB_MASK, GrB_STRUCTURE) ;
 //      GxB_get (GrB_Descriptor d, GrB_MASK, GrB_Desc_Value *v) ;
 //
 //      GxB_set (GrB_Descriptor d, GrB_INP0, GxB_DEFAULT) ;
@@ -3748,6 +3820,8 @@ GrB_Info GxB_Global_Option_get      // gets the current global default option
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_DEFAULT) ;
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_GUSTAVSON) ;
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_HEAP) ;
+//      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_HASH) ;
+//      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_SAXPY) ;
 //      GxB_set (GrB_Descriptor d, GxB_AxB_METHOD, GxB_AxB_DOT) ;
 //      GxB_get (GrB_Descriptor d, GrB_AxB_METHOD, GrB_Desc_Value *v) ;
 //
@@ -4298,7 +4372,7 @@ GrB_Info GrB_Col_extract            // w<mask> = accum (w, A(I,j))
 //      and no part of C outside the submatrix is ever modified.  In
 //      GrB_assign, it is possible to modify C outside the submatrix, but only
 //      in one specific manner.  Suppose the mask M is present (or, suppose it
-//      is not present but GrB_SCMP is true).  After (optionally) complementing
+//      is not present but GrB_COMP is true).  After (optionally) complementing
 //      the mask, the value of M(i,j) can be 0 for some entry outside the
 //      C(I,J) submatrix.  If the GrB_REPLACE descriptor is true, the
 //      GrB_assign deletes this entry.  This case does not occur if GrB_REPLACE
@@ -4308,7 +4382,7 @@ GrB_Info GrB_Col_extract            // w<mask> = accum (w, A(I,j))
 
 // GxB_subassign and GrB_assign are identical if GrB_REPLACE is set to its
 // default value of false, or if the masks happen to be the same.  The two
-// masks can be the same in two cases:  either there is no mask (and GrB_SCMP
+// masks can be the same in two cases:  either there is no mask (and GrB_COMP
 // is false), or I and J are both GrB_ALL.  In this case, the two algorithms
 // are identical and have the same performance.
 
@@ -4330,7 +4404,7 @@ GrB_Info GrB_Col_extract            // w<mask> = accum (w, A(I,j))
 // matrix and vector subassign: C(I,J)<Mask> = accum (C(I,J), A)
 // matrix and vector    assign: C<Mask>(I,J) = accum (C(I,J), A)
 
-// This notation does not include the details of the GrB_SCMP and GrB_REPLACE
+// This notation does not include the details of the GrB_COMP and GrB_REPLACE
 // descriptors, but it does illustrate the difference in the Mask.  In the
 // subassign, Mask is the same size as C(I,J) and A.  If I[0]=i and J[0]=j,
 // Then Mask(0,0) controls how C(i,j) is modified by the subassign, from the
@@ -5779,7 +5853,7 @@ GrB_Info GrB_Matrix_reduce_UDT      // c = accum (c, reduce_to_scalar (A))
 // GrB_Matrix_reduce_Monoid   (w,mask,acc,mo,A,d) // w<mask> = acc (w,reduce(A))
 // GrB_Matrix_reduce_BinaryOp (w,mask,acc,op,A,d) // w<mask> = acc (w,reduce(A))
 // reduce matrix to scalar:
-// GrB_Vector_reduce_[SCALAR] (c,acc,monoid,u,d)  // c = acc (c,reduce(A))
+// GrB_Vector_reduce_[SCALAR] (c,acc,monoid,u,d)  // c = acc (c,reduce(u))
 // GrB_Matrix_reduce_[SCALAR] (c,acc,monoid,A,d)  // c = acc (c,reduce(A))
 
 #if GxB_STDC_VERSION >= 201112L
@@ -5893,9 +5967,9 @@ GrB_Info GrB_transpose              // C<Mask> = accum (C, A')
 // built-in monoids
 //------------------------------------------------------------------------------
 
-// 44 unique monoids can be constructed using built-in types and operators, all
-// of which are defined below.  Four operators (min, max, plus, times) are
-// available for each of the 10 non-Boolean types, and four purely Boolean
+// 55 monoids can be constructed using built-in types and operators, all of
+// which are defined below.  Five operators (min, max, plus, times, any) are
+// available for each of the 10 non-Boolean types, and five purely Boolean
 // monoids are available.
 
 GB_PUBLIC GrB_Monoid
@@ -5948,7 +6022,20 @@ GB_PUBLIC GrB_Monoid
     GxB_TIMES_FP32_MONOID,        // identity: 1            terminal: none
     GxB_TIMES_FP64_MONOID,        // identity: 1            terminal: none
 
+    // ANY monoids:
+    GxB_ANY_INT8_MONOID,          // identity: any value    terminal: any value
+    GxB_ANY_INT16_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_INT32_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_INT64_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_UINT8_MONOID,         // identity: any value    terminal: any value
+    GxB_ANY_UINT16_MONOID,        // identity: any value    terminal: any value
+    GxB_ANY_UINT32_MONOID,        // identity: any value    terminal: any value
+    GxB_ANY_UINT64_MONOID,        // identity: any value    terminal: any value
+    GxB_ANY_FP32_MONOID,          // identity: any value    terminal: any value
+    GxB_ANY_FP64_MONOID,          // identity: any value    terminal: any value
+
     // Boolean monoids:
+    GxB_ANY_BOOL_MONOID,          // identity: any value    terminal: any value
     GxB_LOR_BOOL_MONOID,          // identity: false        terminal: true
     GxB_LAND_BOOL_MONOID,         // identity: true         terminal: false
     GxB_LXOR_BOOL_MONOID,         // identity: false
@@ -5958,34 +6045,40 @@ GB_PUBLIC GrB_Monoid
 // built-in semirings
 //------------------------------------------------------------------------------
 
-// Using built-in types and operators, 960 unique semirings can be built.  This
-// count excludes redundant Boolean operators (for example GxB_TIMES_BOOL and
+// Using built-in types and operators, 1355 unique semirings can be built.
+// This count excludes redundant semirings (for example GxB_TIMES_BOOL and
 // GxB_LAND_BOOL are different operators but they are redundant since they
-// always return the same result):
+// always return the same result).
 
-// 760 semirings with a multiply operator TxT -> T where T is non-Boolean, from
-// the complete cross product of:
+// 1000 semirings with a multiply operator TxT -> T where T is non-Boolean,
+// from the complete cross product of:
 
-//      4 add monoids (MIN, MAX, PLUS, TIMES)
-//      19 multiply operators:
-//          (FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
+//      5 add monoids: MIN, MAX, PLUS, TIMES, ANY
+//      20 multiply operators:
+//           FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
 //           ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
-//           LOR, LAND, LXOR)
+//           LOR, LAND, LXOR, PAIR
 //      10 non-Boolean types, T
 
-// 240 semirings with a comparison operator TxT -> bool, where T is
+// 300 semirings with a comparison operator TxT -> bool, where T is
 // non-Boolean, from the complete cross product of:
 
-//      4 Boolean add monoids: (LAND, LOR, LXOR, EQ)
-//      6 multiply operators: (EQ, NE, GT, LT, GE, LE)
+//      5 Boolean add monoids: LAND, LOR, LXOR, EQ, ANY
+//      6 multiply operators: EQ, NE, GT, LT, GE, LE
 //      10 non-Boolean types, T
 
-// 40 semirings with purely Boolean types, bool x bool -> bool, from the
+// 55 semirings with purely Boolean types, bool x bool -> bool, from the
 // complete cross product of:
 
-//      4 Boolean add monoids (LAND, LOR, LXOR, EQ)
-//      10 multiply operators:
-//          (FIRST, SECOND, LOR, LAND, LXOR, EQ, GT, LT, GE, LE)
+//      5 Boolean add monoids: LAND, LOR, LXOR, EQ, ANY
+//      11 multiply operators:
+//          FIRST, SECOND, LOR, LAND, LXOR, EQ, GT, LT, GE, LE, PAIR
+
+// The ANY operator is also valid to use as a multiplicative operator in a
+// semiring, but serves no purpose in that case.  The ANY operator is meant as
+// a fast additive operator for a monoid, that terminates, or short-circuits,
+// as soon as any value is found.  Valid user semirings can be constructed
+// with ANY as the multiply operator, but they are not predefined below.
 
 // In the names below, each semiring has a name of the form GxB_add_mult_T
 // where add is the additive monoid, mult is the multiply operator, and T is
@@ -5997,330 +6090,342 @@ GB_PUBLIC GrB_Monoid
 GB_PUBLIC GrB_Semiring
 
 //------------------------------------------------------------------------------
-// 680 non-Boolean semirings where all types are the same, given by suffix _T
-//------------------------------------------------------------------------------
-
-// semirings with multiply op: z = FIRST (x,y), all types x,y,z the same:
-GxB_MIN_FIRST_INT8     , GxB_MAX_FIRST_INT8     , GxB_PLUS_FIRST_INT8    , GxB_TIMES_FIRST_INT8   ,
-GxB_MIN_FIRST_UINT8    , GxB_MAX_FIRST_UINT8    , GxB_PLUS_FIRST_UINT8   , GxB_TIMES_FIRST_UINT8  ,
-GxB_MIN_FIRST_INT16    , GxB_MAX_FIRST_INT16    , GxB_PLUS_FIRST_INT16   , GxB_TIMES_FIRST_INT16  ,
-GxB_MIN_FIRST_UINT16   , GxB_MAX_FIRST_UINT16   , GxB_PLUS_FIRST_UINT16  , GxB_TIMES_FIRST_UINT16 ,
-GxB_MIN_FIRST_INT32    , GxB_MAX_FIRST_INT32    , GxB_PLUS_FIRST_INT32   , GxB_TIMES_FIRST_INT32  ,
-GxB_MIN_FIRST_UINT32   , GxB_MAX_FIRST_UINT32   , GxB_PLUS_FIRST_UINT32  , GxB_TIMES_FIRST_UINT32 ,
-GxB_MIN_FIRST_INT64    , GxB_MAX_FIRST_INT64    , GxB_PLUS_FIRST_INT64   , GxB_TIMES_FIRST_INT64  ,
-GxB_MIN_FIRST_UINT64   , GxB_MAX_FIRST_UINT64   , GxB_PLUS_FIRST_UINT64  , GxB_TIMES_FIRST_UINT64 ,
-GxB_MIN_FIRST_FP32     , GxB_MAX_FIRST_FP32     , GxB_PLUS_FIRST_FP32    , GxB_TIMES_FIRST_FP32   ,
-GxB_MIN_FIRST_FP64     , GxB_MAX_FIRST_FP64     , GxB_PLUS_FIRST_FP64    , GxB_TIMES_FIRST_FP64   ,
-
-// semirings with multiply op: z = SECOND (x,y), all types x,y,z the same:
-GxB_MIN_SECOND_INT8    , GxB_MAX_SECOND_INT8    , GxB_PLUS_SECOND_INT8   , GxB_TIMES_SECOND_INT8  ,
-GxB_MIN_SECOND_UINT8   , GxB_MAX_SECOND_UINT8   , GxB_PLUS_SECOND_UINT8  , GxB_TIMES_SECOND_UINT8 ,
-GxB_MIN_SECOND_INT16   , GxB_MAX_SECOND_INT16   , GxB_PLUS_SECOND_INT16  , GxB_TIMES_SECOND_INT16 ,
-GxB_MIN_SECOND_UINT16  , GxB_MAX_SECOND_UINT16  , GxB_PLUS_SECOND_UINT16 , GxB_TIMES_SECOND_UINT16,
-GxB_MIN_SECOND_INT32   , GxB_MAX_SECOND_INT32   , GxB_PLUS_SECOND_INT32  , GxB_TIMES_SECOND_INT32 ,
-GxB_MIN_SECOND_UINT32  , GxB_MAX_SECOND_UINT32  , GxB_PLUS_SECOND_UINT32 , GxB_TIMES_SECOND_UINT32,
-GxB_MIN_SECOND_INT64   , GxB_MAX_SECOND_INT64   , GxB_PLUS_SECOND_INT64  , GxB_TIMES_SECOND_INT64 ,
-GxB_MIN_SECOND_UINT64  , GxB_MAX_SECOND_UINT64  , GxB_PLUS_SECOND_UINT64 , GxB_TIMES_SECOND_UINT64,
-GxB_MIN_SECOND_FP32    , GxB_MAX_SECOND_FP32    , GxB_PLUS_SECOND_FP32   , GxB_TIMES_SECOND_FP32  ,
-GxB_MIN_SECOND_FP64    , GxB_MAX_SECOND_FP64    , GxB_PLUS_SECOND_FP64   , GxB_TIMES_SECOND_FP64  ,
-
-// semirings with multiply op: z = MIN (x,y), all types x,y,z the same:
-GxB_MIN_MIN_INT8       , GxB_MAX_MIN_INT8       , GxB_PLUS_MIN_INT8      , GxB_TIMES_MIN_INT8     ,
-GxB_MIN_MIN_UINT8      , GxB_MAX_MIN_UINT8      , GxB_PLUS_MIN_UINT8     , GxB_TIMES_MIN_UINT8    ,
-GxB_MIN_MIN_INT16      , GxB_MAX_MIN_INT16      , GxB_PLUS_MIN_INT16     , GxB_TIMES_MIN_INT16    ,
-GxB_MIN_MIN_UINT16     , GxB_MAX_MIN_UINT16     , GxB_PLUS_MIN_UINT16    , GxB_TIMES_MIN_UINT16   ,
-GxB_MIN_MIN_INT32      , GxB_MAX_MIN_INT32      , GxB_PLUS_MIN_INT32     , GxB_TIMES_MIN_INT32    ,
-GxB_MIN_MIN_UINT32     , GxB_MAX_MIN_UINT32     , GxB_PLUS_MIN_UINT32    , GxB_TIMES_MIN_UINT32   ,
-GxB_MIN_MIN_INT64      , GxB_MAX_MIN_INT64      , GxB_PLUS_MIN_INT64     , GxB_TIMES_MIN_INT64    ,
-GxB_MIN_MIN_UINT64     , GxB_MAX_MIN_UINT64     , GxB_PLUS_MIN_UINT64    , GxB_TIMES_MIN_UINT64   ,
-GxB_MIN_MIN_FP32       , GxB_MAX_MIN_FP32       , GxB_PLUS_MIN_FP32      , GxB_TIMES_MIN_FP32     ,
-GxB_MIN_MIN_FP64       , GxB_MAX_MIN_FP64       , GxB_PLUS_MIN_FP64      , GxB_TIMES_MIN_FP64     ,
-
-// semirings with multiply op: z = MAX (x,y), all types x,y,z the same:
-GxB_MIN_MAX_INT8       , GxB_MAX_MAX_INT8       , GxB_PLUS_MAX_INT8      , GxB_TIMES_MAX_INT8     ,
-GxB_MIN_MAX_UINT8      , GxB_MAX_MAX_UINT8      , GxB_PLUS_MAX_UINT8     , GxB_TIMES_MAX_UINT8    ,
-GxB_MIN_MAX_INT16      , GxB_MAX_MAX_INT16      , GxB_PLUS_MAX_INT16     , GxB_TIMES_MAX_INT16    ,
-GxB_MIN_MAX_UINT16     , GxB_MAX_MAX_UINT16     , GxB_PLUS_MAX_UINT16    , GxB_TIMES_MAX_UINT16   ,
-GxB_MIN_MAX_INT32      , GxB_MAX_MAX_INT32      , GxB_PLUS_MAX_INT32     , GxB_TIMES_MAX_INT32    ,
-GxB_MIN_MAX_UINT32     , GxB_MAX_MAX_UINT32     , GxB_PLUS_MAX_UINT32    , GxB_TIMES_MAX_UINT32   ,
-GxB_MIN_MAX_INT64      , GxB_MAX_MAX_INT64      , GxB_PLUS_MAX_INT64     , GxB_TIMES_MAX_INT64    ,
-GxB_MIN_MAX_UINT64     , GxB_MAX_MAX_UINT64     , GxB_PLUS_MAX_UINT64    , GxB_TIMES_MAX_UINT64   ,
-GxB_MIN_MAX_FP32       , GxB_MAX_MAX_FP32       , GxB_PLUS_MAX_FP32      , GxB_TIMES_MAX_FP32     ,
-GxB_MIN_MAX_FP64       , GxB_MAX_MAX_FP64       , GxB_PLUS_MAX_FP64      , GxB_TIMES_MAX_FP64     ,
-
-// semirings with multiply op: z = PLUS (x,y), all types x,y,z the same:
-GxB_MIN_PLUS_INT8      , GxB_MAX_PLUS_INT8      , GxB_PLUS_PLUS_INT8     , GxB_TIMES_PLUS_INT8    ,
-GxB_MIN_PLUS_UINT8     , GxB_MAX_PLUS_UINT8     , GxB_PLUS_PLUS_UINT8    , GxB_TIMES_PLUS_UINT8   ,
-GxB_MIN_PLUS_INT16     , GxB_MAX_PLUS_INT16     , GxB_PLUS_PLUS_INT16    , GxB_TIMES_PLUS_INT16   ,
-GxB_MIN_PLUS_UINT16    , GxB_MAX_PLUS_UINT16    , GxB_PLUS_PLUS_UINT16   , GxB_TIMES_PLUS_UINT16  ,
-GxB_MIN_PLUS_INT32     , GxB_MAX_PLUS_INT32     , GxB_PLUS_PLUS_INT32    , GxB_TIMES_PLUS_INT32   ,
-GxB_MIN_PLUS_UINT32    , GxB_MAX_PLUS_UINT32    , GxB_PLUS_PLUS_UINT32   , GxB_TIMES_PLUS_UINT32  ,
-GxB_MIN_PLUS_INT64     , GxB_MAX_PLUS_INT64     , GxB_PLUS_PLUS_INT64    , GxB_TIMES_PLUS_INT64   ,
-GxB_MIN_PLUS_UINT64    , GxB_MAX_PLUS_UINT64    , GxB_PLUS_PLUS_UINT64   , GxB_TIMES_PLUS_UINT64  ,
-GxB_MIN_PLUS_FP32      , GxB_MAX_PLUS_FP32      , GxB_PLUS_PLUS_FP32     , GxB_TIMES_PLUS_FP32    ,
-GxB_MIN_PLUS_FP64      , GxB_MAX_PLUS_FP64      , GxB_PLUS_PLUS_FP64     , GxB_TIMES_PLUS_FP64    ,
-
-// semirings with multiply op: z = MINUS (x,y), all types x,y,z the same:
-GxB_MIN_MINUS_INT8     , GxB_MAX_MINUS_INT8     , GxB_PLUS_MINUS_INT8    , GxB_TIMES_MINUS_INT8   ,
-GxB_MIN_MINUS_UINT8    , GxB_MAX_MINUS_UINT8    , GxB_PLUS_MINUS_UINT8   , GxB_TIMES_MINUS_UINT8  ,
-GxB_MIN_MINUS_INT16    , GxB_MAX_MINUS_INT16    , GxB_PLUS_MINUS_INT16   , GxB_TIMES_MINUS_INT16  ,
-GxB_MIN_MINUS_UINT16   , GxB_MAX_MINUS_UINT16   , GxB_PLUS_MINUS_UINT16  , GxB_TIMES_MINUS_UINT16 ,
-GxB_MIN_MINUS_INT32    , GxB_MAX_MINUS_INT32    , GxB_PLUS_MINUS_INT32   , GxB_TIMES_MINUS_INT32  ,
-GxB_MIN_MINUS_UINT32   , GxB_MAX_MINUS_UINT32   , GxB_PLUS_MINUS_UINT32  , GxB_TIMES_MINUS_UINT32 ,
-GxB_MIN_MINUS_INT64    , GxB_MAX_MINUS_INT64    , GxB_PLUS_MINUS_INT64   , GxB_TIMES_MINUS_INT64  ,
-GxB_MIN_MINUS_UINT64   , GxB_MAX_MINUS_UINT64   , GxB_PLUS_MINUS_UINT64  , GxB_TIMES_MINUS_UINT64 ,
-GxB_MIN_MINUS_FP32     , GxB_MAX_MINUS_FP32     , GxB_PLUS_MINUS_FP32    , GxB_TIMES_MINUS_FP32   ,
-GxB_MIN_MINUS_FP64     , GxB_MAX_MINUS_FP64     , GxB_PLUS_MINUS_FP64    , GxB_TIMES_MINUS_FP64   ,
-
-// ADDED in V3.0: semirings with RDIV and RMINUS:
-
-// semirings with multiply op: z = RMINUS (x,y), all types x,y,z the same:
-GxB_MIN_RMINUS_INT8    , GxB_MAX_RMINUS_INT8    , GxB_PLUS_RMINUS_INT8   , GxB_TIMES_RMINUS_INT8   ,
-GxB_MIN_RMINUS_UINT8   , GxB_MAX_RMINUS_UINT8   , GxB_PLUS_RMINUS_UINT8  , GxB_TIMES_RMINUS_UINT8  ,
-GxB_MIN_RMINUS_INT16   , GxB_MAX_RMINUS_INT16   , GxB_PLUS_RMINUS_INT16  , GxB_TIMES_RMINUS_INT16  ,
-GxB_MIN_RMINUS_UINT16  , GxB_MAX_RMINUS_UINT16  , GxB_PLUS_RMINUS_UINT16 , GxB_TIMES_RMINUS_UINT16 ,
-GxB_MIN_RMINUS_INT32   , GxB_MAX_RMINUS_INT32   , GxB_PLUS_RMINUS_INT32  , GxB_TIMES_RMINUS_INT32  ,
-GxB_MIN_RMINUS_UINT32  , GxB_MAX_RMINUS_UINT32  , GxB_PLUS_RMINUS_UINT32 , GxB_TIMES_RMINUS_UINT32 ,
-GxB_MIN_RMINUS_INT64   , GxB_MAX_RMINUS_INT64   , GxB_PLUS_RMINUS_INT64  , GxB_TIMES_RMINUS_INT64  ,
-GxB_MIN_RMINUS_UINT64  , GxB_MAX_RMINUS_UINT64  , GxB_PLUS_RMINUS_UINT64 , GxB_TIMES_RMINUS_UINT64 ,
-GxB_MIN_RMINUS_FP32    , GxB_MAX_RMINUS_FP32    , GxB_PLUS_RMINUS_FP32   , GxB_TIMES_RMINUS_FP32   ,
-GxB_MIN_RMINUS_FP64    , GxB_MAX_RMINUS_FP64    , GxB_PLUS_RMINUS_FP64   , GxB_TIMES_RMINUS_FP64   ,
-
-// semirings with multiply op: z = TIMES (x,y), all types x,y,z the same:
-GxB_MIN_TIMES_INT8     , GxB_MAX_TIMES_INT8     , GxB_PLUS_TIMES_INT8    , GxB_TIMES_TIMES_INT8   ,
-GxB_MIN_TIMES_UINT8    , GxB_MAX_TIMES_UINT8    , GxB_PLUS_TIMES_UINT8   , GxB_TIMES_TIMES_UINT8  ,
-GxB_MIN_TIMES_INT16    , GxB_MAX_TIMES_INT16    , GxB_PLUS_TIMES_INT16   , GxB_TIMES_TIMES_INT16  ,
-GxB_MIN_TIMES_UINT16   , GxB_MAX_TIMES_UINT16   , GxB_PLUS_TIMES_UINT16  , GxB_TIMES_TIMES_UINT16 ,
-GxB_MIN_TIMES_INT32    , GxB_MAX_TIMES_INT32    , GxB_PLUS_TIMES_INT32   , GxB_TIMES_TIMES_INT32  ,
-GxB_MIN_TIMES_UINT32   , GxB_MAX_TIMES_UINT32   , GxB_PLUS_TIMES_UINT32  , GxB_TIMES_TIMES_UINT32 ,
-GxB_MIN_TIMES_INT64    , GxB_MAX_TIMES_INT64    , GxB_PLUS_TIMES_INT64   , GxB_TIMES_TIMES_INT64  ,
-GxB_MIN_TIMES_UINT64   , GxB_MAX_TIMES_UINT64   , GxB_PLUS_TIMES_UINT64  , GxB_TIMES_TIMES_UINT64 ,
-GxB_MIN_TIMES_FP32     , GxB_MAX_TIMES_FP32     , GxB_PLUS_TIMES_FP32    , GxB_TIMES_TIMES_FP32   ,
-GxB_MIN_TIMES_FP64     , GxB_MAX_TIMES_FP64     , GxB_PLUS_TIMES_FP64    , GxB_TIMES_TIMES_FP64   ,
-
-// semirings with multiply op: z = DIV (x,y), all types x,y,z the same:
-GxB_MIN_DIV_INT8       , GxB_MAX_DIV_INT8       , GxB_PLUS_DIV_INT8      , GxB_TIMES_DIV_INT8     ,
-GxB_MIN_DIV_UINT8      , GxB_MAX_DIV_UINT8      , GxB_PLUS_DIV_UINT8     , GxB_TIMES_DIV_UINT8    ,
-GxB_MIN_DIV_INT16      , GxB_MAX_DIV_INT16      , GxB_PLUS_DIV_INT16     , GxB_TIMES_DIV_INT16    ,
-GxB_MIN_DIV_UINT16     , GxB_MAX_DIV_UINT16     , GxB_PLUS_DIV_UINT16    , GxB_TIMES_DIV_UINT16   ,
-GxB_MIN_DIV_INT32      , GxB_MAX_DIV_INT32      , GxB_PLUS_DIV_INT32     , GxB_TIMES_DIV_INT32    ,
-GxB_MIN_DIV_UINT32     , GxB_MAX_DIV_UINT32     , GxB_PLUS_DIV_UINT32    , GxB_TIMES_DIV_UINT32   ,
-GxB_MIN_DIV_INT64      , GxB_MAX_DIV_INT64      , GxB_PLUS_DIV_INT64     , GxB_TIMES_DIV_INT64    ,
-GxB_MIN_DIV_UINT64     , GxB_MAX_DIV_UINT64     , GxB_PLUS_DIV_UINT64    , GxB_TIMES_DIV_UINT64   ,
-GxB_MIN_DIV_FP32       , GxB_MAX_DIV_FP32       , GxB_PLUS_DIV_FP32      , GxB_TIMES_DIV_FP32     ,
-GxB_MIN_DIV_FP64       , GxB_MAX_DIV_FP64       , GxB_PLUS_DIV_FP64      , GxB_TIMES_DIV_FP64     ,
-
-// semirings with multiply op: z = RDIV (x,y), all types x,y,z the same:
-GxB_MIN_RDIV_INT8      , GxB_MAX_RDIV_INT8      , GxB_PLUS_RDIV_INT8     , GxB_TIMES_RDIV_INT8    ,
-GxB_MIN_RDIV_UINT8     , GxB_MAX_RDIV_UINT8     , GxB_PLUS_RDIV_UINT8    , GxB_TIMES_RDIV_UINT8   ,
-GxB_MIN_RDIV_INT16     , GxB_MAX_RDIV_INT16     , GxB_PLUS_RDIV_INT16    , GxB_TIMES_RDIV_INT16   ,
-GxB_MIN_RDIV_UINT16    , GxB_MAX_RDIV_UINT16    , GxB_PLUS_RDIV_UINT16   , GxB_TIMES_RDIV_UINT16  ,
-GxB_MIN_RDIV_INT32     , GxB_MAX_RDIV_INT32     , GxB_PLUS_RDIV_INT32    , GxB_TIMES_RDIV_INT32   ,
-GxB_MIN_RDIV_UINT32    , GxB_MAX_RDIV_UINT32    , GxB_PLUS_RDIV_UINT32   , GxB_TIMES_RDIV_UINT32  ,
-GxB_MIN_RDIV_INT64     , GxB_MAX_RDIV_INT64     , GxB_PLUS_RDIV_INT64    , GxB_TIMES_RDIV_INT64   ,
-GxB_MIN_RDIV_UINT64    , GxB_MAX_RDIV_UINT64    , GxB_PLUS_RDIV_UINT64   , GxB_TIMES_RDIV_UINT64  ,
-GxB_MIN_RDIV_FP32      , GxB_MAX_RDIV_FP32      , GxB_PLUS_RDIV_FP32     , GxB_TIMES_RDIV_FP32    ,
-GxB_MIN_RDIV_FP64      , GxB_MAX_RDIV_FP64      , GxB_PLUS_RDIV_FP64     , GxB_TIMES_RDIV_FP64    ,
-
-// semirings with multiply op: z = ISEQ (x,y), all types x,y,z the same:
-GxB_MIN_ISEQ_INT8      , GxB_MAX_ISEQ_INT8      , GxB_PLUS_ISEQ_INT8     , GxB_TIMES_ISEQ_INT8    ,
-GxB_MIN_ISEQ_UINT8     , GxB_MAX_ISEQ_UINT8     , GxB_PLUS_ISEQ_UINT8    , GxB_TIMES_ISEQ_UINT8   ,
-GxB_MIN_ISEQ_INT16     , GxB_MAX_ISEQ_INT16     , GxB_PLUS_ISEQ_INT16    , GxB_TIMES_ISEQ_INT16   ,
-GxB_MIN_ISEQ_UINT16    , GxB_MAX_ISEQ_UINT16    , GxB_PLUS_ISEQ_UINT16   , GxB_TIMES_ISEQ_UINT16  ,
-GxB_MIN_ISEQ_INT32     , GxB_MAX_ISEQ_INT32     , GxB_PLUS_ISEQ_INT32    , GxB_TIMES_ISEQ_INT32   ,
-GxB_MIN_ISEQ_UINT32    , GxB_MAX_ISEQ_UINT32    , GxB_PLUS_ISEQ_UINT32   , GxB_TIMES_ISEQ_UINT32  ,
-GxB_MIN_ISEQ_INT64     , GxB_MAX_ISEQ_INT64     , GxB_PLUS_ISEQ_INT64    , GxB_TIMES_ISEQ_INT64   ,
-GxB_MIN_ISEQ_UINT64    , GxB_MAX_ISEQ_UINT64    , GxB_PLUS_ISEQ_UINT64   , GxB_TIMES_ISEQ_UINT64  ,
-GxB_MIN_ISEQ_FP32      , GxB_MAX_ISEQ_FP32      , GxB_PLUS_ISEQ_FP32     , GxB_TIMES_ISEQ_FP32    ,
-GxB_MIN_ISEQ_FP64      , GxB_MAX_ISEQ_FP64      , GxB_PLUS_ISEQ_FP64     , GxB_TIMES_ISEQ_FP64    ,
-
-// semirings with multiply op: z = ISNE (x,y), all types x,y,z the same:
-GxB_MIN_ISNE_INT8      , GxB_MAX_ISNE_INT8      , GxB_PLUS_ISNE_INT8     , GxB_TIMES_ISNE_INT8    ,
-GxB_MIN_ISNE_UINT8     , GxB_MAX_ISNE_UINT8     , GxB_PLUS_ISNE_UINT8    , GxB_TIMES_ISNE_UINT8   ,
-GxB_MIN_ISNE_INT16     , GxB_MAX_ISNE_INT16     , GxB_PLUS_ISNE_INT16    , GxB_TIMES_ISNE_INT16   ,
-GxB_MIN_ISNE_UINT16    , GxB_MAX_ISNE_UINT16    , GxB_PLUS_ISNE_UINT16   , GxB_TIMES_ISNE_UINT16  ,
-GxB_MIN_ISNE_INT32     , GxB_MAX_ISNE_INT32     , GxB_PLUS_ISNE_INT32    , GxB_TIMES_ISNE_INT32   ,
-GxB_MIN_ISNE_UINT32    , GxB_MAX_ISNE_UINT32    , GxB_PLUS_ISNE_UINT32   , GxB_TIMES_ISNE_UINT32  ,
-GxB_MIN_ISNE_INT64     , GxB_MAX_ISNE_INT64     , GxB_PLUS_ISNE_INT64    , GxB_TIMES_ISNE_INT64   ,
-GxB_MIN_ISNE_UINT64    , GxB_MAX_ISNE_UINT64    , GxB_PLUS_ISNE_UINT64   , GxB_TIMES_ISNE_UINT64  ,
-GxB_MIN_ISNE_FP32      , GxB_MAX_ISNE_FP32      , GxB_PLUS_ISNE_FP32     , GxB_TIMES_ISNE_FP32    ,
-GxB_MIN_ISNE_FP64      , GxB_MAX_ISNE_FP64      , GxB_PLUS_ISNE_FP64     , GxB_TIMES_ISNE_FP64    ,
-
-// semirings with multiply op: z = ISGT (x,y), all types x,y,z the same:
-GxB_MIN_ISGT_INT8      , GxB_MAX_ISGT_INT8      , GxB_PLUS_ISGT_INT8     , GxB_TIMES_ISGT_INT8    ,
-GxB_MIN_ISGT_UINT8     , GxB_MAX_ISGT_UINT8     , GxB_PLUS_ISGT_UINT8    , GxB_TIMES_ISGT_UINT8   ,
-GxB_MIN_ISGT_INT16     , GxB_MAX_ISGT_INT16     , GxB_PLUS_ISGT_INT16    , GxB_TIMES_ISGT_INT16   ,
-GxB_MIN_ISGT_UINT16    , GxB_MAX_ISGT_UINT16    , GxB_PLUS_ISGT_UINT16   , GxB_TIMES_ISGT_UINT16  ,
-GxB_MIN_ISGT_INT32     , GxB_MAX_ISGT_INT32     , GxB_PLUS_ISGT_INT32    , GxB_TIMES_ISGT_INT32   ,
-GxB_MIN_ISGT_UINT32    , GxB_MAX_ISGT_UINT32    , GxB_PLUS_ISGT_UINT32   , GxB_TIMES_ISGT_UINT32  ,
-GxB_MIN_ISGT_INT64     , GxB_MAX_ISGT_INT64     , GxB_PLUS_ISGT_INT64    , GxB_TIMES_ISGT_INT64   ,
-GxB_MIN_ISGT_UINT64    , GxB_MAX_ISGT_UINT64    , GxB_PLUS_ISGT_UINT64   , GxB_TIMES_ISGT_UINT64  ,
-GxB_MIN_ISGT_FP32      , GxB_MAX_ISGT_FP32      , GxB_PLUS_ISGT_FP32     , GxB_TIMES_ISGT_FP32    ,
-GxB_MIN_ISGT_FP64      , GxB_MAX_ISGT_FP64      , GxB_PLUS_ISGT_FP64     , GxB_TIMES_ISGT_FP64    ,
-
-// semirings with multiply op: z = ISLT (x,y), all types x,y,z the same:
-GxB_MIN_ISLT_INT8      , GxB_MAX_ISLT_INT8      , GxB_PLUS_ISLT_INT8     , GxB_TIMES_ISLT_INT8    ,
-GxB_MIN_ISLT_UINT8     , GxB_MAX_ISLT_UINT8     , GxB_PLUS_ISLT_UINT8    , GxB_TIMES_ISLT_UINT8   ,
-GxB_MIN_ISLT_INT16     , GxB_MAX_ISLT_INT16     , GxB_PLUS_ISLT_INT16    , GxB_TIMES_ISLT_INT16   ,
-GxB_MIN_ISLT_UINT16    , GxB_MAX_ISLT_UINT16    , GxB_PLUS_ISLT_UINT16   , GxB_TIMES_ISLT_UINT16  ,
-GxB_MIN_ISLT_INT32     , GxB_MAX_ISLT_INT32     , GxB_PLUS_ISLT_INT32    , GxB_TIMES_ISLT_INT32   ,
-GxB_MIN_ISLT_UINT32    , GxB_MAX_ISLT_UINT32    , GxB_PLUS_ISLT_UINT32   , GxB_TIMES_ISLT_UINT32  ,
-GxB_MIN_ISLT_INT64     , GxB_MAX_ISLT_INT64     , GxB_PLUS_ISLT_INT64    , GxB_TIMES_ISLT_INT64   ,
-GxB_MIN_ISLT_UINT64    , GxB_MAX_ISLT_UINT64    , GxB_PLUS_ISLT_UINT64   , GxB_TIMES_ISLT_UINT64  ,
-GxB_MIN_ISLT_FP32      , GxB_MAX_ISLT_FP32      , GxB_PLUS_ISLT_FP32     , GxB_TIMES_ISLT_FP32    ,
-GxB_MIN_ISLT_FP64      , GxB_MAX_ISLT_FP64      , GxB_PLUS_ISLT_FP64     , GxB_TIMES_ISLT_FP64    ,
-
-// semirings with multiply op: z = ISGE (x,y), all types x,y,z the same:
-GxB_MIN_ISGE_INT8      , GxB_MAX_ISGE_INT8      , GxB_PLUS_ISGE_INT8     , GxB_TIMES_ISGE_INT8    ,
-GxB_MIN_ISGE_UINT8     , GxB_MAX_ISGE_UINT8     , GxB_PLUS_ISGE_UINT8    , GxB_TIMES_ISGE_UINT8   ,
-GxB_MIN_ISGE_INT16     , GxB_MAX_ISGE_INT16     , GxB_PLUS_ISGE_INT16    , GxB_TIMES_ISGE_INT16   ,
-GxB_MIN_ISGE_UINT16    , GxB_MAX_ISGE_UINT16    , GxB_PLUS_ISGE_UINT16   , GxB_TIMES_ISGE_UINT16  ,
-GxB_MIN_ISGE_INT32     , GxB_MAX_ISGE_INT32     , GxB_PLUS_ISGE_INT32    , GxB_TIMES_ISGE_INT32   ,
-GxB_MIN_ISGE_UINT32    , GxB_MAX_ISGE_UINT32    , GxB_PLUS_ISGE_UINT32   , GxB_TIMES_ISGE_UINT32  ,
-GxB_MIN_ISGE_INT64     , GxB_MAX_ISGE_INT64     , GxB_PLUS_ISGE_INT64    , GxB_TIMES_ISGE_INT64   ,
-GxB_MIN_ISGE_UINT64    , GxB_MAX_ISGE_UINT64    , GxB_PLUS_ISGE_UINT64   , GxB_TIMES_ISGE_UINT64  ,
-GxB_MIN_ISGE_FP32      , GxB_MAX_ISGE_FP32      , GxB_PLUS_ISGE_FP32     , GxB_TIMES_ISGE_FP32    ,
-GxB_MIN_ISGE_FP64      , GxB_MAX_ISGE_FP64      , GxB_PLUS_ISGE_FP64     , GxB_TIMES_ISGE_FP64    ,
-
-// semirings with multiply op: z = ISLE (x,y), all types x,y,z the same:
-GxB_MIN_ISLE_INT8      , GxB_MAX_ISLE_INT8      , GxB_PLUS_ISLE_INT8     , GxB_TIMES_ISLE_INT8    ,
-GxB_MIN_ISLE_UINT8     , GxB_MAX_ISLE_UINT8     , GxB_PLUS_ISLE_UINT8    , GxB_TIMES_ISLE_UINT8   ,
-GxB_MIN_ISLE_INT16     , GxB_MAX_ISLE_INT16     , GxB_PLUS_ISLE_INT16    , GxB_TIMES_ISLE_INT16   ,
-GxB_MIN_ISLE_UINT16    , GxB_MAX_ISLE_UINT16    , GxB_PLUS_ISLE_UINT16   , GxB_TIMES_ISLE_UINT16  ,
-GxB_MIN_ISLE_INT32     , GxB_MAX_ISLE_INT32     , GxB_PLUS_ISLE_INT32    , GxB_TIMES_ISLE_INT32   ,
-GxB_MIN_ISLE_UINT32    , GxB_MAX_ISLE_UINT32    , GxB_PLUS_ISLE_UINT32   , GxB_TIMES_ISLE_UINT32  ,
-GxB_MIN_ISLE_INT64     , GxB_MAX_ISLE_INT64     , GxB_PLUS_ISLE_INT64    , GxB_TIMES_ISLE_INT64   ,
-GxB_MIN_ISLE_UINT64    , GxB_MAX_ISLE_UINT64    , GxB_PLUS_ISLE_UINT64   , GxB_TIMES_ISLE_UINT64  ,
-GxB_MIN_ISLE_FP32      , GxB_MAX_ISLE_FP32      , GxB_PLUS_ISLE_FP32     , GxB_TIMES_ISLE_FP32    ,
-GxB_MIN_ISLE_FP64      , GxB_MAX_ISLE_FP64      , GxB_PLUS_ISLE_FP64     , GxB_TIMES_ISLE_FP64    ,
-
-// semirings with multiply op: z = LOR (x,y), all types x,y,z the same:
-GxB_MIN_LOR_INT8       , GxB_MAX_LOR_INT8       , GxB_PLUS_LOR_INT8      , GxB_TIMES_LOR_INT8     ,
-GxB_MIN_LOR_UINT8      , GxB_MAX_LOR_UINT8      , GxB_PLUS_LOR_UINT8     , GxB_TIMES_LOR_UINT8    ,
-GxB_MIN_LOR_INT16      , GxB_MAX_LOR_INT16      , GxB_PLUS_LOR_INT16     , GxB_TIMES_LOR_INT16    ,
-GxB_MIN_LOR_UINT16     , GxB_MAX_LOR_UINT16     , GxB_PLUS_LOR_UINT16    , GxB_TIMES_LOR_UINT16   ,
-GxB_MIN_LOR_INT32      , GxB_MAX_LOR_INT32      , GxB_PLUS_LOR_INT32     , GxB_TIMES_LOR_INT32    ,
-GxB_MIN_LOR_UINT32     , GxB_MAX_LOR_UINT32     , GxB_PLUS_LOR_UINT32    , GxB_TIMES_LOR_UINT32   ,
-GxB_MIN_LOR_INT64      , GxB_MAX_LOR_INT64      , GxB_PLUS_LOR_INT64     , GxB_TIMES_LOR_INT64    ,
-GxB_MIN_LOR_UINT64     , GxB_MAX_LOR_UINT64     , GxB_PLUS_LOR_UINT64    , GxB_TIMES_LOR_UINT64   ,
-GxB_MIN_LOR_FP32       , GxB_MAX_LOR_FP32       , GxB_PLUS_LOR_FP32      , GxB_TIMES_LOR_FP32     ,
-GxB_MIN_LOR_FP64       , GxB_MAX_LOR_FP64       , GxB_PLUS_LOR_FP64      , GxB_TIMES_LOR_FP64     ,
-
-// semirings with multiply op: z = LAND (x,y), all types x,y,z the same:
-GxB_MIN_LAND_INT8      , GxB_MAX_LAND_INT8      , GxB_PLUS_LAND_INT8     , GxB_TIMES_LAND_INT8    ,
-GxB_MIN_LAND_UINT8     , GxB_MAX_LAND_UINT8     , GxB_PLUS_LAND_UINT8    , GxB_TIMES_LAND_UINT8   ,
-GxB_MIN_LAND_INT16     , GxB_MAX_LAND_INT16     , GxB_PLUS_LAND_INT16    , GxB_TIMES_LAND_INT16   ,
-GxB_MIN_LAND_UINT16    , GxB_MAX_LAND_UINT16    , GxB_PLUS_LAND_UINT16   , GxB_TIMES_LAND_UINT16  ,
-GxB_MIN_LAND_INT32     , GxB_MAX_LAND_INT32     , GxB_PLUS_LAND_INT32    , GxB_TIMES_LAND_INT32   ,
-GxB_MIN_LAND_UINT32    , GxB_MAX_LAND_UINT32    , GxB_PLUS_LAND_UINT32   , GxB_TIMES_LAND_UINT32  ,
-GxB_MIN_LAND_INT64     , GxB_MAX_LAND_INT64     , GxB_PLUS_LAND_INT64    , GxB_TIMES_LAND_INT64   ,
-GxB_MIN_LAND_UINT64    , GxB_MAX_LAND_UINT64    , GxB_PLUS_LAND_UINT64   , GxB_TIMES_LAND_UINT64  ,
-GxB_MIN_LAND_FP32      , GxB_MAX_LAND_FP32      , GxB_PLUS_LAND_FP32     , GxB_TIMES_LAND_FP32    ,
-GxB_MIN_LAND_FP64      , GxB_MAX_LAND_FP64      , GxB_PLUS_LAND_FP64     , GxB_TIMES_LAND_FP64    ,
-
-// semirings with multiply op: z = LXOR (x,y), all types x,y,z the same:
-GxB_MIN_LXOR_INT8      , GxB_MAX_LXOR_INT8      , GxB_PLUS_LXOR_INT8     , GxB_TIMES_LXOR_INT8    ,
-GxB_MIN_LXOR_UINT8     , GxB_MAX_LXOR_UINT8     , GxB_PLUS_LXOR_UINT8    , GxB_TIMES_LXOR_UINT8   ,
-GxB_MIN_LXOR_INT16     , GxB_MAX_LXOR_INT16     , GxB_PLUS_LXOR_INT16    , GxB_TIMES_LXOR_INT16   ,
-GxB_MIN_LXOR_UINT16    , GxB_MAX_LXOR_UINT16    , GxB_PLUS_LXOR_UINT16   , GxB_TIMES_LXOR_UINT16  ,
-GxB_MIN_LXOR_INT32     , GxB_MAX_LXOR_INT32     , GxB_PLUS_LXOR_INT32    , GxB_TIMES_LXOR_INT32   ,
-GxB_MIN_LXOR_UINT32    , GxB_MAX_LXOR_UINT32    , GxB_PLUS_LXOR_UINT32   , GxB_TIMES_LXOR_UINT32  ,
-GxB_MIN_LXOR_INT64     , GxB_MAX_LXOR_INT64     , GxB_PLUS_LXOR_INT64    , GxB_TIMES_LXOR_INT64   ,
-GxB_MIN_LXOR_UINT64    , GxB_MAX_LXOR_UINT64    , GxB_PLUS_LXOR_UINT64   , GxB_TIMES_LXOR_UINT64  ,
-GxB_MIN_LXOR_FP32      , GxB_MAX_LXOR_FP32      , GxB_PLUS_LXOR_FP32     , GxB_TIMES_LXOR_FP32    ,
-GxB_MIN_LXOR_FP64      , GxB_MAX_LXOR_FP64      , GxB_PLUS_LXOR_FP64     , GxB_TIMES_LXOR_FP64    ,
-
-//------------------------------------------------------------------------------
-// 240 semirings with comparison ops of the form TxT->bool, and Boolean monoids
-//------------------------------------------------------------------------------
-
-// semirings with multiply op: z = EQ (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_EQ_INT8        , GxB_LAND_EQ_INT8       , GxB_LXOR_EQ_INT8       , GxB_EQ_EQ_INT8         ,
-GxB_LOR_EQ_UINT8       , GxB_LAND_EQ_UINT8      , GxB_LXOR_EQ_UINT8      , GxB_EQ_EQ_UINT8        ,
-GxB_LOR_EQ_INT16       , GxB_LAND_EQ_INT16      , GxB_LXOR_EQ_INT16      , GxB_EQ_EQ_INT16        ,
-GxB_LOR_EQ_UINT16      , GxB_LAND_EQ_UINT16     , GxB_LXOR_EQ_UINT16     , GxB_EQ_EQ_UINT16       ,
-GxB_LOR_EQ_INT32       , GxB_LAND_EQ_INT32      , GxB_LXOR_EQ_INT32      , GxB_EQ_EQ_INT32        ,
-GxB_LOR_EQ_UINT32      , GxB_LAND_EQ_UINT32     , GxB_LXOR_EQ_UINT32     , GxB_EQ_EQ_UINT32       ,
-GxB_LOR_EQ_INT64       , GxB_LAND_EQ_INT64      , GxB_LXOR_EQ_INT64      , GxB_EQ_EQ_INT64        ,
-GxB_LOR_EQ_UINT64      , GxB_LAND_EQ_UINT64     , GxB_LXOR_EQ_UINT64     , GxB_EQ_EQ_UINT64       ,
-GxB_LOR_EQ_FP32        , GxB_LAND_EQ_FP32       , GxB_LXOR_EQ_FP32       , GxB_EQ_EQ_FP32         ,
-GxB_LOR_EQ_FP64        , GxB_LAND_EQ_FP64       , GxB_LXOR_EQ_FP64       , GxB_EQ_EQ_FP64         ,
-
-// semirings with multiply op: z = NE (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_NE_INT8        , GxB_LAND_NE_INT8       , GxB_LXOR_NE_INT8       , GxB_EQ_NE_INT8         ,
-GxB_LOR_NE_UINT8       , GxB_LAND_NE_UINT8      , GxB_LXOR_NE_UINT8      , GxB_EQ_NE_UINT8        ,
-GxB_LOR_NE_INT16       , GxB_LAND_NE_INT16      , GxB_LXOR_NE_INT16      , GxB_EQ_NE_INT16        ,
-GxB_LOR_NE_UINT16      , GxB_LAND_NE_UINT16     , GxB_LXOR_NE_UINT16     , GxB_EQ_NE_UINT16       ,
-GxB_LOR_NE_INT32       , GxB_LAND_NE_INT32      , GxB_LXOR_NE_INT32      , GxB_EQ_NE_INT32        ,
-GxB_LOR_NE_UINT32      , GxB_LAND_NE_UINT32     , GxB_LXOR_NE_UINT32     , GxB_EQ_NE_UINT32       ,
-GxB_LOR_NE_INT64       , GxB_LAND_NE_INT64      , GxB_LXOR_NE_INT64      , GxB_EQ_NE_INT64        ,
-GxB_LOR_NE_UINT64      , GxB_LAND_NE_UINT64     , GxB_LXOR_NE_UINT64     , GxB_EQ_NE_UINT64       ,
-GxB_LOR_NE_FP32        , GxB_LAND_NE_FP32       , GxB_LXOR_NE_FP32       , GxB_EQ_NE_FP32         ,
-GxB_LOR_NE_FP64        , GxB_LAND_NE_FP64       , GxB_LXOR_NE_FP64       , GxB_EQ_NE_FP64         ,
-
-// semirings with multiply op: z = GT (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_GT_INT8        , GxB_LAND_GT_INT8       , GxB_LXOR_GT_INT8       , GxB_EQ_GT_INT8         ,
-GxB_LOR_GT_UINT8       , GxB_LAND_GT_UINT8      , GxB_LXOR_GT_UINT8      , GxB_EQ_GT_UINT8        ,
-GxB_LOR_GT_INT16       , GxB_LAND_GT_INT16      , GxB_LXOR_GT_INT16      , GxB_EQ_GT_INT16        ,
-GxB_LOR_GT_UINT16      , GxB_LAND_GT_UINT16     , GxB_LXOR_GT_UINT16     , GxB_EQ_GT_UINT16       ,
-GxB_LOR_GT_INT32       , GxB_LAND_GT_INT32      , GxB_LXOR_GT_INT32      , GxB_EQ_GT_INT32        ,
-GxB_LOR_GT_UINT32      , GxB_LAND_GT_UINT32     , GxB_LXOR_GT_UINT32     , GxB_EQ_GT_UINT32       ,
-GxB_LOR_GT_INT64       , GxB_LAND_GT_INT64      , GxB_LXOR_GT_INT64      , GxB_EQ_GT_INT64        ,
-GxB_LOR_GT_UINT64      , GxB_LAND_GT_UINT64     , GxB_LXOR_GT_UINT64     , GxB_EQ_GT_UINT64       ,
-GxB_LOR_GT_FP32        , GxB_LAND_GT_FP32       , GxB_LXOR_GT_FP32       , GxB_EQ_GT_FP32         ,
-GxB_LOR_GT_FP64        , GxB_LAND_GT_FP64       , GxB_LXOR_GT_FP64       , GxB_EQ_GT_FP64         ,
-
-// semirings with multiply op: z = LT (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_LT_INT8        , GxB_LAND_LT_INT8       , GxB_LXOR_LT_INT8       , GxB_EQ_LT_INT8         ,
-GxB_LOR_LT_UINT8       , GxB_LAND_LT_UINT8      , GxB_LXOR_LT_UINT8      , GxB_EQ_LT_UINT8        ,
-GxB_LOR_LT_INT16       , GxB_LAND_LT_INT16      , GxB_LXOR_LT_INT16      , GxB_EQ_LT_INT16        ,
-GxB_LOR_LT_UINT16      , GxB_LAND_LT_UINT16     , GxB_LXOR_LT_UINT16     , GxB_EQ_LT_UINT16       ,
-GxB_LOR_LT_INT32       , GxB_LAND_LT_INT32      , GxB_LXOR_LT_INT32      , GxB_EQ_LT_INT32        ,
-GxB_LOR_LT_UINT32      , GxB_LAND_LT_UINT32     , GxB_LXOR_LT_UINT32     , GxB_EQ_LT_UINT32       ,
-GxB_LOR_LT_INT64       , GxB_LAND_LT_INT64      , GxB_LXOR_LT_INT64      , GxB_EQ_LT_INT64        ,
-GxB_LOR_LT_UINT64      , GxB_LAND_LT_UINT64     , GxB_LXOR_LT_UINT64     , GxB_EQ_LT_UINT64       ,
-GxB_LOR_LT_FP32        , GxB_LAND_LT_FP32       , GxB_LXOR_LT_FP32       , GxB_EQ_LT_FP32         ,
-GxB_LOR_LT_FP64        , GxB_LAND_LT_FP64       , GxB_LXOR_LT_FP64       , GxB_EQ_LT_FP64         ,
-
-// semirings with multiply op: z = GE (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_GE_INT8        , GxB_LAND_GE_INT8       , GxB_LXOR_GE_INT8       , GxB_EQ_GE_INT8         ,
-GxB_LOR_GE_UINT8       , GxB_LAND_GE_UINT8      , GxB_LXOR_GE_UINT8      , GxB_EQ_GE_UINT8        ,
-GxB_LOR_GE_INT16       , GxB_LAND_GE_INT16      , GxB_LXOR_GE_INT16      , GxB_EQ_GE_INT16        ,
-GxB_LOR_GE_UINT16      , GxB_LAND_GE_UINT16     , GxB_LXOR_GE_UINT16     , GxB_EQ_GE_UINT16       ,
-GxB_LOR_GE_INT32       , GxB_LAND_GE_INT32      , GxB_LXOR_GE_INT32      , GxB_EQ_GE_INT32        ,
-GxB_LOR_GE_UINT32      , GxB_LAND_GE_UINT32     , GxB_LXOR_GE_UINT32     , GxB_EQ_GE_UINT32       ,
-GxB_LOR_GE_INT64       , GxB_LAND_GE_INT64      , GxB_LXOR_GE_INT64      , GxB_EQ_GE_INT64        ,
-GxB_LOR_GE_UINT64      , GxB_LAND_GE_UINT64     , GxB_LXOR_GE_UINT64     , GxB_EQ_GE_UINT64       ,
-GxB_LOR_GE_FP32        , GxB_LAND_GE_FP32       , GxB_LXOR_GE_FP32       , GxB_EQ_GE_FP32         ,
-GxB_LOR_GE_FP64        , GxB_LAND_GE_FP64       , GxB_LXOR_GE_FP64       , GxB_EQ_GE_FP64         ,
-
-// semirings with multiply op: z = LE (x,y), where z is Boolean and x,y are given by the suffix:
-GxB_LOR_LE_INT8        , GxB_LAND_LE_INT8       , GxB_LXOR_LE_INT8       , GxB_EQ_LE_INT8         ,
-GxB_LOR_LE_UINT8       , GxB_LAND_LE_UINT8      , GxB_LXOR_LE_UINT8      , GxB_EQ_LE_UINT8        ,
-GxB_LOR_LE_INT16       , GxB_LAND_LE_INT16      , GxB_LXOR_LE_INT16      , GxB_EQ_LE_INT16        ,
-GxB_LOR_LE_UINT16      , GxB_LAND_LE_UINT16     , GxB_LXOR_LE_UINT16     , GxB_EQ_LE_UINT16       ,
-GxB_LOR_LE_INT32       , GxB_LAND_LE_INT32      , GxB_LXOR_LE_INT32      , GxB_EQ_LE_INT32        ,
-GxB_LOR_LE_UINT32      , GxB_LAND_LE_UINT32     , GxB_LXOR_LE_UINT32     , GxB_EQ_LE_UINT32       ,
-GxB_LOR_LE_INT64       , GxB_LAND_LE_INT64      , GxB_LXOR_LE_INT64      , GxB_EQ_LE_INT64        ,
-GxB_LOR_LE_UINT64      , GxB_LAND_LE_UINT64     , GxB_LXOR_LE_UINT64     , GxB_EQ_LE_UINT64       ,
-GxB_LOR_LE_FP32        , GxB_LAND_LE_FP32       , GxB_LXOR_LE_FP32       , GxB_EQ_LE_FP32         ,
-GxB_LOR_LE_FP64        , GxB_LAND_LE_FP64       , GxB_LXOR_LE_FP64       , GxB_EQ_LE_FP64         ,
-
-//------------------------------------------------------------------------------
-// 40 purely Boolean semirings
-//------------------------------------------------------------------------------
-
-// purely boolean semirings (in the form GxB_(add monoid)_(multipy operator)_BOOL:
-GxB_LOR_FIRST_BOOL     , GxB_LAND_FIRST_BOOL    , GxB_LXOR_FIRST_BOOL    , GxB_EQ_FIRST_BOOL      ,
-GxB_LOR_SECOND_BOOL    , GxB_LAND_SECOND_BOOL   , GxB_LXOR_SECOND_BOOL   , GxB_EQ_SECOND_BOOL     ,
-GxB_LOR_LOR_BOOL       , GxB_LAND_LOR_BOOL      , GxB_LXOR_LOR_BOOL      , GxB_EQ_LOR_BOOL        ,
-GxB_LOR_LAND_BOOL      , GxB_LAND_LAND_BOOL     , GxB_LXOR_LAND_BOOL     , GxB_EQ_LAND_BOOL       ,
-GxB_LOR_LXOR_BOOL      , GxB_LAND_LXOR_BOOL     , GxB_LXOR_LXOR_BOOL     , GxB_EQ_LXOR_BOOL       ,
-GxB_LOR_EQ_BOOL        , GxB_LAND_EQ_BOOL       , GxB_LXOR_EQ_BOOL       , GxB_EQ_EQ_BOOL         ,
-GxB_LOR_GT_BOOL        , GxB_LAND_GT_BOOL       , GxB_LXOR_GT_BOOL       , GxB_EQ_GT_BOOL         ,
-GxB_LOR_LT_BOOL        , GxB_LAND_LT_BOOL       , GxB_LXOR_LT_BOOL       , GxB_EQ_LT_BOOL         ,
-GxB_LOR_GE_BOOL        , GxB_LAND_GE_BOOL       , GxB_LXOR_GE_BOOL       , GxB_EQ_GE_BOOL         ,
-GxB_LOR_LE_BOOL        , GxB_LAND_LE_BOOL       , GxB_LXOR_LE_BOOL       , GxB_EQ_LE_BOOL         ;
+// 1000 non-Boolean semirings where all types are the same, given by suffix _T
+//------------------------------------------------------------------------------
+
+    // semirings with multiply op: z = FIRST (x,y), all types x,y,z the same:
+    GxB_MIN_FIRST_INT8     , GxB_MAX_FIRST_INT8     , GxB_PLUS_FIRST_INT8    , GxB_TIMES_FIRST_INT8   , GxB_ANY_FIRST_INT8     , 
+    GxB_MIN_FIRST_UINT8    , GxB_MAX_FIRST_UINT8    , GxB_PLUS_FIRST_UINT8   , GxB_TIMES_FIRST_UINT8  , GxB_ANY_FIRST_UINT8    , 
+    GxB_MIN_FIRST_INT16    , GxB_MAX_FIRST_INT16    , GxB_PLUS_FIRST_INT16   , GxB_TIMES_FIRST_INT16  , GxB_ANY_FIRST_INT16    , 
+    GxB_MIN_FIRST_UINT16   , GxB_MAX_FIRST_UINT16   , GxB_PLUS_FIRST_UINT16  , GxB_TIMES_FIRST_UINT16 , GxB_ANY_FIRST_UINT16   , 
+    GxB_MIN_FIRST_INT32    , GxB_MAX_FIRST_INT32    , GxB_PLUS_FIRST_INT32   , GxB_TIMES_FIRST_INT32  , GxB_ANY_FIRST_INT32    , 
+    GxB_MIN_FIRST_UINT32   , GxB_MAX_FIRST_UINT32   , GxB_PLUS_FIRST_UINT32  , GxB_TIMES_FIRST_UINT32 , GxB_ANY_FIRST_UINT32   , 
+    GxB_MIN_FIRST_INT64    , GxB_MAX_FIRST_INT64    , GxB_PLUS_FIRST_INT64   , GxB_TIMES_FIRST_INT64  , GxB_ANY_FIRST_INT64    , 
+    GxB_MIN_FIRST_UINT64   , GxB_MAX_FIRST_UINT64   , GxB_PLUS_FIRST_UINT64  , GxB_TIMES_FIRST_UINT64 , GxB_ANY_FIRST_UINT64   , 
+    GxB_MIN_FIRST_FP32     , GxB_MAX_FIRST_FP32     , GxB_PLUS_FIRST_FP32    , GxB_TIMES_FIRST_FP32   , GxB_ANY_FIRST_FP32     , 
+    GxB_MIN_FIRST_FP64     , GxB_MAX_FIRST_FP64     , GxB_PLUS_FIRST_FP64    , GxB_TIMES_FIRST_FP64   , GxB_ANY_FIRST_FP64     , 
+
+    // semirings with multiply op: z = SECOND (x,y), all types x,y,z the same:
+    GxB_MIN_SECOND_INT8    , GxB_MAX_SECOND_INT8    , GxB_PLUS_SECOND_INT8   , GxB_TIMES_SECOND_INT8  , GxB_ANY_SECOND_INT8    , 
+    GxB_MIN_SECOND_UINT8   , GxB_MAX_SECOND_UINT8   , GxB_PLUS_SECOND_UINT8  , GxB_TIMES_SECOND_UINT8 , GxB_ANY_SECOND_UINT8   , 
+    GxB_MIN_SECOND_INT16   , GxB_MAX_SECOND_INT16   , GxB_PLUS_SECOND_INT16  , GxB_TIMES_SECOND_INT16 , GxB_ANY_SECOND_INT16   , 
+    GxB_MIN_SECOND_UINT16  , GxB_MAX_SECOND_UINT16  , GxB_PLUS_SECOND_UINT16 , GxB_TIMES_SECOND_UINT16, GxB_ANY_SECOND_UINT16  , 
+    GxB_MIN_SECOND_INT32   , GxB_MAX_SECOND_INT32   , GxB_PLUS_SECOND_INT32  , GxB_TIMES_SECOND_INT32 , GxB_ANY_SECOND_INT32   , 
+    GxB_MIN_SECOND_UINT32  , GxB_MAX_SECOND_UINT32  , GxB_PLUS_SECOND_UINT32 , GxB_TIMES_SECOND_UINT32, GxB_ANY_SECOND_UINT32  , 
+    GxB_MIN_SECOND_INT64   , GxB_MAX_SECOND_INT64   , GxB_PLUS_SECOND_INT64  , GxB_TIMES_SECOND_INT64 , GxB_ANY_SECOND_INT64   , 
+    GxB_MIN_SECOND_UINT64  , GxB_MAX_SECOND_UINT64  , GxB_PLUS_SECOND_UINT64 , GxB_TIMES_SECOND_UINT64, GxB_ANY_SECOND_UINT64  , 
+    GxB_MIN_SECOND_FP32    , GxB_MAX_SECOND_FP32    , GxB_PLUS_SECOND_FP32   , GxB_TIMES_SECOND_FP32  , GxB_ANY_SECOND_FP32    , 
+    GxB_MIN_SECOND_FP64    , GxB_MAX_SECOND_FP64    , GxB_PLUS_SECOND_FP64   , GxB_TIMES_SECOND_FP64  , GxB_ANY_SECOND_FP64    , 
+
+    // semirings with multiply op: z = PAIR (x,y), all types x,y,z the same:
+    GxB_MIN_PAIR_INT8      , GxB_MAX_PAIR_INT8      , GxB_PLUS_PAIR_INT8     , GxB_TIMES_PAIR_INT8    , GxB_ANY_PAIR_INT8      , 
+    GxB_MIN_PAIR_UINT8     , GxB_MAX_PAIR_UINT8     , GxB_PLUS_PAIR_UINT8    , GxB_TIMES_PAIR_UINT8   , GxB_ANY_PAIR_UINT8     , 
+    GxB_MIN_PAIR_INT16     , GxB_MAX_PAIR_INT16     , GxB_PLUS_PAIR_INT16    , GxB_TIMES_PAIR_INT16   , GxB_ANY_PAIR_INT16     , 
+    GxB_MIN_PAIR_UINT16    , GxB_MAX_PAIR_UINT16    , GxB_PLUS_PAIR_UINT16   , GxB_TIMES_PAIR_UINT16  , GxB_ANY_PAIR_UINT16    , 
+    GxB_MIN_PAIR_INT32     , GxB_MAX_PAIR_INT32     , GxB_PLUS_PAIR_INT32    , GxB_TIMES_PAIR_INT32   , GxB_ANY_PAIR_INT32     , 
+    GxB_MIN_PAIR_UINT32    , GxB_MAX_PAIR_UINT32    , GxB_PLUS_PAIR_UINT32   , GxB_TIMES_PAIR_UINT32  , GxB_ANY_PAIR_UINT32    , 
+    GxB_MIN_PAIR_INT64     , GxB_MAX_PAIR_INT64     , GxB_PLUS_PAIR_INT64    , GxB_TIMES_PAIR_INT64   , GxB_ANY_PAIR_INT64     , 
+    GxB_MIN_PAIR_UINT64    , GxB_MAX_PAIR_UINT64    , GxB_PLUS_PAIR_UINT64   , GxB_TIMES_PAIR_UINT64  , GxB_ANY_PAIR_UINT64    , 
+    GxB_MIN_PAIR_FP32      , GxB_MAX_PAIR_FP32      , GxB_PLUS_PAIR_FP32     , GxB_TIMES_PAIR_FP32    , GxB_ANY_PAIR_FP32      , 
+    GxB_MIN_PAIR_FP64      , GxB_MAX_PAIR_FP64      , GxB_PLUS_PAIR_FP64     , GxB_TIMES_PAIR_FP64    , GxB_ANY_PAIR_FP64      , 
+
+    // semirings with multiply op: z = MIN (x,y), all types x,y,z the same:
+    GxB_MIN_MIN_INT8       , GxB_MAX_MIN_INT8       , GxB_PLUS_MIN_INT8      , GxB_TIMES_MIN_INT8     , GxB_ANY_MIN_INT8       , 
+    GxB_MIN_MIN_UINT8      , GxB_MAX_MIN_UINT8      , GxB_PLUS_MIN_UINT8     , GxB_TIMES_MIN_UINT8    , GxB_ANY_MIN_UINT8      , 
+    GxB_MIN_MIN_INT16      , GxB_MAX_MIN_INT16      , GxB_PLUS_MIN_INT16     , GxB_TIMES_MIN_INT16    , GxB_ANY_MIN_INT16      , 
+    GxB_MIN_MIN_UINT16     , GxB_MAX_MIN_UINT16     , GxB_PLUS_MIN_UINT16    , GxB_TIMES_MIN_UINT16   , GxB_ANY_MIN_UINT16     , 
+    GxB_MIN_MIN_INT32      , GxB_MAX_MIN_INT32      , GxB_PLUS_MIN_INT32     , GxB_TIMES_MIN_INT32    , GxB_ANY_MIN_INT32      , 
+    GxB_MIN_MIN_UINT32     , GxB_MAX_MIN_UINT32     , GxB_PLUS_MIN_UINT32    , GxB_TIMES_MIN_UINT32   , GxB_ANY_MIN_UINT32     , 
+    GxB_MIN_MIN_INT64      , GxB_MAX_MIN_INT64      , GxB_PLUS_MIN_INT64     , GxB_TIMES_MIN_INT64    , GxB_ANY_MIN_INT64      , 
+    GxB_MIN_MIN_UINT64     , GxB_MAX_MIN_UINT64     , GxB_PLUS_MIN_UINT64    , GxB_TIMES_MIN_UINT64   , GxB_ANY_MIN_UINT64     , 
+    GxB_MIN_MIN_FP32       , GxB_MAX_MIN_FP32       , GxB_PLUS_MIN_FP32      , GxB_TIMES_MIN_FP32     , GxB_ANY_MIN_FP32       , 
+    GxB_MIN_MIN_FP64       , GxB_MAX_MIN_FP64       , GxB_PLUS_MIN_FP64      , GxB_TIMES_MIN_FP64     , GxB_ANY_MIN_FP64       , 
+
+    // semirings with multiply op: z = MAX (x,y), all types x,y,z the same:
+    GxB_MIN_MAX_INT8       , GxB_MAX_MAX_INT8       , GxB_PLUS_MAX_INT8      , GxB_TIMES_MAX_INT8     , GxB_ANY_MAX_INT8       , 
+    GxB_MIN_MAX_UINT8      , GxB_MAX_MAX_UINT8      , GxB_PLUS_MAX_UINT8     , GxB_TIMES_MAX_UINT8    , GxB_ANY_MAX_UINT8      , 
+    GxB_MIN_MAX_INT16      , GxB_MAX_MAX_INT16      , GxB_PLUS_MAX_INT16     , GxB_TIMES_MAX_INT16    , GxB_ANY_MAX_INT16      , 
+    GxB_MIN_MAX_UINT16     , GxB_MAX_MAX_UINT16     , GxB_PLUS_MAX_UINT16    , GxB_TIMES_MAX_UINT16   , GxB_ANY_MAX_UINT16     , 
+    GxB_MIN_MAX_INT32      , GxB_MAX_MAX_INT32      , GxB_PLUS_MAX_INT32     , GxB_TIMES_MAX_INT32    , GxB_ANY_MAX_INT32      , 
+    GxB_MIN_MAX_UINT32     , GxB_MAX_MAX_UINT32     , GxB_PLUS_MAX_UINT32    , GxB_TIMES_MAX_UINT32   , GxB_ANY_MAX_UINT32     , 
+    GxB_MIN_MAX_INT64      , GxB_MAX_MAX_INT64      , GxB_PLUS_MAX_INT64     , GxB_TIMES_MAX_INT64    , GxB_ANY_MAX_INT64      , 
+    GxB_MIN_MAX_UINT64     , GxB_MAX_MAX_UINT64     , GxB_PLUS_MAX_UINT64    , GxB_TIMES_MAX_UINT64   , GxB_ANY_MAX_UINT64     , 
+    GxB_MIN_MAX_FP32       , GxB_MAX_MAX_FP32       , GxB_PLUS_MAX_FP32      , GxB_TIMES_MAX_FP32     , GxB_ANY_MAX_FP32       , 
+    GxB_MIN_MAX_FP64       , GxB_MAX_MAX_FP64       , GxB_PLUS_MAX_FP64      , GxB_TIMES_MAX_FP64     , GxB_ANY_MAX_FP64       , 
+
+    // semirings with multiply op: z = PLUS (x,y), all types x,y,z the same:
+    GxB_MIN_PLUS_INT8      , GxB_MAX_PLUS_INT8      , GxB_PLUS_PLUS_INT8     , GxB_TIMES_PLUS_INT8    , GxB_ANY_PLUS_INT8      , 
+    GxB_MIN_PLUS_UINT8     , GxB_MAX_PLUS_UINT8     , GxB_PLUS_PLUS_UINT8    , GxB_TIMES_PLUS_UINT8   , GxB_ANY_PLUS_UINT8     , 
+    GxB_MIN_PLUS_INT16     , GxB_MAX_PLUS_INT16     , GxB_PLUS_PLUS_INT16    , GxB_TIMES_PLUS_INT16   , GxB_ANY_PLUS_INT16     , 
+    GxB_MIN_PLUS_UINT16    , GxB_MAX_PLUS_UINT16    , GxB_PLUS_PLUS_UINT16   , GxB_TIMES_PLUS_UINT16  , GxB_ANY_PLUS_UINT16    , 
+    GxB_MIN_PLUS_INT32     , GxB_MAX_PLUS_INT32     , GxB_PLUS_PLUS_INT32    , GxB_TIMES_PLUS_INT32   , GxB_ANY_PLUS_INT32     , 
+    GxB_MIN_PLUS_UINT32    , GxB_MAX_PLUS_UINT32    , GxB_PLUS_PLUS_UINT32   , GxB_TIMES_PLUS_UINT32  , GxB_ANY_PLUS_UINT32    , 
+    GxB_MIN_PLUS_INT64     , GxB_MAX_PLUS_INT64     , GxB_PLUS_PLUS_INT64    , GxB_TIMES_PLUS_INT64   , GxB_ANY_PLUS_INT64     , 
+    GxB_MIN_PLUS_UINT64    , GxB_MAX_PLUS_UINT64    , GxB_PLUS_PLUS_UINT64   , GxB_TIMES_PLUS_UINT64  , GxB_ANY_PLUS_UINT64    , 
+    GxB_MIN_PLUS_FP32      , GxB_MAX_PLUS_FP32      , GxB_PLUS_PLUS_FP32     , GxB_TIMES_PLUS_FP32    , GxB_ANY_PLUS_FP32      , 
+    GxB_MIN_PLUS_FP64      , GxB_MAX_PLUS_FP64      , GxB_PLUS_PLUS_FP64     , GxB_TIMES_PLUS_FP64    , GxB_ANY_PLUS_FP64      , 
+
+    // semirings with multiply op: z = MINUS (x,y), all types x,y,z the same:
+    GxB_MIN_MINUS_INT8     , GxB_MAX_MINUS_INT8     , GxB_PLUS_MINUS_INT8    , GxB_TIMES_MINUS_INT8   , GxB_ANY_MINUS_INT8     , 
+    GxB_MIN_MINUS_UINT8    , GxB_MAX_MINUS_UINT8    , GxB_PLUS_MINUS_UINT8   , GxB_TIMES_MINUS_UINT8  , GxB_ANY_MINUS_UINT8    , 
+    GxB_MIN_MINUS_INT16    , GxB_MAX_MINUS_INT16    , GxB_PLUS_MINUS_INT16   , GxB_TIMES_MINUS_INT16  , GxB_ANY_MINUS_INT16    , 
+    GxB_MIN_MINUS_UINT16   , GxB_MAX_MINUS_UINT16   , GxB_PLUS_MINUS_UINT16  , GxB_TIMES_MINUS_UINT16 , GxB_ANY_MINUS_UINT16   , 
+    GxB_MIN_MINUS_INT32    , GxB_MAX_MINUS_INT32    , GxB_PLUS_MINUS_INT32   , GxB_TIMES_MINUS_INT32  , GxB_ANY_MINUS_INT32    , 
+    GxB_MIN_MINUS_UINT32   , GxB_MAX_MINUS_UINT32   , GxB_PLUS_MINUS_UINT32  , GxB_TIMES_MINUS_UINT32 , GxB_ANY_MINUS_UINT32   , 
+    GxB_MIN_MINUS_INT64    , GxB_MAX_MINUS_INT64    , GxB_PLUS_MINUS_INT64   , GxB_TIMES_MINUS_INT64  , GxB_ANY_MINUS_INT64    , 
+    GxB_MIN_MINUS_UINT64   , GxB_MAX_MINUS_UINT64   , GxB_PLUS_MINUS_UINT64  , GxB_TIMES_MINUS_UINT64 , GxB_ANY_MINUS_UINT64   , 
+    GxB_MIN_MINUS_FP32     , GxB_MAX_MINUS_FP32     , GxB_PLUS_MINUS_FP32    , GxB_TIMES_MINUS_FP32   , GxB_ANY_MINUS_FP32     , 
+    GxB_MIN_MINUS_FP64     , GxB_MAX_MINUS_FP64     , GxB_PLUS_MINUS_FP64    , GxB_TIMES_MINUS_FP64   , GxB_ANY_MINUS_FP64     , 
+
+    // semirings with multiply op: z = TIMES (x,y), all types x,y,z the same:
+    GxB_MIN_TIMES_INT8     , GxB_MAX_TIMES_INT8     , GxB_PLUS_TIMES_INT8    , GxB_TIMES_TIMES_INT8   , GxB_ANY_TIMES_INT8     , 
+    GxB_MIN_TIMES_UINT8    , GxB_MAX_TIMES_UINT8    , GxB_PLUS_TIMES_UINT8   , GxB_TIMES_TIMES_UINT8  , GxB_ANY_TIMES_UINT8    , 
+    GxB_MIN_TIMES_INT16    , GxB_MAX_TIMES_INT16    , GxB_PLUS_TIMES_INT16   , GxB_TIMES_TIMES_INT16  , GxB_ANY_TIMES_INT16    , 
+    GxB_MIN_TIMES_UINT16   , GxB_MAX_TIMES_UINT16   , GxB_PLUS_TIMES_UINT16  , GxB_TIMES_TIMES_UINT16 , GxB_ANY_TIMES_UINT16   , 
+    GxB_MIN_TIMES_INT32    , GxB_MAX_TIMES_INT32    , GxB_PLUS_TIMES_INT32   , GxB_TIMES_TIMES_INT32  , GxB_ANY_TIMES_INT32    , 
+    GxB_MIN_TIMES_UINT32   , GxB_MAX_TIMES_UINT32   , GxB_PLUS_TIMES_UINT32  , GxB_TIMES_TIMES_UINT32 , GxB_ANY_TIMES_UINT32   , 
+    GxB_MIN_TIMES_INT64    , GxB_MAX_TIMES_INT64    , GxB_PLUS_TIMES_INT64   , GxB_TIMES_TIMES_INT64  , GxB_ANY_TIMES_INT64    , 
+    GxB_MIN_TIMES_UINT64   , GxB_MAX_TIMES_UINT64   , GxB_PLUS_TIMES_UINT64  , GxB_TIMES_TIMES_UINT64 , GxB_ANY_TIMES_UINT64   , 
+    GxB_MIN_TIMES_FP32     , GxB_MAX_TIMES_FP32     , GxB_PLUS_TIMES_FP32    , GxB_TIMES_TIMES_FP32   , GxB_ANY_TIMES_FP32     , 
+    GxB_MIN_TIMES_FP64     , GxB_MAX_TIMES_FP64     , GxB_PLUS_TIMES_FP64    , GxB_TIMES_TIMES_FP64   , GxB_ANY_TIMES_FP64     , 
+
+    // semirings with multiply op: z = DIV (x,y), all types x,y,z the same:
+    GxB_MIN_DIV_INT8       , GxB_MAX_DIV_INT8       , GxB_PLUS_DIV_INT8      , GxB_TIMES_DIV_INT8     , GxB_ANY_DIV_INT8       , 
+    GxB_MIN_DIV_UINT8      , GxB_MAX_DIV_UINT8      , GxB_PLUS_DIV_UINT8     , GxB_TIMES_DIV_UINT8    , GxB_ANY_DIV_UINT8      , 
+    GxB_MIN_DIV_INT16      , GxB_MAX_DIV_INT16      , GxB_PLUS_DIV_INT16     , GxB_TIMES_DIV_INT16    , GxB_ANY_DIV_INT16      , 
+    GxB_MIN_DIV_UINT16     , GxB_MAX_DIV_UINT16     , GxB_PLUS_DIV_UINT16    , GxB_TIMES_DIV_UINT16   , GxB_ANY_DIV_UINT16     , 
+    GxB_MIN_DIV_INT32      , GxB_MAX_DIV_INT32      , GxB_PLUS_DIV_INT32     , GxB_TIMES_DIV_INT32    , GxB_ANY_DIV_INT32      , 
+    GxB_MIN_DIV_UINT32     , GxB_MAX_DIV_UINT32     , GxB_PLUS_DIV_UINT32    , GxB_TIMES_DIV_UINT32   , GxB_ANY_DIV_UINT32     , 
+    GxB_MIN_DIV_INT64      , GxB_MAX_DIV_INT64      , GxB_PLUS_DIV_INT64     , GxB_TIMES_DIV_INT64    , GxB_ANY_DIV_INT64      , 
+    GxB_MIN_DIV_UINT64     , GxB_MAX_DIV_UINT64     , GxB_PLUS_DIV_UINT64    , GxB_TIMES_DIV_UINT64   , GxB_ANY_DIV_UINT64     , 
+    GxB_MIN_DIV_FP32       , GxB_MAX_DIV_FP32       , GxB_PLUS_DIV_FP32      , GxB_TIMES_DIV_FP32     , GxB_ANY_DIV_FP32       , 
+    GxB_MIN_DIV_FP64       , GxB_MAX_DIV_FP64       , GxB_PLUS_DIV_FP64      , GxB_TIMES_DIV_FP64     , GxB_ANY_DIV_FP64       , 
+
+    // semirings with multiply op: z = RDIV (x,y), all types x,y,z the same:
+    GxB_MIN_RDIV_INT8      , GxB_MAX_RDIV_INT8      , GxB_PLUS_RDIV_INT8     , GxB_TIMES_RDIV_INT8    , GxB_ANY_RDIV_INT8      , 
+    GxB_MIN_RDIV_UINT8     , GxB_MAX_RDIV_UINT8     , GxB_PLUS_RDIV_UINT8    , GxB_TIMES_RDIV_UINT8   , GxB_ANY_RDIV_UINT8     , 
+    GxB_MIN_RDIV_INT16     , GxB_MAX_RDIV_INT16     , GxB_PLUS_RDIV_INT16    , GxB_TIMES_RDIV_INT16   , GxB_ANY_RDIV_INT16     , 
+    GxB_MIN_RDIV_UINT16    , GxB_MAX_RDIV_UINT16    , GxB_PLUS_RDIV_UINT16   , GxB_TIMES_RDIV_UINT16  , GxB_ANY_RDIV_UINT16    , 
+    GxB_MIN_RDIV_INT32     , GxB_MAX_RDIV_INT32     , GxB_PLUS_RDIV_INT32    , GxB_TIMES_RDIV_INT32   , GxB_ANY_RDIV_INT32     , 
+    GxB_MIN_RDIV_UINT32    , GxB_MAX_RDIV_UINT32    , GxB_PLUS_RDIV_UINT32   , GxB_TIMES_RDIV_UINT32  , GxB_ANY_RDIV_UINT32    , 
+    GxB_MIN_RDIV_INT64     , GxB_MAX_RDIV_INT64     , GxB_PLUS_RDIV_INT64    , GxB_TIMES_RDIV_INT64   , GxB_ANY_RDIV_INT64     , 
+    GxB_MIN_RDIV_UINT64    , GxB_MAX_RDIV_UINT64    , GxB_PLUS_RDIV_UINT64   , GxB_TIMES_RDIV_UINT64  , GxB_ANY_RDIV_UINT64    , 
+    GxB_MIN_RDIV_FP32      , GxB_MAX_RDIV_FP32      , GxB_PLUS_RDIV_FP32     , GxB_TIMES_RDIV_FP32    , GxB_ANY_RDIV_FP32      , 
+    GxB_MIN_RDIV_FP64      , GxB_MAX_RDIV_FP64      , GxB_PLUS_RDIV_FP64     , GxB_TIMES_RDIV_FP64    , GxB_ANY_RDIV_FP64      , 
+
+    // semirings with multiply op: z = RMINUS (x,y), all types x,y,z the same:
+    GxB_MIN_RMINUS_INT8    , GxB_MAX_RMINUS_INT8    , GxB_PLUS_RMINUS_INT8   , GxB_TIMES_RMINUS_INT8  , GxB_ANY_RMINUS_INT8    , 
+    GxB_MIN_RMINUS_UINT8   , GxB_MAX_RMINUS_UINT8   , GxB_PLUS_RMINUS_UINT8  , GxB_TIMES_RMINUS_UINT8 , GxB_ANY_RMINUS_UINT8   , 
+    GxB_MIN_RMINUS_INT16   , GxB_MAX_RMINUS_INT16   , GxB_PLUS_RMINUS_INT16  , GxB_TIMES_RMINUS_INT16 , GxB_ANY_RMINUS_INT16   , 
+    GxB_MIN_RMINUS_UINT16  , GxB_MAX_RMINUS_UINT16  , GxB_PLUS_RMINUS_UINT16 , GxB_TIMES_RMINUS_UINT16, GxB_ANY_RMINUS_UINT16  , 
+    GxB_MIN_RMINUS_INT32   , GxB_MAX_RMINUS_INT32   , GxB_PLUS_RMINUS_INT32  , GxB_TIMES_RMINUS_INT32 , GxB_ANY_RMINUS_INT32   , 
+    GxB_MIN_RMINUS_UINT32  , GxB_MAX_RMINUS_UINT32  , GxB_PLUS_RMINUS_UINT32 , GxB_TIMES_RMINUS_UINT32, GxB_ANY_RMINUS_UINT32  , 
+    GxB_MIN_RMINUS_INT64   , GxB_MAX_RMINUS_INT64   , GxB_PLUS_RMINUS_INT64  , GxB_TIMES_RMINUS_INT64 , GxB_ANY_RMINUS_INT64   , 
+    GxB_MIN_RMINUS_UINT64  , GxB_MAX_RMINUS_UINT64  , GxB_PLUS_RMINUS_UINT64 , GxB_TIMES_RMINUS_UINT64, GxB_ANY_RMINUS_UINT64  , 
+    GxB_MIN_RMINUS_FP32    , GxB_MAX_RMINUS_FP32    , GxB_PLUS_RMINUS_FP32   , GxB_TIMES_RMINUS_FP32  , GxB_ANY_RMINUS_FP32    , 
+    GxB_MIN_RMINUS_FP64    , GxB_MAX_RMINUS_FP64    , GxB_PLUS_RMINUS_FP64   , GxB_TIMES_RMINUS_FP64  , GxB_ANY_RMINUS_FP64    , 
+
+    // semirings with multiply op: z = ISEQ (x,y), all types x,y,z the same:
+    GxB_MIN_ISEQ_INT8      , GxB_MAX_ISEQ_INT8      , GxB_PLUS_ISEQ_INT8     , GxB_TIMES_ISEQ_INT8    , GxB_ANY_ISEQ_INT8      , 
+    GxB_MIN_ISEQ_UINT8     , GxB_MAX_ISEQ_UINT8     , GxB_PLUS_ISEQ_UINT8    , GxB_TIMES_ISEQ_UINT8   , GxB_ANY_ISEQ_UINT8     , 
+    GxB_MIN_ISEQ_INT16     , GxB_MAX_ISEQ_INT16     , GxB_PLUS_ISEQ_INT16    , GxB_TIMES_ISEQ_INT16   , GxB_ANY_ISEQ_INT16     , 
+    GxB_MIN_ISEQ_UINT16    , GxB_MAX_ISEQ_UINT16    , GxB_PLUS_ISEQ_UINT16   , GxB_TIMES_ISEQ_UINT16  , GxB_ANY_ISEQ_UINT16    , 
+    GxB_MIN_ISEQ_INT32     , GxB_MAX_ISEQ_INT32     , GxB_PLUS_ISEQ_INT32    , GxB_TIMES_ISEQ_INT32   , GxB_ANY_ISEQ_INT32     , 
+    GxB_MIN_ISEQ_UINT32    , GxB_MAX_ISEQ_UINT32    , GxB_PLUS_ISEQ_UINT32   , GxB_TIMES_ISEQ_UINT32  , GxB_ANY_ISEQ_UINT32    , 
+    GxB_MIN_ISEQ_INT64     , GxB_MAX_ISEQ_INT64     , GxB_PLUS_ISEQ_INT64    , GxB_TIMES_ISEQ_INT64   , GxB_ANY_ISEQ_INT64     , 
+    GxB_MIN_ISEQ_UINT64    , GxB_MAX_ISEQ_UINT64    , GxB_PLUS_ISEQ_UINT64   , GxB_TIMES_ISEQ_UINT64  , GxB_ANY_ISEQ_UINT64    , 
+    GxB_MIN_ISEQ_FP32      , GxB_MAX_ISEQ_FP32      , GxB_PLUS_ISEQ_FP32     , GxB_TIMES_ISEQ_FP32    , GxB_ANY_ISEQ_FP32      , 
+    GxB_MIN_ISEQ_FP64      , GxB_MAX_ISEQ_FP64      , GxB_PLUS_ISEQ_FP64     , GxB_TIMES_ISEQ_FP64    , GxB_ANY_ISEQ_FP64      , 
+
+    // semirings with multiply op: z = ISNE (x,y), all types x,y,z the same:
+    GxB_MIN_ISNE_INT8      , GxB_MAX_ISNE_INT8      , GxB_PLUS_ISNE_INT8     , GxB_TIMES_ISNE_INT8    , GxB_ANY_ISNE_INT8      , 
+    GxB_MIN_ISNE_UINT8     , GxB_MAX_ISNE_UINT8     , GxB_PLUS_ISNE_UINT8    , GxB_TIMES_ISNE_UINT8   , GxB_ANY_ISNE_UINT8     , 
+    GxB_MIN_ISNE_INT16     , GxB_MAX_ISNE_INT16     , GxB_PLUS_ISNE_INT16    , GxB_TIMES_ISNE_INT16   , GxB_ANY_ISNE_INT16     , 
+    GxB_MIN_ISNE_UINT16    , GxB_MAX_ISNE_UINT16    , GxB_PLUS_ISNE_UINT16   , GxB_TIMES_ISNE_UINT16  , GxB_ANY_ISNE_UINT16    , 
+    GxB_MIN_ISNE_INT32     , GxB_MAX_ISNE_INT32     , GxB_PLUS_ISNE_INT32    , GxB_TIMES_ISNE_INT32   , GxB_ANY_ISNE_INT32     , 
+    GxB_MIN_ISNE_UINT32    , GxB_MAX_ISNE_UINT32    , GxB_PLUS_ISNE_UINT32   , GxB_TIMES_ISNE_UINT32  , GxB_ANY_ISNE_UINT32    , 
+    GxB_MIN_ISNE_INT64     , GxB_MAX_ISNE_INT64     , GxB_PLUS_ISNE_INT64    , GxB_TIMES_ISNE_INT64   , GxB_ANY_ISNE_INT64     , 
+    GxB_MIN_ISNE_UINT64    , GxB_MAX_ISNE_UINT64    , GxB_PLUS_ISNE_UINT64   , GxB_TIMES_ISNE_UINT64  , GxB_ANY_ISNE_UINT64    , 
+    GxB_MIN_ISNE_FP32      , GxB_MAX_ISNE_FP32      , GxB_PLUS_ISNE_FP32     , GxB_TIMES_ISNE_FP32    , GxB_ANY_ISNE_FP32      , 
+    GxB_MIN_ISNE_FP64      , GxB_MAX_ISNE_FP64      , GxB_PLUS_ISNE_FP64     , GxB_TIMES_ISNE_FP64    , GxB_ANY_ISNE_FP64      , 
+
+    // semirings with multiply op: z = ISGT (x,y), all types x,y,z the same:
+    GxB_MIN_ISGT_INT8      , GxB_MAX_ISGT_INT8      , GxB_PLUS_ISGT_INT8     , GxB_TIMES_ISGT_INT8    , GxB_ANY_ISGT_INT8      , 
+    GxB_MIN_ISGT_UINT8     , GxB_MAX_ISGT_UINT8     , GxB_PLUS_ISGT_UINT8    , GxB_TIMES_ISGT_UINT8   , GxB_ANY_ISGT_UINT8     , 
+    GxB_MIN_ISGT_INT16     , GxB_MAX_ISGT_INT16     , GxB_PLUS_ISGT_INT16    , GxB_TIMES_ISGT_INT16   , GxB_ANY_ISGT_INT16     , 
+    GxB_MIN_ISGT_UINT16    , GxB_MAX_ISGT_UINT16    , GxB_PLUS_ISGT_UINT16   , GxB_TIMES_ISGT_UINT16  , GxB_ANY_ISGT_UINT16    , 
+    GxB_MIN_ISGT_INT32     , GxB_MAX_ISGT_INT32     , GxB_PLUS_ISGT_INT32    , GxB_TIMES_ISGT_INT32   , GxB_ANY_ISGT_INT32     , 
+    GxB_MIN_ISGT_UINT32    , GxB_MAX_ISGT_UINT32    , GxB_PLUS_ISGT_UINT32   , GxB_TIMES_ISGT_UINT32  , GxB_ANY_ISGT_UINT32    , 
+    GxB_MIN_ISGT_INT64     , GxB_MAX_ISGT_INT64     , GxB_PLUS_ISGT_INT64    , GxB_TIMES_ISGT_INT64   , GxB_ANY_ISGT_INT64     , 
+    GxB_MIN_ISGT_UINT64    , GxB_MAX_ISGT_UINT64    , GxB_PLUS_ISGT_UINT64   , GxB_TIMES_ISGT_UINT64  , GxB_ANY_ISGT_UINT64    , 
+    GxB_MIN_ISGT_FP32      , GxB_MAX_ISGT_FP32      , GxB_PLUS_ISGT_FP32     , GxB_TIMES_ISGT_FP32    , GxB_ANY_ISGT_FP32      , 
+    GxB_MIN_ISGT_FP64      , GxB_MAX_ISGT_FP64      , GxB_PLUS_ISGT_FP64     , GxB_TIMES_ISGT_FP64    , GxB_ANY_ISGT_FP64      , 
+
+    // semirings with multiply op: z = ISLT (x,y), all types x,y,z the same:
+    GxB_MIN_ISLT_INT8      , GxB_MAX_ISLT_INT8      , GxB_PLUS_ISLT_INT8     , GxB_TIMES_ISLT_INT8    , GxB_ANY_ISLT_INT8      , 
+    GxB_MIN_ISLT_UINT8     , GxB_MAX_ISLT_UINT8     , GxB_PLUS_ISLT_UINT8    , GxB_TIMES_ISLT_UINT8   , GxB_ANY_ISLT_UINT8     , 
+    GxB_MIN_ISLT_INT16     , GxB_MAX_ISLT_INT16     , GxB_PLUS_ISLT_INT16    , GxB_TIMES_ISLT_INT16   , GxB_ANY_ISLT_INT16     , 
+    GxB_MIN_ISLT_UINT16    , GxB_MAX_ISLT_UINT16    , GxB_PLUS_ISLT_UINT16   , GxB_TIMES_ISLT_UINT16  , GxB_ANY_ISLT_UINT16    , 
+    GxB_MIN_ISLT_INT32     , GxB_MAX_ISLT_INT32     , GxB_PLUS_ISLT_INT32    , GxB_TIMES_ISLT_INT32   , GxB_ANY_ISLT_INT32     , 
+    GxB_MIN_ISLT_UINT32    , GxB_MAX_ISLT_UINT32    , GxB_PLUS_ISLT_UINT32   , GxB_TIMES_ISLT_UINT32  , GxB_ANY_ISLT_UINT32    , 
+    GxB_MIN_ISLT_INT64     , GxB_MAX_ISLT_INT64     , GxB_PLUS_ISLT_INT64    , GxB_TIMES_ISLT_INT64   , GxB_ANY_ISLT_INT64     , 
+    GxB_MIN_ISLT_UINT64    , GxB_MAX_ISLT_UINT64    , GxB_PLUS_ISLT_UINT64   , GxB_TIMES_ISLT_UINT64  , GxB_ANY_ISLT_UINT64    , 
+    GxB_MIN_ISLT_FP32      , GxB_MAX_ISLT_FP32      , GxB_PLUS_ISLT_FP32     , GxB_TIMES_ISLT_FP32    , GxB_ANY_ISLT_FP32      , 
+    GxB_MIN_ISLT_FP64      , GxB_MAX_ISLT_FP64      , GxB_PLUS_ISLT_FP64     , GxB_TIMES_ISLT_FP64    , GxB_ANY_ISLT_FP64      , 
+
+    // semirings with multiply op: z = ISGE (x,y), all types x,y,z the same:
+    GxB_MIN_ISGE_INT8      , GxB_MAX_ISGE_INT8      , GxB_PLUS_ISGE_INT8     , GxB_TIMES_ISGE_INT8    , GxB_ANY_ISGE_INT8      , 
+    GxB_MIN_ISGE_UINT8     , GxB_MAX_ISGE_UINT8     , GxB_PLUS_ISGE_UINT8    , GxB_TIMES_ISGE_UINT8   , GxB_ANY_ISGE_UINT8     , 
+    GxB_MIN_ISGE_INT16     , GxB_MAX_ISGE_INT16     , GxB_PLUS_ISGE_INT16    , GxB_TIMES_ISGE_INT16   , GxB_ANY_ISGE_INT16     , 
+    GxB_MIN_ISGE_UINT16    , GxB_MAX_ISGE_UINT16    , GxB_PLUS_ISGE_UINT16   , GxB_TIMES_ISGE_UINT16  , GxB_ANY_ISGE_UINT16    , 
+    GxB_MIN_ISGE_INT32     , GxB_MAX_ISGE_INT32     , GxB_PLUS_ISGE_INT32    , GxB_TIMES_ISGE_INT32   , GxB_ANY_ISGE_INT32     , 
+    GxB_MIN_ISGE_UINT32    , GxB_MAX_ISGE_UINT32    , GxB_PLUS_ISGE_UINT32   , GxB_TIMES_ISGE_UINT32  , GxB_ANY_ISGE_UINT32    , 
+    GxB_MIN_ISGE_INT64     , GxB_MAX_ISGE_INT64     , GxB_PLUS_ISGE_INT64    , GxB_TIMES_ISGE_INT64   , GxB_ANY_ISGE_INT64     , 
+    GxB_MIN_ISGE_UINT64    , GxB_MAX_ISGE_UINT64    , GxB_PLUS_ISGE_UINT64   , GxB_TIMES_ISGE_UINT64  , GxB_ANY_ISGE_UINT64    , 
+    GxB_MIN_ISGE_FP32      , GxB_MAX_ISGE_FP32      , GxB_PLUS_ISGE_FP32     , GxB_TIMES_ISGE_FP32    , GxB_ANY_ISGE_FP32      , 
+    GxB_MIN_ISGE_FP64      , GxB_MAX_ISGE_FP64      , GxB_PLUS_ISGE_FP64     , GxB_TIMES_ISGE_FP64    , GxB_ANY_ISGE_FP64      , 
+
+    // semirings with multiply op: z = ISLE (x,y), all types x,y,z the same:
+    GxB_MIN_ISLE_INT8      , GxB_MAX_ISLE_INT8      , GxB_PLUS_ISLE_INT8     , GxB_TIMES_ISLE_INT8    , GxB_ANY_ISLE_INT8      , 
+    GxB_MIN_ISLE_UINT8     , GxB_MAX_ISLE_UINT8     , GxB_PLUS_ISLE_UINT8    , GxB_TIMES_ISLE_UINT8   , GxB_ANY_ISLE_UINT8     , 
+    GxB_MIN_ISLE_INT16     , GxB_MAX_ISLE_INT16     , GxB_PLUS_ISLE_INT16    , GxB_TIMES_ISLE_INT16   , GxB_ANY_ISLE_INT16     , 
+    GxB_MIN_ISLE_UINT16    , GxB_MAX_ISLE_UINT16    , GxB_PLUS_ISLE_UINT16   , GxB_TIMES_ISLE_UINT16  , GxB_ANY_ISLE_UINT16    , 
+    GxB_MIN_ISLE_INT32     , GxB_MAX_ISLE_INT32     , GxB_PLUS_ISLE_INT32    , GxB_TIMES_ISLE_INT32   , GxB_ANY_ISLE_INT32     , 
+    GxB_MIN_ISLE_UINT32    , GxB_MAX_ISLE_UINT32    , GxB_PLUS_ISLE_UINT32   , GxB_TIMES_ISLE_UINT32  , GxB_ANY_ISLE_UINT32    , 
+    GxB_MIN_ISLE_INT64     , GxB_MAX_ISLE_INT64     , GxB_PLUS_ISLE_INT64    , GxB_TIMES_ISLE_INT64   , GxB_ANY_ISLE_INT64     , 
+    GxB_MIN_ISLE_UINT64    , GxB_MAX_ISLE_UINT64    , GxB_PLUS_ISLE_UINT64   , GxB_TIMES_ISLE_UINT64  , GxB_ANY_ISLE_UINT64    , 
+    GxB_MIN_ISLE_FP32      , GxB_MAX_ISLE_FP32      , GxB_PLUS_ISLE_FP32     , GxB_TIMES_ISLE_FP32    , GxB_ANY_ISLE_FP32      , 
+    GxB_MIN_ISLE_FP64      , GxB_MAX_ISLE_FP64      , GxB_PLUS_ISLE_FP64     , GxB_TIMES_ISLE_FP64    , GxB_ANY_ISLE_FP64      , 
+
+    // semirings with multiply op: z = LOR (x,y), all types x,y,z the same:
+    GxB_MIN_LOR_INT8       , GxB_MAX_LOR_INT8       , GxB_PLUS_LOR_INT8      , GxB_TIMES_LOR_INT8     , GxB_ANY_LOR_INT8       , 
+    GxB_MIN_LOR_UINT8      , GxB_MAX_LOR_UINT8      , GxB_PLUS_LOR_UINT8     , GxB_TIMES_LOR_UINT8    , GxB_ANY_LOR_UINT8      , 
+    GxB_MIN_LOR_INT16      , GxB_MAX_LOR_INT16      , GxB_PLUS_LOR_INT16     , GxB_TIMES_LOR_INT16    , GxB_ANY_LOR_INT16      , 
+    GxB_MIN_LOR_UINT16     , GxB_MAX_LOR_UINT16     , GxB_PLUS_LOR_UINT16    , GxB_TIMES_LOR_UINT16   , GxB_ANY_LOR_UINT16     , 
+    GxB_MIN_LOR_INT32      , GxB_MAX_LOR_INT32      , GxB_PLUS_LOR_INT32     , GxB_TIMES_LOR_INT32    , GxB_ANY_LOR_INT32      , 
+    GxB_MIN_LOR_UINT32     , GxB_MAX_LOR_UINT32     , GxB_PLUS_LOR_UINT32    , GxB_TIMES_LOR_UINT32   , GxB_ANY_LOR_UINT32     , 
+    GxB_MIN_LOR_INT64      , GxB_MAX_LOR_INT64      , GxB_PLUS_LOR_INT64     , GxB_TIMES_LOR_INT64    , GxB_ANY_LOR_INT64      , 
+    GxB_MIN_LOR_UINT64     , GxB_MAX_LOR_UINT64     , GxB_PLUS_LOR_UINT64    , GxB_TIMES_LOR_UINT64   , GxB_ANY_LOR_UINT64     , 
+    GxB_MIN_LOR_FP32       , GxB_MAX_LOR_FP32       , GxB_PLUS_LOR_FP32      , GxB_TIMES_LOR_FP32     , GxB_ANY_LOR_FP32       , 
+    GxB_MIN_LOR_FP64       , GxB_MAX_LOR_FP64       , GxB_PLUS_LOR_FP64      , GxB_TIMES_LOR_FP64     , GxB_ANY_LOR_FP64       , 
+
+    // semirings with multiply op: z = LAND (x,y), all types x,y,z the same:
+    GxB_MIN_LAND_INT8      , GxB_MAX_LAND_INT8      , GxB_PLUS_LAND_INT8     , GxB_TIMES_LAND_INT8    , GxB_ANY_LAND_INT8      , 
+    GxB_MIN_LAND_UINT8     , GxB_MAX_LAND_UINT8     , GxB_PLUS_LAND_UINT8    , GxB_TIMES_LAND_UINT8   , GxB_ANY_LAND_UINT8     , 
+    GxB_MIN_LAND_INT16     , GxB_MAX_LAND_INT16     , GxB_PLUS_LAND_INT16    , GxB_TIMES_LAND_INT16   , GxB_ANY_LAND_INT16     , 
+    GxB_MIN_LAND_UINT16    , GxB_MAX_LAND_UINT16    , GxB_PLUS_LAND_UINT16   , GxB_TIMES_LAND_UINT16  , GxB_ANY_LAND_UINT16    , 
+    GxB_MIN_LAND_INT32     , GxB_MAX_LAND_INT32     , GxB_PLUS_LAND_INT32    , GxB_TIMES_LAND_INT32   , GxB_ANY_LAND_INT32     , 
+    GxB_MIN_LAND_UINT32    , GxB_MAX_LAND_UINT32    , GxB_PLUS_LAND_UINT32   , GxB_TIMES_LAND_UINT32  , GxB_ANY_LAND_UINT32    , 
+    GxB_MIN_LAND_INT64     , GxB_MAX_LAND_INT64     , GxB_PLUS_LAND_INT64    , GxB_TIMES_LAND_INT64   , GxB_ANY_LAND_INT64     , 
+    GxB_MIN_LAND_UINT64    , GxB_MAX_LAND_UINT64    , GxB_PLUS_LAND_UINT64   , GxB_TIMES_LAND_UINT64  , GxB_ANY_LAND_UINT64    , 
+    GxB_MIN_LAND_FP32      , GxB_MAX_LAND_FP32      , GxB_PLUS_LAND_FP32     , GxB_TIMES_LAND_FP32    , GxB_ANY_LAND_FP32      , 
+    GxB_MIN_LAND_FP64      , GxB_MAX_LAND_FP64      , GxB_PLUS_LAND_FP64     , GxB_TIMES_LAND_FP64    , GxB_ANY_LAND_FP64      , 
+
+    // semirings with multiply op: z = LXOR (x,y), all types x,y,z the same:
+    GxB_MIN_LXOR_INT8      , GxB_MAX_LXOR_INT8      , GxB_PLUS_LXOR_INT8     , GxB_TIMES_LXOR_INT8    , GxB_ANY_LXOR_INT8      , 
+    GxB_MIN_LXOR_UINT8     , GxB_MAX_LXOR_UINT8     , GxB_PLUS_LXOR_UINT8    , GxB_TIMES_LXOR_UINT8   , GxB_ANY_LXOR_UINT8     , 
+    GxB_MIN_LXOR_INT16     , GxB_MAX_LXOR_INT16     , GxB_PLUS_LXOR_INT16    , GxB_TIMES_LXOR_INT16   , GxB_ANY_LXOR_INT16     , 
+    GxB_MIN_LXOR_UINT16    , GxB_MAX_LXOR_UINT16    , GxB_PLUS_LXOR_UINT16   , GxB_TIMES_LXOR_UINT16  , GxB_ANY_LXOR_UINT16    , 
+    GxB_MIN_LXOR_INT32     , GxB_MAX_LXOR_INT32     , GxB_PLUS_LXOR_INT32    , GxB_TIMES_LXOR_INT32   , GxB_ANY_LXOR_INT32     , 
+    GxB_MIN_LXOR_UINT32    , GxB_MAX_LXOR_UINT32    , GxB_PLUS_LXOR_UINT32   , GxB_TIMES_LXOR_UINT32  , GxB_ANY_LXOR_UINT32    , 
+    GxB_MIN_LXOR_INT64     , GxB_MAX_LXOR_INT64     , GxB_PLUS_LXOR_INT64    , GxB_TIMES_LXOR_INT64   , GxB_ANY_LXOR_INT64     , 
+    GxB_MIN_LXOR_UINT64    , GxB_MAX_LXOR_UINT64    , GxB_PLUS_LXOR_UINT64   , GxB_TIMES_LXOR_UINT64  , GxB_ANY_LXOR_UINT64    , 
+    GxB_MIN_LXOR_FP32      , GxB_MAX_LXOR_FP32      , GxB_PLUS_LXOR_FP32     , GxB_TIMES_LXOR_FP32    , GxB_ANY_LXOR_FP32      , 
+    GxB_MIN_LXOR_FP64      , GxB_MAX_LXOR_FP64      , GxB_PLUS_LXOR_FP64     , GxB_TIMES_LXOR_FP64    , GxB_ANY_LXOR_FP64      , 
+
+//------------------------------------------------------------------------------
+// 300 semirings with a comparison operator TxT -> bool, where T is non-Boolean
+//------------------------------------------------------------------------------
+
+    // semirings with multiply op: z = EQ (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_EQ_INT8        , GxB_LAND_EQ_INT8       , GxB_LXOR_EQ_INT8       , GxB_EQ_EQ_INT8         , GxB_ANY_EQ_INT8        , 
+    GxB_LOR_EQ_UINT8       , GxB_LAND_EQ_UINT8      , GxB_LXOR_EQ_UINT8      , GxB_EQ_EQ_UINT8        , GxB_ANY_EQ_UINT8       , 
+    GxB_LOR_EQ_INT16       , GxB_LAND_EQ_INT16      , GxB_LXOR_EQ_INT16      , GxB_EQ_EQ_INT16        , GxB_ANY_EQ_INT16       , 
+    GxB_LOR_EQ_UINT16      , GxB_LAND_EQ_UINT16     , GxB_LXOR_EQ_UINT16     , GxB_EQ_EQ_UINT16       , GxB_ANY_EQ_UINT16      , 
+    GxB_LOR_EQ_INT32       , GxB_LAND_EQ_INT32      , GxB_LXOR_EQ_INT32      , GxB_EQ_EQ_INT32        , GxB_ANY_EQ_INT32       , 
+    GxB_LOR_EQ_UINT32      , GxB_LAND_EQ_UINT32     , GxB_LXOR_EQ_UINT32     , GxB_EQ_EQ_UINT32       , GxB_ANY_EQ_UINT32      , 
+    GxB_LOR_EQ_INT64       , GxB_LAND_EQ_INT64      , GxB_LXOR_EQ_INT64      , GxB_EQ_EQ_INT64        , GxB_ANY_EQ_INT64       , 
+    GxB_LOR_EQ_UINT64      , GxB_LAND_EQ_UINT64     , GxB_LXOR_EQ_UINT64     , GxB_EQ_EQ_UINT64       , GxB_ANY_EQ_UINT64      , 
+    GxB_LOR_EQ_FP32        , GxB_LAND_EQ_FP32       , GxB_LXOR_EQ_FP32       , GxB_EQ_EQ_FP32         , GxB_ANY_EQ_FP32        , 
+    GxB_LOR_EQ_FP64        , GxB_LAND_EQ_FP64       , GxB_LXOR_EQ_FP64       , GxB_EQ_EQ_FP64         , GxB_ANY_EQ_FP64        , 
+
+    // semirings with multiply op: z = NE (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_NE_INT8        , GxB_LAND_NE_INT8       , GxB_LXOR_NE_INT8       , GxB_EQ_NE_INT8         , GxB_ANY_NE_INT8        , 
+    GxB_LOR_NE_UINT8       , GxB_LAND_NE_UINT8      , GxB_LXOR_NE_UINT8      , GxB_EQ_NE_UINT8        , GxB_ANY_NE_UINT8       , 
+    GxB_LOR_NE_INT16       , GxB_LAND_NE_INT16      , GxB_LXOR_NE_INT16      , GxB_EQ_NE_INT16        , GxB_ANY_NE_INT16       , 
+    GxB_LOR_NE_UINT16      , GxB_LAND_NE_UINT16     , GxB_LXOR_NE_UINT16     , GxB_EQ_NE_UINT16       , GxB_ANY_NE_UINT16      , 
+    GxB_LOR_NE_INT32       , GxB_LAND_NE_INT32      , GxB_LXOR_NE_INT32      , GxB_EQ_NE_INT32        , GxB_ANY_NE_INT32       , 
+    GxB_LOR_NE_UINT32      , GxB_LAND_NE_UINT32     , GxB_LXOR_NE_UINT32     , GxB_EQ_NE_UINT32       , GxB_ANY_NE_UINT32      , 
+    GxB_LOR_NE_INT64       , GxB_LAND_NE_INT64      , GxB_LXOR_NE_INT64      , GxB_EQ_NE_INT64        , GxB_ANY_NE_INT64       , 
+    GxB_LOR_NE_UINT64      , GxB_LAND_NE_UINT64     , GxB_LXOR_NE_UINT64     , GxB_EQ_NE_UINT64       , GxB_ANY_NE_UINT64      , 
+    GxB_LOR_NE_FP32        , GxB_LAND_NE_FP32       , GxB_LXOR_NE_FP32       , GxB_EQ_NE_FP32         , GxB_ANY_NE_FP32        , 
+    GxB_LOR_NE_FP64        , GxB_LAND_NE_FP64       , GxB_LXOR_NE_FP64       , GxB_EQ_NE_FP64         , GxB_ANY_NE_FP64        , 
+
+    // semirings with multiply op: z = GT (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_GT_INT8        , GxB_LAND_GT_INT8       , GxB_LXOR_GT_INT8       , GxB_EQ_GT_INT8         , GxB_ANY_GT_INT8        , 
+    GxB_LOR_GT_UINT8       , GxB_LAND_GT_UINT8      , GxB_LXOR_GT_UINT8      , GxB_EQ_GT_UINT8        , GxB_ANY_GT_UINT8       , 
+    GxB_LOR_GT_INT16       , GxB_LAND_GT_INT16      , GxB_LXOR_GT_INT16      , GxB_EQ_GT_INT16        , GxB_ANY_GT_INT16       , 
+    GxB_LOR_GT_UINT16      , GxB_LAND_GT_UINT16     , GxB_LXOR_GT_UINT16     , GxB_EQ_GT_UINT16       , GxB_ANY_GT_UINT16      , 
+    GxB_LOR_GT_INT32       , GxB_LAND_GT_INT32      , GxB_LXOR_GT_INT32      , GxB_EQ_GT_INT32        , GxB_ANY_GT_INT32       , 
+    GxB_LOR_GT_UINT32      , GxB_LAND_GT_UINT32     , GxB_LXOR_GT_UINT32     , GxB_EQ_GT_UINT32       , GxB_ANY_GT_UINT32      , 
+    GxB_LOR_GT_INT64       , GxB_LAND_GT_INT64      , GxB_LXOR_GT_INT64      , GxB_EQ_GT_INT64        , GxB_ANY_GT_INT64       , 
+    GxB_LOR_GT_UINT64      , GxB_LAND_GT_UINT64     , GxB_LXOR_GT_UINT64     , GxB_EQ_GT_UINT64       , GxB_ANY_GT_UINT64      , 
+    GxB_LOR_GT_FP32        , GxB_LAND_GT_FP32       , GxB_LXOR_GT_FP32       , GxB_EQ_GT_FP32         , GxB_ANY_GT_FP32        , 
+    GxB_LOR_GT_FP64        , GxB_LAND_GT_FP64       , GxB_LXOR_GT_FP64       , GxB_EQ_GT_FP64         , GxB_ANY_GT_FP64        , 
+
+    // semirings with multiply op: z = LT (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_LT_INT8        , GxB_LAND_LT_INT8       , GxB_LXOR_LT_INT8       , GxB_EQ_LT_INT8         , GxB_ANY_LT_INT8        , 
+    GxB_LOR_LT_UINT8       , GxB_LAND_LT_UINT8      , GxB_LXOR_LT_UINT8      , GxB_EQ_LT_UINT8        , GxB_ANY_LT_UINT8       , 
+    GxB_LOR_LT_INT16       , GxB_LAND_LT_INT16      , GxB_LXOR_LT_INT16      , GxB_EQ_LT_INT16        , GxB_ANY_LT_INT16       , 
+    GxB_LOR_LT_UINT16      , GxB_LAND_LT_UINT16     , GxB_LXOR_LT_UINT16     , GxB_EQ_LT_UINT16       , GxB_ANY_LT_UINT16      , 
+    GxB_LOR_LT_INT32       , GxB_LAND_LT_INT32      , GxB_LXOR_LT_INT32      , GxB_EQ_LT_INT32        , GxB_ANY_LT_INT32       , 
+    GxB_LOR_LT_UINT32      , GxB_LAND_LT_UINT32     , GxB_LXOR_LT_UINT32     , GxB_EQ_LT_UINT32       , GxB_ANY_LT_UINT32      , 
+    GxB_LOR_LT_INT64       , GxB_LAND_LT_INT64      , GxB_LXOR_LT_INT64      , GxB_EQ_LT_INT64        , GxB_ANY_LT_INT64       , 
+    GxB_LOR_LT_UINT64      , GxB_LAND_LT_UINT64     , GxB_LXOR_LT_UINT64     , GxB_EQ_LT_UINT64       , GxB_ANY_LT_UINT64      , 
+    GxB_LOR_LT_FP32        , GxB_LAND_LT_FP32       , GxB_LXOR_LT_FP32       , GxB_EQ_LT_FP32         , GxB_ANY_LT_FP32        , 
+    GxB_LOR_LT_FP64        , GxB_LAND_LT_FP64       , GxB_LXOR_LT_FP64       , GxB_EQ_LT_FP64         , GxB_ANY_LT_FP64        , 
+
+    // semirings with multiply op: z = GE (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_GE_INT8        , GxB_LAND_GE_INT8       , GxB_LXOR_GE_INT8       , GxB_EQ_GE_INT8         , GxB_ANY_GE_INT8        , 
+    GxB_LOR_GE_UINT8       , GxB_LAND_GE_UINT8      , GxB_LXOR_GE_UINT8      , GxB_EQ_GE_UINT8        , GxB_ANY_GE_UINT8       , 
+    GxB_LOR_GE_INT16       , GxB_LAND_GE_INT16      , GxB_LXOR_GE_INT16      , GxB_EQ_GE_INT16        , GxB_ANY_GE_INT16       , 
+    GxB_LOR_GE_UINT16      , GxB_LAND_GE_UINT16     , GxB_LXOR_GE_UINT16     , GxB_EQ_GE_UINT16       , GxB_ANY_GE_UINT16      , 
+    GxB_LOR_GE_INT32       , GxB_LAND_GE_INT32      , GxB_LXOR_GE_INT32      , GxB_EQ_GE_INT32        , GxB_ANY_GE_INT32       , 
+    GxB_LOR_GE_UINT32      , GxB_LAND_GE_UINT32     , GxB_LXOR_GE_UINT32     , GxB_EQ_GE_UINT32       , GxB_ANY_GE_UINT32      , 
+    GxB_LOR_GE_INT64       , GxB_LAND_GE_INT64      , GxB_LXOR_GE_INT64      , GxB_EQ_GE_INT64        , GxB_ANY_GE_INT64       , 
+    GxB_LOR_GE_UINT64      , GxB_LAND_GE_UINT64     , GxB_LXOR_GE_UINT64     , GxB_EQ_GE_UINT64       , GxB_ANY_GE_UINT64      , 
+    GxB_LOR_GE_FP32        , GxB_LAND_GE_FP32       , GxB_LXOR_GE_FP32       , GxB_EQ_GE_FP32         , GxB_ANY_GE_FP32        , 
+    GxB_LOR_GE_FP64        , GxB_LAND_GE_FP64       , GxB_LXOR_GE_FP64       , GxB_EQ_GE_FP64         , GxB_ANY_GE_FP64        , 
+
+    // semirings with multiply op: z = LE (x,y), where z is boolean and x,y are given by the suffix:
+    GxB_LOR_LE_INT8        , GxB_LAND_LE_INT8       , GxB_LXOR_LE_INT8       , GxB_EQ_LE_INT8         , GxB_ANY_LE_INT8        , 
+    GxB_LOR_LE_UINT8       , GxB_LAND_LE_UINT8      , GxB_LXOR_LE_UINT8      , GxB_EQ_LE_UINT8        , GxB_ANY_LE_UINT8       , 
+    GxB_LOR_LE_INT16       , GxB_LAND_LE_INT16      , GxB_LXOR_LE_INT16      , GxB_EQ_LE_INT16        , GxB_ANY_LE_INT16       , 
+    GxB_LOR_LE_UINT16      , GxB_LAND_LE_UINT16     , GxB_LXOR_LE_UINT16     , GxB_EQ_LE_UINT16       , GxB_ANY_LE_UINT16      , 
+    GxB_LOR_LE_INT32       , GxB_LAND_LE_INT32      , GxB_LXOR_LE_INT32      , GxB_EQ_LE_INT32        , GxB_ANY_LE_INT32       , 
+    GxB_LOR_LE_UINT32      , GxB_LAND_LE_UINT32     , GxB_LXOR_LE_UINT32     , GxB_EQ_LE_UINT32       , GxB_ANY_LE_UINT32      , 
+    GxB_LOR_LE_INT64       , GxB_LAND_LE_INT64      , GxB_LXOR_LE_INT64      , GxB_EQ_LE_INT64        , GxB_ANY_LE_INT64       , 
+    GxB_LOR_LE_UINT64      , GxB_LAND_LE_UINT64     , GxB_LXOR_LE_UINT64     , GxB_EQ_LE_UINT64       , GxB_ANY_LE_UINT64      , 
+    GxB_LOR_LE_FP32        , GxB_LAND_LE_FP32       , GxB_LXOR_LE_FP32       , GxB_EQ_LE_FP32         , GxB_ANY_LE_FP32        , 
+    GxB_LOR_LE_FP64        , GxB_LAND_LE_FP64       , GxB_LXOR_LE_FP64       , GxB_EQ_LE_FP64         , GxB_ANY_LE_FP64        , 
+
+//------------------------------------------------------------------------------
+// 55 semirings with purely Boolean types, bool x bool -> bool
+//------------------------------------------------------------------------------
+
+    // purely boolean semirings (in the form GxB_(add monoid)_(multiply operator)_BOOL):
+    GxB_LOR_FIRST_BOOL     , GxB_LAND_FIRST_BOOL    , GxB_LXOR_FIRST_BOOL    , GxB_EQ_FIRST_BOOL      , GxB_ANY_FIRST_BOOL     , 
+    GxB_LOR_SECOND_BOOL    , GxB_LAND_SECOND_BOOL   , GxB_LXOR_SECOND_BOOL   , GxB_EQ_SECOND_BOOL     , GxB_ANY_SECOND_BOOL    , 
+    GxB_LOR_PAIR_BOOL      , GxB_LAND_PAIR_BOOL     , GxB_LXOR_PAIR_BOOL     , GxB_EQ_PAIR_BOOL       , GxB_ANY_PAIR_BOOL      , 
+    GxB_LOR_LOR_BOOL       , GxB_LAND_LOR_BOOL      , GxB_LXOR_LOR_BOOL      , GxB_EQ_LOR_BOOL        , GxB_ANY_LOR_BOOL       , 
+    GxB_LOR_LAND_BOOL      , GxB_LAND_LAND_BOOL     , GxB_LXOR_LAND_BOOL     , GxB_EQ_LAND_BOOL       , GxB_ANY_LAND_BOOL      , 
+    GxB_LOR_LXOR_BOOL      , GxB_LAND_LXOR_BOOL     , GxB_LXOR_LXOR_BOOL     , GxB_EQ_LXOR_BOOL       , GxB_ANY_LXOR_BOOL      , 
+    GxB_LOR_EQ_BOOL        , GxB_LAND_EQ_BOOL       , GxB_LXOR_EQ_BOOL       , GxB_EQ_EQ_BOOL         , GxB_ANY_EQ_BOOL        , 
+    GxB_LOR_GT_BOOL        , GxB_LAND_GT_BOOL       , GxB_LXOR_GT_BOOL       , GxB_EQ_GT_BOOL         , GxB_ANY_GT_BOOL        , 
+    GxB_LOR_LT_BOOL        , GxB_LAND_LT_BOOL       , GxB_LXOR_LT_BOOL       , GxB_EQ_LT_BOOL         , GxB_ANY_LT_BOOL        , 
+    GxB_LOR_GE_BOOL        , GxB_LAND_GE_BOOL       , GxB_LXOR_GE_BOOL       , GxB_EQ_GE_BOOL         , GxB_ANY_GE_BOOL        , 
+    GxB_LOR_LE_BOOL        , GxB_LAND_LE_BOOL       , GxB_LXOR_LE_BOOL       , GxB_EQ_LE_BOOL         , GxB_ANY_LE_BOOL        ; 
+
 
 //------------------------------------------------------------------------------
 // GxB_resize:  change the size of a matrix or vector
@@ -6946,31 +7051,3 @@ GrB_Info GxB_Vector_export  // export and free a vector
 
 #endif
 
-//==============================================================================
-// user-defined objects defined by SuiteSparse/GraphBLAS/User/*.m4
-//==============================================================================
-
-// Declarations appended to SuiteSparse/GraphBLAS/Include/GraphBLAS.h.
-
-#if defined __INTEL_COMPILER
-#pragma warning (disable: 869 )
-#elif defined __GNUC__
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-#endif
-
-#ifndef GxB_USER_INCLUDE
-#define GxB_USER_INCLUDE
-#endif
-
-#ifndef GxB_USER_H
-#define GxB_USER_H
-
-
-
- 
-
-
-
-#endif
-
-#undef GxB_USER_INCLUDE
diff --git a/Makefile b/Makefile
index 11d721bac7..dd863d70e0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 # GraphBLAS/Makefile
 #-------------------------------------------------------------------------------
 
-# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 # http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 #-------------------------------------------------------------------------------
@@ -14,7 +14,7 @@ JOBS ?= 1
 
 default: library
 
-# just build the static and dynamic libraries; do not run the demo
+# just build the dynamic library; do not run the demo
 library:
 	( cd build ; cmake $(CMAKE_OPTIONS) .. ; $(MAKE) --jobs=$(JOBS) )
 
@@ -30,7 +30,7 @@ remake:
 cmake:
 	( cd build ; cmake $(CMAKE_OPTIONS) .. ; )
 
-# the same as "make library"
+# build both the static and dynamic libraries; do not run the demo
 static:
 	( cd build ; cmake $(CMAKE_OPTIONS) -DBUILD_GRB_STATIC_LIBRARY=1 .. ; $(MAKE) --jobs=$(JOBS) )
 
@@ -61,4 +61,5 @@ distclean:
 	( cd Test ; $(MAKE) distclean )
 	( cd Tcov ; $(MAKE) distclean )
 	( cd Doc  ; $(MAKE) distclean )
+	( cd alternative  ; $(MAKE) distclean )
 
diff --git a/README.md b/README.md
index 2536ac3a3e..dcacbf9bf5 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
 # SuiteSparse:GraphBLAS
 
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-VERSION 3.1.2, Dec 16, 2019
+VERSION 3.2.0, Feb 20, 2020
 
 SuiteSparse:GraphBLAS is an full implementation of the GraphBLAS standard,
 which defines a set of sparse matrix operations on an extended algebra of
@@ -85,8 +85,6 @@ Test:           Extensive tests, not meant for general usage.  To compile
                 SuiteSparse:GraphBLAS and test in MATLAB, go to this directory
                 and type make;testall in MATLAB.
 
-User:           user-defined objects at compile-time (.m4 files)
-
 build:          build directory for CMake, initially empty
 
 alternative:    an alternative to CMake; edit the alternative/Makefile and do
@@ -104,3 +102,11 @@ file.  All functions, objects, and macros with the prefix GxB are extensions to
 the spec.  Functions, objects, and macros with prefix GB must not be accessed
 by user code.  They are for internal use in GraphBLAS only.
 
+
+--------------------------------------------------------------------------------
+## For Windows users:
+
+This version is not compatible with Microsoft Visual Studio.  Use another
+compiler, or use SuiteSparse:GraphBLAS v3.1.2 instead.  See the User Guide
+for more details.
+
diff --git a/Source/GB.h b/Source/GB.h
index eac19f89e9..cc9e3bda99 100644
--- a/Source/GB.h
+++ b/Source/GB.h
@@ -2,7 +2,7 @@
 // GB.h: definitions visible only inside GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -10,6 +10,13 @@
 // These defintions are not visible to the user.  They are used only inside
 // GraphBLAS itself.
 
+// Future plans: (see also 'grep -r FUTURE')
+// FUTURE: support for dense matrices (A->i and A->p as NULL pointers)
+// FUTURE: implement v1.3 of the API
+// FUTURE: add matrix I/O in binary format (see draft LAGraph_binread/binwrite)
+// FUTURE: add Heap method to GB_AxB_saxpy3 (inspector-executor style)
+// FUTURE: allow matrices and vectors to be left jumbled (sort left pending)
+
 #ifndef GB_H
 #define GB_H
 
@@ -22,6 +29,13 @@
 // just before the statement:
 // #include "GB.h"
 
+// set GB_BURBLE to 1 to enable extensive diagnostic output to stdout,
+// or compile with -DGB_BURBLE=1.  This setting can also be added at the top
+// of any individual Source/* file, before #including any other files.
+#ifndef GB_BURBLE
+#define GB_BURBLE 0
+#endif
+
 // to turn on Debug for all of GraphBLAS, uncomment this line:
 // #define GB_DEBUG
 
@@ -47,6 +61,10 @@
 
 #if defined __INTEL_COMPILER
 
+//  10397: remark about where *.optrpt reports are placed
+//  15552: loop not vectorized
+#pragma warning (disable: 10397 15552 )
+
 // disable icc -w2 warnings
 //  191:  type qualifier meangingless
 //  193:  zero used for undefined #define
@@ -84,7 +102,7 @@
 #elif defined __GNUC__
 
 // disable warnings for gcc 8.2:
-#pragma GCC diagnostic ignored "-Wunknown-warning-option"
+// #pragma GCC diagnostic ignored "-Wunknown-warning-option"
 #pragma GCC diagnostic ignored "-Wint-in-bool-context"
 #pragma GCC diagnostic ignored "-Wformat-truncation="
 
@@ -195,6 +213,8 @@
 
 #endif
 
+#define GB_PRAGMA_IVDEP GB_PRAGMA(ivdep)
+
 //------------------------------------------------------------------------------
 // PGI_COMPILER_BUG
 //------------------------------------------------------------------------------
@@ -233,14 +253,12 @@
 
     // variable-length arrays are allowed
     #define GB_VLA(s) s
-    #define GB_VLA_NTHREADS(nthreads) nthreads
 
 #else
 
     // variable-length arrays are not allowed
-    #define GB_VLA_MAXSIZE_FOR_ANY_GRB_TYPE 128
-    #define GB_VLA(s) GB_VLA_MAXSIZE_FOR_ANY_GRB_TYPE
-    #define GB_VLA_NTHREADS(nthreads) GxB_NTHREADS_MAX
+    #define GB_VLA_MAXSIZE 128
+    #define GB_VLA(s) GB_VLA_MAXSIZE
 
 #endif
 
@@ -290,12 +308,15 @@
 #define GB_IMAX(x,y) (((x) > (y)) ? (x) : (y))
 #define GB_IMIN(x,y) (((x) < (y)) ? (x) : (y))
 
+// ceiling of a/b for two integers a and b
+#define GB_CEIL(a,b) (((a) + (b) - 1) / (b))
+
 //------------------------------------------------------------------------------
 // for coverage tests in Tcov/
 //------------------------------------------------------------------------------
 
 #ifdef GBCOVER
-#define GBCOVER_MAX 10000
+#define GBCOVER_MAX 20000
 GB_PUBLIC int64_t GB_cov [GBCOVER_MAX] ;
 GB_PUBLIC int GB_cover_max ;
 #endif
@@ -310,7 +331,44 @@ typedef void (*GB_cast_function) (void *, const void *, size_t) ;
 
 #define GB_LEN 128
 
-typedef struct GB_Sauna_struct *GB_Sauna ;
+//------------------------------------------------------------------------------
+// GB_mcast: cast a mask entry from any native type to boolean
+//------------------------------------------------------------------------------
+
+// The mask matrix M must be one of the native data types, which have size
+// 1, 2, 4, or 8 bytes.  The value could be properly typecasted to bool, but
+// this requires a function pointer to the proper GB_cast_function.  Instead,
+// it is faster to simply use type punning, based on the size of the data
+// type, and use the inline GB_mcast function instead.
+
+static inline bool GB_mcast         // return the value of M(i,j)
+(
+    const GB_void *GB_RESTRICT Mx,  // mask values
+    const int64_t pM,               // extract boolean value of Mx [pM]
+    const size_t msize              // size of each data type
+)
+{
+    if (Mx == NULL)
+    {
+        // If Mx is NULL, then values in the mask matrix M are ignored, and
+        // only the structural pattern is used.  This function is only called
+        // for entries M(i,j) in the structure of M, so the result is always
+        // true if Mx is NULL.
+        return (true) ;
+    }
+    else
+    {
+        // check the value of M(i,j)
+        switch (msize)
+        {
+            default:
+            case 1: return ((*(uint8_t  *) (Mx +((pM)*1))) != 0) ;
+            case 2: return ((*(uint16_t *) (Mx +((pM)*2))) != 0) ;
+            case 4: return ((*(uint32_t *) (Mx +((pM)*4))) != 0) ;
+            case 8: return ((*(uint64_t *) (Mx +((pM)*8))) != 0) ;
+        }
+    }
+}
 
 //------------------------------------------------------------------------------
 // pending tuples
@@ -340,8 +398,7 @@ typedef enum
     GB_UINT64_code  = 8,
     GB_FP32_code    = 9,
     GB_FP64_code    = 10,
-    GB_UCT_code     = 11,       // void *, compile-time user-defined type
-    GB_UDT_code     = 12        // void *, run-time user-defined type
+    GB_UDT_code     = 11        // void *, user-defined type
 }
 GB_Type_code ;                  // enumerated type code
 
@@ -374,49 +431,50 @@ typedef enum
     // TxT -> T
     //--------------------------------------------------------------------------
 
-    // 10 binary operators z=f(x,y) that return the same type as their inputs
+    // 12 binary operators z=f(x,y) that return the same type as their inputs
     GB_FIRST_opcode,    //  7: z = x
     GB_SECOND_opcode,   //  8: z = y
-    GB_MIN_opcode,      //  9: z = min(x,y)
-    GB_MAX_opcode,      // 10: z = max(x,y)
-    GB_PLUS_opcode,     // 11: z = x + y
-    GB_MINUS_opcode,    // 12: z = x - y
-    GB_RMINUS_opcode,   // 13: z = y - x
-    GB_TIMES_opcode,    // 14: z = x * y
-    GB_DIV_opcode,      // 15: z = x / y ; special cases for bool and ints
-    GB_RDIV_opcode,     // 16: z = y / x ; special cases for bool and ints
+    GB_PAIR_opcode,     //  9: z = 1
+    GB_ANY_opcode,      // 10: z = x or y, chosen arbitrarily
+    GB_MIN_opcode,      // 11: z = min(x,y)
+    GB_MAX_opcode,      // 12: z = max(x,y)
+    GB_PLUS_opcode,     // 13: z = x + y
+    GB_MINUS_opcode,    // 14: z = x - y
+    GB_RMINUS_opcode,   // 15: z = y - x
+    GB_TIMES_opcode,    // 16: z = x * y
+    GB_DIV_opcode,      // 17: z = x / y ; special cases for bool and ints
+    GB_RDIV_opcode,     // 18: z = y / x ; special cases for bool and ints
 
     // 6 binary operators z=f(x,y), x,y,z all the same type
-    GB_ISEQ_opcode,     // 17: z = (x == y)
-    GB_ISNE_opcode,     // 18: z = (x != y)
-    GB_ISGT_opcode,     // 19: z = (x >  y)
-    GB_ISLT_opcode,     // 20: z = (x <  y)
-    GB_ISGE_opcode,     // 21: z = (x >= y)
-    GB_ISLE_opcode,     // 22: z = (x <= y)
+    GB_ISEQ_opcode,     // 19: z = (x == y)
+    GB_ISNE_opcode,     // 20: z = (x != y)
+    GB_ISGT_opcode,     // 21: z = (x >  y)
+    GB_ISLT_opcode,     // 22: z = (x <  y)
+    GB_ISGE_opcode,     // 23: z = (x >= y)
+    GB_ISLE_opcode,     // 24: z = (x <= y)
 
     // 3 binary operators that work on purely boolean values
-    GB_LOR_opcode,      // 23: z = (x != 0) || (y != 0)
-    GB_LAND_opcode,     // 23: z = (x != 0) && (y != 0)
-    GB_LXOR_opcode,     // 25: z = (x != 0) != (y != 0)
+    GB_LOR_opcode,      // 25: z = (x != 0) || (y != 0)
+    GB_LAND_opcode,     // 26: z = (x != 0) && (y != 0)
+    GB_LXOR_opcode,     // 27: z = (x != 0) != (y != 0)
 
     //--------------------------------------------------------------------------
     // TxT -> bool
     //--------------------------------------------------------------------------
 
     // 6 binary operators z=f(x,y) that return bool (TxT -> bool)
-    GB_EQ_opcode,       // 26: z = (x == y)
-    GB_NE_opcode,       // 27: z = (x != y)
-    GB_GT_opcode,       // 28: z = (x >  y)
-    GB_LT_opcode,       // 29: z = (x <  y)
-    GB_GE_opcode,       // 30: z = (x >= y)
-    GB_LE_opcode,       // 31: z = (x <= y)
+    GB_EQ_opcode,       // 28: z = (x == y)
+    GB_NE_opcode,       // 29: z = (x != y)
+    GB_GT_opcode,       // 30: z = (x >  y)
+    GB_LT_opcode,       // 31: z = (x <  y)
+    GB_GE_opcode,       // 32: z = (x >= y)
+    GB_LE_opcode,       // 33: z = (x <= y)
 
     //--------------------------------------------------------------------------
     // user-defined: unary and binary operators
     //--------------------------------------------------------------------------
 
-    GB_USER_C_opcode,   // 32: compile-time user-defined operator
-    GB_USER_R_opcode    // 33: run-time user-defined operator
+    GB_USER_opcode      // 34: user-defined operator
 }
 GB_Opcode ;
 
@@ -452,8 +510,7 @@ typedef enum
     GB_LE_THUNK_opcode  = 17,
 
     // for all user-defined select operators:  thunk is optional
-    GB_USER_SELECT_C_opcode = 18,   // defined at compile-time
-    GB_USER_SELECT_R_opcode = 19    // defined at run-time
+    GB_USER_SELECT_opcode = 18
 }
 GB_Select_Opcode ;
 
@@ -505,8 +562,7 @@ struct GB_SelectOp_opaque   // content of GxB_SelectOp
 typedef enum
 {
     GB_BUILTIN,             // 0: built-in monoid or semiring
-    GB_USER_COMPILED,       // 1: pre-compiled user monoid or semiring
-    GB_USER_RUNTIME         // 2: user monoid or semiring created a run-time
+    GB_USER_RUNTIME         // 1: user monoid or semiring
 }
 GB_object_code ;
 
@@ -516,7 +572,7 @@ struct GB_Monoid_opaque     // content of GrB_Monoid
     GrB_BinaryOp op ;       // binary operator of the monoid
     void *identity ;        // identity of the monoid
     size_t op_ztype_size ;  // size of the type (also is op->ztype->size)
-    GB_object_code object_kind ;   // built-in, user pre-compiled, or run-time
+    GB_object_code object_kind ;   // built-in or user defined
     void *terminal ;        // value that triggers early-exit (NULL if no value)
 } ;
 
@@ -525,7 +581,7 @@ struct GB_Semiring_opaque   // content of GrB_Semiring
     int64_t magic ;         // for detecting uninitialized objects
     GrB_Monoid add ;        // add operator of the semiring
     GrB_BinaryOp multiply ; // multiply operator of the semiring
-    GB_object_code object_kind ;   // built-in, user pre-compiled, or run-time
+    GB_object_code object_kind ;   // built-in or user defined
 } ;
 
 struct GB_Scalar_opaque     // content of GxB_Scalar: 1-by-1 standard CSC matrix
@@ -553,6 +609,7 @@ struct GB_Descriptor_opaque // content of GrB_Descriptor
     GrB_Desc_Value axb ;    // for selecting the method for C=A*B
     int nthreads_max ;      // max # threads to use in this call to GraphBLAS
     double chunk ;          // chunk size for # of threads for small problems
+    bool predefined ;       // if true, descriptor is predefined
 } ;
 
 //------------------------------------------------------------------------------
@@ -673,6 +730,94 @@ int64_t GB_Pending_n        // return # of pending tuples in A
 
 #include "GB_Global.h"
 
+//------------------------------------------------------------------------------
+// burble
+//------------------------------------------------------------------------------
+
+// GB_BURBLE is meant for development use, not production use.  To enable it,
+// set GB_BURBLE to 1, either with -DGB_BURBLE=1 as a compiler option, by
+// editing the setting above, or by adding the line
+//
+//      #define GB_BURBLE 1
+//
+// at the top of any source file, before #including any other file.  After
+// enabling it in the library, use GxB_set (GxB_BURBLE, true) to turn it on
+// at run time, and GxB_set (GxB_BURBLE, false) to turn it off.  By default,
+// the feature is not enabled when SuiteSparse:GraphBLAS is compiled, and
+// even when it is enabled, the setting is set to false by GrB_init.
+
+#if GB_BURBLE
+
+// define the printf function to use to burble
+#include "GB_printf.h"
+#define GBBURBLE(...)                               \
+{                                                   \
+    bool burble = GB_Global_burble_get ( ) ;        \
+    if (burble)                                     \
+    {                                               \
+        if (GB_printf_function != NULL)             \
+        {                                           \
+            GB_printf_function (__VA_ARGS__) ;      \
+        }                                           \
+        else                                        \
+        {                                           \
+            printf (__VA_ARGS__) ;                  \
+            fflush (stdout) ;                       \
+        }                                           \
+    }                                               \
+}
+
+#if defined ( _OPENMP )
+
+// burble with timing
+#define GB_BURBLE_START(func)                       \
+double t_burble = 0 ;                               \
+bool burble = GB_Global_burble_get ( ) ;            \
+{                                                   \
+    if (burble)                                     \
+    {                                               \
+        GBBURBLE (" [ " func " ") ;                 \
+        t_burble = GB_OPENMP_GET_WTIME ;            \
+    }                                               \
+}
+
+#define GB_BURBLE_END                               \
+{                                                   \
+    if (burble)                                     \
+    {                                               \
+        t_burble = GB_OPENMP_GET_WTIME - t_burble ; \
+        GBBURBLE ("%.3g sec ]\n", t_burble) ;       \
+    }                                               \
+}
+
+#else
+
+// burble with no timing
+#define GB_BURBLE_START(func)                   \
+    GBBURBLE (" [ " func " ")
+
+#define GB_BURBLE_END                           \
+    GBBURBLE ("]\n")
+
+#endif
+
+#define GB_BURBLE_N(n,...)       /* burble only if n > 1 */     \
+    if ((n) > 1) GBBURBLE (__VA_ARGS__)
+
+#define GB_BURBLE_MATRIX(A, ...) /* no burble if A is at most 1-by-1 */ \
+    if (!((A)->vlen <= 1 && (A)->vdim <= 1)) GBBURBLE (__VA_ARGS__)
+
+#else
+
+// no burble
+#define GBBURBLE(...)
+#define GB_BURBLE_START(func)
+#define GB_BURBLE_END
+#define GB_BURBLE_N(n,...)
+#define GB_BURBLE_MATRIX(A,...)
+
+#endif
+
 //------------------------------------------------------------------------------
 // debugging definitions
 //------------------------------------------------------------------------------
@@ -942,7 +1087,7 @@ typedef GB_Context_struct *GB_Context ;
 // logged by the GB_WHERE macro.
 
 #ifndef GB_PANIC
-#define GB_PANIC return (GrB_PANIC) ;
+#define GB_PANIC return (GrB_PANIC)
 #endif
 
 #define GB_CONTEXT(where_string)                                    \
@@ -953,7 +1098,7 @@ typedef GB_Context_struct *GB_Context ;
     Context->where = where_string ;                                 \
     /* get the default max # of threads and default chunk size */   \
     Context->nthreads_max = GB_Global_nthreads_max_get ( ) ;        \
-    Context->chunk = GB_Global_chunk_get ( ) ;
+    Context->chunk = GB_Global_chunk_get ( )
 
 #define GB_WHERE(where_string)                                      \
     if (!GB_Global_GrB_init_called_get ( ))                         \
@@ -961,7 +1106,7 @@ typedef GB_Context_struct *GB_Context ;
         /* GrB_init (or GxB_init) has not been called! */           \
         GB_PANIC ;                                                  \
     }                                                               \
-    GB_CONTEXT (where_string) ;
+    GB_CONTEXT (where_string)
 
 //------------------------------------------------------------------------------
 // GB_GET_NTHREADS_MAX:  determine max # of threads for OpenMP parallelism.
@@ -1359,6 +1504,15 @@ GrB_Info GB_dup             // make an exact copy of a matrix
     GB_Context Context
 ) ;
 
+GrB_Info GB_dup2            // copy a matrix; values copied only if numeric
+(
+    GrB_Matrix *Chandle,    // handle of output matrix to create 
+    const GrB_Matrix A,     // input matrix to copy
+    const bool numeric,     // if true, duplicate the numeric values
+    const GrB_Type ctype,   // type of C, if numeric is false
+    GB_Context Context
+) ;
+
 void GB_memcpy                  // parallel memcpy
 (
     void *dest,                 // destination
@@ -1374,7 +1528,7 @@ GrB_Info GB_nvals           // get the number of entries in a matrix
     GB_Context Context
 ) ;
 
-GrB_Info GB_type            // get the type of a matrix
+GrB_Info GB_matvec_type            // get the type of a matrix
 (
     GrB_Type *type,         // returns the type of the matrix
     const GrB_Matrix A,     // matrix to query
@@ -1406,9 +1560,7 @@ GrB_Info GB_ix_resize           // resize a matrix
 
 // free A->i and A->x and return if critical section fails
 #define GB_IX_FREE(A)                                                       \
-{                                                                           \
-    if (GB_ix_free (A) == GrB_PANIC) GB_PANIC ;                             \
-}
+    if (GB_ix_free (A) == GrB_PANIC) GB_PANIC
 
 GrB_Info GB_ix_free             // free A->i and A->x of a matrix
 (
@@ -1422,9 +1574,7 @@ void GB_ph_free                 // free A->p and A->h of a matrix
 
 // free all content, and return if critical section fails
 #define GB_PHIX_FREE(A)                                                     \
-{                                                                           \
-    if (GB_phix_free (A) == GrB_PANIC) GB_PANIC ;                           \
-}
+    if (GB_phix_free (A) == GrB_PANIC) GB_PANIC
 
 GrB_Info GB_phix_free           // free all content of a matrix
 (
@@ -1450,9 +1600,9 @@ bool GB_code_compatible         // check if two types can be typecast
 
 void GB_cast_array              // typecast an array
 (
-    GB_void *GB_RESTRICT Cx,       // output array
+    GB_void *Cx,                // output array
     const GB_Type_code code1,   // type code for Cx
-    const GB_void *GB_RESTRICT Ax, // input array
+    GB_void *Ax,                // input array
     const GB_Type_code code2,   // type code for Ax
     const int64_t anz,          // number of entries in Cx and Ax
     GB_Context Context
@@ -1757,16 +1907,12 @@ void GB_free_memory
 #else
 
 #define GB_NEW(A,type,vlen,vdim,Ap_option,is_csc,hopt,h,plen,Context)         \
-{                                                                             \
     info = GB_new (A, type, vlen, vdim, Ap_option, is_csc, hopt, h, plen,     \
-        Context) ;                                                            \
-}
+        Context)
 
 #define GB_CREATE(A,type,vlen,vdim,Ap_option,is_csc,hopt,h,plen,anz,numeric,Context)  \
-{                                                                             \
     info = GB_create (A, type, vlen, vdim, Ap_option, is_csc, hopt, h, plen,  \
-        anz, numeric, Context) ;                                              \
-}
+        anz, numeric, Context)
 
 #define GB_MATRIX_FREE(A)                                                     \
 {                                                                             \
@@ -1778,13 +1924,13 @@ void GB_free_memory
 #define GB_SCALAR_FREE(v) GB_MATRIX_FREE ((GrB_Matrix *) v)
 
 #define GB_CALLOC_MEMORY(p,n,s)                                               \
-    p = GB_calloc_memory (n, s) ;
+    p = GB_calloc_memory (n, s)
 
 #define GB_MALLOC_MEMORY(p,n,s)                                               \
-    p = GB_malloc_memory (n, s) ;
+    p = GB_malloc_memory (n, s)
 
 #define GB_REALLOC_MEMORY(p,nnew,nold,s,ok)                                   \
-    p = GB_realloc_memory (nnew, nold, s, (void *) p, ok) ;
+    p = GB_realloc_memory (nnew, nold, s, (void *) p, ok)
 
 #define GB_FREE_MEMORY(p,n,s)                                                 \
 {                                                                             \
@@ -1802,23 +1948,6 @@ GrB_Type GB_code_type           // return the GrB_Type corresponding to the code
     const GrB_Type type         // user type if code is GB_UDT_code
 ) ;
 
-// used in GB_AxB_heap for temporary workspace
-typedef struct
-{
-    int64_t start ;                 // first entry of A(:,k) is at Ai [start]
-    int64_t end ;                   // last entry of A(:,k) is at Ai [end-1]
-}
-GB_pointer_pair ;
-
-// used in GB_heap_*
-typedef struct
-{
-    int64_t key ;       // the key for this element, for ordering in the Heap
-    int64_t name ;      // the name of the element; not used in these functions
-                        // but required by the caller
-}
-GB_Element ;
-
 GrB_Info GB_slice       // slice B into nthreads slices or hyperslices
 (
     GrB_Matrix B,       // matrix to slice
@@ -1848,11 +1977,11 @@ void GB_eslice
 bool GB_binop_builtin               // true if binary operator is builtin
 (
     // inputs:
-    const GrB_Matrix A,
+    const GrB_Type A_type,
     const bool A_is_pattern,        // true if only the pattern of A is used
-    const GrB_Matrix B,
+    const GrB_Type B_type,
     const bool B_is_pattern,        // true if only the pattern of B is used
-    const GrB_BinaryOp op,          // binary operator
+    const GrB_BinaryOp op,          // binary operator; may be NULL
     const bool flipxy,              // true if z=op(y,x), flipping x and y
     // outputs, unused by caller if this function returns false
     GB_Opcode *opcode,              // opcode for the binary operator
@@ -1860,11 +1989,11 @@ bool GB_binop_builtin               // true if binary operator is builtin
     GB_Type_code *zcode             // type code for z output
 ) ;
 
-void GB_cumsum                  // compute the cumulative sum of an array
+void GB_cumsum                      // cumulative sum of an array
 (
-    int64_t *GB_RESTRICT count,    // size n+1, input/output
+    int64_t *GB_RESTRICT count,     // size n+1, input/output
     const int64_t n,
-    int64_t *GB_RESTRICT kresult,  // return k, if needed by the caller
+    int64_t *GB_RESTRICT kresult,   // return k, if needed by the caller
     int nthreads
 ) ;
 
@@ -1873,6 +2002,7 @@ GrB_Info GB_Descriptor_get      // get the contents of a descriptor
     const GrB_Descriptor desc,  // descriptor to query, may be NULL
     bool *C_replace,            // if true replace C before C<M>=Z
     bool *Mask_comp,            // if true use logical negation of M
+    bool *Mask_struct,          // if true use the structure of M
     bool *In0_transpose,        // if true transpose first input
     bool *In1_transpose,        // if true transpose second input
     GrB_Desc_Value *AxB_method, // method for C=A*B
@@ -1979,6 +2109,23 @@ GrB_Info GB_wait                // finish all pending computations
     GB_Context Context
 ) ;
 
+//------------------------------------------------------------------------------
+// GB_is_dense: check if a matrix is completely dense
+//------------------------------------------------------------------------------
+
+static inline bool GB_is_dense
+(
+    const GrB_Matrix A
+)
+{
+    // check if A is completely dense:  all entries present.
+    // zombies and pending tuples are not considered
+    if (A == NULL) return (false) ;
+    GrB_Index anzmax ;
+    bool ok = GB_Index_multiply (&anzmax, A->vlen, A->vdim) ;
+    return (ok && (anzmax == GB_NNZ (A))) ;
+}
+
 //------------------------------------------------------------------------------
 // OpenMP definitions
 //------------------------------------------------------------------------------
@@ -1991,8 +2138,7 @@ GrB_Info GB_wait                // finish all pending computations
 // thread tid will operate on the range k1:(k2-1)
 #define GB_PARTITION(k1,k2,n,tid,nthreads)                                  \
     k1 = ((tid) ==  0          ) ?  0  : GB_PART ((tid),  n, nthreads) ;    \
-    k2 = ((tid) == (nthreads)-1) ? (n) : GB_PART ((tid)+1,n, nthreads) ;
-
+    k2 = ((tid) == (nthreads)-1) ? (n) : GB_PART ((tid)+1,n, nthreads)
 
 #if defined ( _OPENMP )
 
@@ -2000,17 +2146,19 @@ GrB_Info GB_wait                // finish all pending computations
     #define GB_OPENMP_THREAD_ID         omp_get_thread_num ( )
     #define GB_OPENMP_MAX_THREADS       omp_get_max_threads ( )
     #define GB_OPENMP_GET_NUM_THREADS   omp_get_num_threads ( )
+    #define GB_OPENMP_GET_WTIME         omp_get_wtime ( )
 
 #else
 
     #define GB_OPENMP_THREAD_ID         (0)
     #define GB_OPENMP_MAX_THREADS       (1)
     #define GB_OPENMP_GET_NUM_THREADS   (1)
+    #define GB_OPENMP_GET_WTIME         (0)
 
 #endif
 
-// by default, give each thread at least 4096 units of work to do
-#define GB_CHUNK_DEFAULT 4096
+// by default, give each thread at least 64K units of work to do
+#define GB_CHUNK_DEFAULT (64*1024)
 
 //------------------------------------------------------------------------------
 // GB_queue operations
@@ -2052,7 +2200,7 @@ bool GB_queue_status            // get the queue status of a matrix
 GrB_Info GB_setElement              // set a single entry, C(row,col) = scalar
 (
     GrB_Matrix C,                   // matrix to modify
-    const void *scalar,             // scalar to set
+    void *scalar,                   // scalar to set
     const GrB_Index row,            // row index
     const GrB_Index col,            // column index
     const GB_Type_code scalar_code, // type of the scalar
@@ -2130,8 +2278,8 @@ GrB_Info GB_to_hyper_conform    // conform a matrix to its desired format
 GrB_Info GB_hyper_prune
 (
     // output, not allocated on input:
-    int64_t *GB_RESTRICT *p_Ap,        // size nvec+1
-    int64_t *GB_RESTRICT *p_Ah,        // size nvec
+    int64_t *GB_RESTRICT *p_Ap,     // size nvec+1
+    int64_t *GB_RESTRICT *p_Ah,     // size nvec
     int64_t *p_nvec,                // # of vectors, all nonempty
     // input, not modified
     const int64_t *Ap_old,          // size nvec_old+1
@@ -2140,6 +2288,12 @@ GrB_Info GB_hyper_prune
     GB_Context Context
 ) ;
 
+GrB_Info GB_hypermatrix_prune
+(
+    GrB_Matrix A,               // matrix to prune
+    GB_Context Context
+) ;
+
 //------------------------------------------------------------------------------
 // critical section for user threads
 //------------------------------------------------------------------------------
@@ -2216,12 +2370,13 @@ GB_PUBLIC mtx_t GB_sync ;
 
 // check the descriptor and extract its contents; also copies
 // nthreads_max and chunk from the descriptor to the Context
-#define GB_GET_DESCRIPTOR(info,desc,dout,dm,d0,d1,dalgo)                     \
+#define GB_GET_DESCRIPTOR(info,desc,dout,dmc,dms,d0,d1,dalgo)                \
     GrB_Info info ;                                                          \
-    bool dout, dm, d0, d1 ;                                                  \
+    bool dout, dmc, dms, d0, d1 ;                                            \
     GrB_Desc_Value dalgo ;                                                   \
     /* if desc is NULL then defaults are used.  This is OK */                \
-    info = GB_Descriptor_get (desc, &dout, &dm, &d0, &d1, &dalgo, Context) ; \
+    info = GB_Descriptor_get (desc, &dout, &dmc, &dms, &d0, &d1, &dalgo,     \
+        Context) ;                                                           \
     if (info != GrB_SUCCESS)                                                 \
     {                                                                        \
         /* desc not NULL, but uninitialized or an invalid object */          \
@@ -2536,7 +2691,7 @@ GB_PUBLIC mtx_t GB_sync ;
 // The list X [pleft ... pright] is in ascending order.  It may have
 // duplicates.
 
-#define GB_BINARY_TRIM_SEARCH(i,X,pleft,pright)                             \
+#define GB_TRIM_BINARY_SEARCH(i,X,pleft,pright)                             \
 {                                                                           \
     /* binary search of X [pleft ... pright] for integer i */               \
     while (pleft < pright)                                                  \
@@ -2567,11 +2722,11 @@ GB_PUBLIC mtx_t GB_sync ;
 // The value X [pleft] may be either < or > i.
 #define GB_BINARY_SEARCH(i,X,pleft,pright,found)                            \
 {                                                                           \
-    GB_BINARY_TRIM_SEARCH (i, X, pleft, pright) ;                           \
+    GB_TRIM_BINARY_SEARCH (i, X, pleft, pright) ;                           \
     found = (pleft == pright && X [pleft] == i) ;                           \
 }
 
-// GB_BINARY_SPLIT_SEARCH
+// GB_SPLIT_BINARY_SEARCH
 // If found is true then X [pleft] == i.  If duplicates appear then X [pleft]
 //    is any one of the entries with value i in the list.
 // If found is false then
@@ -2580,7 +2735,7 @@ GB_PUBLIC mtx_t GB_sync ;
 // If X has no duplicates, then whether or not i is found,
 //    X [original_pleft ... pleft-1] < i and
 //    X [pleft ... original_pright] >= i holds.
-#define GB_BINARY_SPLIT_SEARCH(i,X,pleft,pright,found)                      \
+#define GB_SPLIT_BINARY_SEARCH(i,X,pleft,pright,found)                      \
 {                                                                           \
     GB_BINARY_SEARCH (i, X, pleft, pright, found)                           \
     if (!found && (pleft == pright))                                        \
@@ -2597,10 +2752,10 @@ GB_PUBLIC mtx_t GB_sync ;
 }
 
 //------------------------------------------------------------------------------
-// GB_BINARY_ZOMBIE
+// binary search in the presence of zombies
 //------------------------------------------------------------------------------
 
-#define GB_BINARY_TRIM_ZOMBIE(i,X,pleft,pright)                             \
+#define GB_TRIM_BINARY_SEARCH_ZOMBIE(i,X,pleft,pright)                      \
 {                                                                           \
     /* binary search of X [pleft ... pright] for integer i */               \
     while (pleft < pright)                                                  \
@@ -2622,11 +2777,11 @@ GB_PUBLIC mtx_t GB_sync ;
     ASSERT (pleft == pright || pleft == pright + 1) ;                       \
 }
 
-#define GB_BINARY_ZOMBIE(i,X,pleft,pright,found,nzombies,is_zombie)         \
+#define GB_BINARY_SEARCH_ZOMBIE(i,X,pleft,pright,found,nzombies,is_zombie)  \
 {                                                                           \
     if (nzombies > 0)                                                       \
     {                                                                       \
-        GB_BINARY_TRIM_ZOMBIE (i, X, pleft, pright) ;                       \
+        GB_TRIM_BINARY_SEARCH_ZOMBIE (i, X, pleft, pright) ;                \
         found = false ;                                                     \
         is_zombie = false ;                                                 \
         if (pleft == pright)                                                \
@@ -2647,11 +2802,11 @@ GB_PUBLIC mtx_t GB_sync ;
     }                                                                       \
 }
 
-#define GB_BINARY_SPLIT_ZOMBIE(i,X,pleft,pright,found,nzombies,is_zombie)   \
+#define GB_SPLIT_BINARY_SEARCH_ZOMBIE(i,X,pleft,pright,found,nzom,is_zombie) \
 {                                                                           \
-    if (nzombies > 0)                                                       \
+    if (nzom > 0)                                                           \
     {                                                                       \
-        GB_BINARY_TRIM_ZOMBIE (i, X, pleft, pright) ;                       \
+        GB_TRIM_BINARY_SEARCH_ZOMBIE (i, X, pleft, pright) ;                \
         found = false ;                                                     \
         is_zombie = false ;                                                 \
         if (pleft == pright)                                                \
@@ -2679,7 +2834,7 @@ GB_PUBLIC mtx_t GB_sync ;
     else                                                                    \
     {                                                                       \
         is_zombie = false ;                                                 \
-        GB_BINARY_SPLIT_SEARCH(i,X,pleft,pright,found)                      \
+        GB_SPLIT_BINARY_SEARCH(i,X,pleft,pright,found)                      \
     }                                                                       \
 }
 
@@ -2695,14 +2850,14 @@ GB_PUBLIC mtx_t GB_sync ;
 static inline bool GB_lookup        // find j = Ah [k] in a hyperlist
 (
     const bool A_is_hyper,          // true if A is hypersparse
-    const int64_t *GB_RESTRICT Ah,     // A->h [0..A->nvec-1]: list of vectors
-    const int64_t *GB_RESTRICT Ap,     // A->p [0..A->nvec  ]: pointers to vectors
-    int64_t *GB_RESTRICT pleft,        // look only in A->h [pleft..pright]
+    const int64_t *GB_RESTRICT Ah,  // A->h [0..A->nvec-1]: list of vectors
+    const int64_t *GB_RESTRICT Ap,  // A->p [0..A->nvec  ]: pointers to vectors
+    int64_t *GB_RESTRICT pleft,     // look only in A->h [pleft..pright]
     int64_t pright,                 // normally A->nvec-1, but can be trimmed
 //  const int64_t nvec,             // A->nvec: number of vectors
     const int64_t j,                // vector to find, as j = Ah [k]
-    int64_t *GB_RESTRICT pstart,       // start of vector: Ap [k]
-    int64_t *GB_RESTRICT pend          // end of vector: Ap [k+1]
+    int64_t *GB_RESTRICT pstart,    // start of vector: Ap [k]
+    int64_t *GB_RESTRICT pend       // end of vector: Ap [k+1]
 )
 {
     if (A_is_hyper)
diff --git a/Source/GB_AxB_Gustavson.c b/Source/GB_AxB_Gustavson.c
deleted file mode 100644
index d6c9380724..0000000000
--- a/Source/GB_AxB_Gustavson.c
+++ /dev/null
@@ -1,445 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_Gustavson: C=A*B or C<M>=A*B, gather/scatter-based saxpy method.
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// This method is agnostic to the CSR/CSC format.  The format of C is set
-// to CSC but this is a placeholder that will be changed in GB_AxB_meta.
-
-// Does not log an error; returns GrB_SUCCESS, GrB_OUT_OF_MEMORY, or GrB_PANIC.
-
-// This work is done by a single thread, which is computing a submatrix of the
-// final result.  Parallelism is handled in GB_AxB_saxpy_parallel.
-
-#include "GB_mxm.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
-#include "GB_bracket.h"
-#include "GB_sort.h"
-#include "GB_iterator.h"
-#ifndef GBCOMPACT
-#include "GB_AxB__include.h"
-#endif
-
-// C=A*B failed, free everything, even the Sauna
-#define GB_FREE_ALL                         \
-{                                           \
-    GB_MATRIX_FREE (Chandle) ;              \
-    GB_Sauna_free (Sauna_id) ;              \
-}
-
-GrB_Info GB_AxB_Gustavson           // C=A*B or C<M>=A*B, Gustavson's method
-(
-    GrB_Matrix *Chandle,            // output matrix
-    const GrB_Matrix M_in,          // optional matrix
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    bool *mask_applied,             // if true, mask was applied
-    const int Sauna_id              // Sauna to use
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_DEBUG
-    // only one thread does this entire function
-    GB_Context Context = NULL ;
-    #endif
-    ASSERT (Chandle != NULL) ;
-    ASSERT_MATRIX_OK_OR_NULL (M_in, "M for Gustavson C<M>=A*B", GB0) ;
-    ASSERT_MATRIX_OK (A, "A for Gustavson C=A*B", GB0) ;
-    ASSERT_MATRIX_OK (B, "B for Gustavson C=A*B", GB0) ;
-    ASSERT (!GB_PENDING (M_in)) ; ASSERT (!GB_ZOMBIES (M_in)) ;
-    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
-    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
-    ASSERT (A->vdim == B->vlen) ;
-    ASSERT_SEMIRING_OK (semiring, "semiring for Gustavson A*B", GB0) ;
-    ASSERT (Sauna_id >= 0 && Sauna_id < GxB_NTHREADS_MAX) ;
-    ASSERT (mask_applied != NULL) ;
-
-    //--------------------------------------------------------------------------
-    // determine size and hypersparsity of C
-    //--------------------------------------------------------------------------
-
-    GrB_Info info ;
-
-    (*Chandle) = NULL ;
-
-    // Gustavson's method does not handle a complemented mask
-    GrB_Matrix M = (Mask_comp ? NULL : M_in) ;
-
-    int64_t cvlen = A->vlen ;
-    int64_t cvdim = B->vdim ;
-
-    //--------------------------------------------------------------------------
-    // get the semiring operators
-    //--------------------------------------------------------------------------
-
-    GrB_BinaryOp mult = semiring->multiply ;
-    GrB_Monoid add = semiring->add ;
-    ASSERT (mult->ztype == add->op->ztype) ;
-
-    bool op_is_first  = semiring->multiply->opcode == GB_FIRST_opcode ;
-    bool op_is_second = semiring->multiply->opcode == GB_SECOND_opcode ;
-    bool A_is_pattern = false ;
-    bool B_is_pattern = false ;
-
-    if (flipxy)
-    { 
-        // z = fmult (b,a) will be computed
-        A_is_pattern = op_is_first  ;
-        B_is_pattern = op_is_second ;
-        ASSERT (GB_IMPLIES (!A_is_pattern,
-            GB_Type_compatible (A->type, mult->ytype))) ;
-        ASSERT (GB_IMPLIES (!B_is_pattern,
-            GB_Type_compatible (B->type, mult->xtype))) ;
-    }
-    else
-    { 
-        // z = fmult (a,b) will be computed
-        A_is_pattern = op_is_second ;
-        B_is_pattern = op_is_first  ;
-        ASSERT (GB_IMPLIES (!A_is_pattern,
-            GB_Type_compatible (A->type, mult->xtype))) ;
-        ASSERT (GB_IMPLIES (!B_is_pattern,
-            GB_Type_compatible (B->type, mult->ytype))) ;
-    }
-
-    // these asserts hold for any valid semiring:
-    ASSERT (mult->ztype == add->op->ztype) ;
-    ASSERT (add->op->ztype == add->op->xtype) ;
-    ASSERT (add->op->ztype == add->op->ytype) ;
-
-    size_t zsize = mult->ztype->size ;
-
-    //--------------------------------------------------------------------------
-    // allocate the Sauna
-    //--------------------------------------------------------------------------
-
-    GB_Sauna Sauna = GB_Global_Saunas_get (Sauna_id) ;
-    if (Sauna == NULL || Sauna->Sauna_n < cvlen || Sauna->Sauna_size < zsize)
-    { 
-        // get a new Sauna: the Sauna either does not exist, or is too small
-        GB_Sauna_free (Sauna_id) ;
-        GB_OK (GB_Sauna_alloc (Sauna_id, cvlen, zsize)) ;
-        Sauna = GB_Global_Saunas_get (Sauna_id) ;
-    }
-
-    int64_t *GB_RESTRICT Sauna_Mark = Sauna->Sauna_Mark ;
-
-    // Sauna_Mark [0..cvlen-1] < hiwater holds
-    ASSERT_SAUNA_IS_RESET ;
-
-    //--------------------------------------------------------------------------
-    // allocate C (just the pattern)
-    //--------------------------------------------------------------------------
-
-    GB_OK (GB_AxB_alloc (Chandle, GrB_BOOL, cvlen, cvdim, M, A, B, false,
-        cvlen)) ;
-
-    GrB_Matrix C = (*Chandle) ;
-    ASSERT (C != NULL) ;
-    ASSERT (C->x == NULL) ;
-
-    //==========================================================================
-    // symbolic analysis when no mask is present
-    //==========================================================================
-
-    if (M == NULL)
-    {
-        bool A_is_hyper = GB_IS_HYPER (A) ;
-        if (A_is_hyper || GB_IS_HYPER (B) || GB_IS_HYPER (C))
-        { 
-            // symbolic analysis when one or more matrix is hypersparse
-            #define GB_HYPER_CASE
-            #include "GB_AxB_Gustavson_symbolic.c"
-            #undef GB_HYPER_CASE
-        }
-        else
-        { 
-            // symbolic analysis when no matrix is hypersparse
-            #include "GB_AxB_Gustavson_symbolic.c"
-        }
-
-        // FUTURE: if A and B are pattern-only and the semiring is AND_OR
-        // or OR_AND (perhaps others) the C is pattern-only, and the values
-        // of C do not need to be computed.  The work is done here.
-    }
-
-    //==========================================================================
-    // numerical phase
-    //==========================================================================
-
-    //--------------------------------------------------------------------------
-    // allocate C->x
-    //--------------------------------------------------------------------------
-
-    // C has the same type as z for z=fmult(x,y).  The type is also the same as
-    // the monoid of the semiring.  The types of A and/or B, and their
-    // typecasting, has no effect on the size of the type of C or the Sauna
-    // workspace.
-
-    C->type = mult->ztype ;
-    C->type_size = zsize ;
-
-    GB_void t [GB_VLA(zsize)] ;
-
-    GB_MALLOC_MEMORY (C->x, C->nzmax, zsize) ;
-    if (C->x == NULL)
-    { 
-        // out of memory
-        GB_FREE_ALL ;
-        return (GrB_OUT_OF_MEMORY) ;
-    }
-
-    C->x_shallow = false ;
-
-    // Sauna_Work has size cvlen, each entry of size zsize.  Not initialized.
-    GB_void *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-
-    if (M != NULL)
-    { 
-        ASSERT (M->vlen == C->vlen && M->vdim == C->vdim) ;
-    }
-
-    // Gustavson's method cannot fail at this point.  C and the Sauna workspace
-    // have already been allocated above.
-
-    //--------------------------------------------------------------------------
-    // compute C = A*B for built-in types and operators
-    //--------------------------------------------------------------------------
-
-    ASSERT_TYPE_OK (A->type, "A type for Gustavson builtin", GB0) ;
-    ASSERT_TYPE_OK (B->type, "B type for Gustavson builtin", GB0) ;
-    ASSERT_TYPE_OK (C->type, "C type for Gustavson builtin", GB0) ;
-
-#ifndef GBCOMPACT
-
-    // If the GB_AxB_Gustavson_builtin function has a worker for the particular
-    // semiring, then it does the computation and returns info != GrB_NO_VALUE.
-    // Otherwise, it returns info as GrB_NO_VALUE, and the generic worker below
-    // does the work.
-
-    // If GBCOMPACT is enabled at compile-time, then no built-in workers
-    // are created, and this function is not used.  All C=A*B computations
-    // are done with the generic worker below.
-
-    info = GB_AxB_Gustavson_builtin (C, M, A, A_is_pattern,
-        B, B_is_pattern, semiring, flipxy, Sauna) ;
-    ASSERT (info == GrB_SUCCESS || info == GrB_NO_VALUE) ;
-    if (info == GrB_SUCCESS)
-    { 
-        // C = A*B has been done via a hard-coded case
-        ASSERT_MATRIX_OK (C, "C hard-coded for Gustavson C=A*B", GB0) ;
-        ASSERT (*Chandle == C) ;
-        ASSERT_SAUNA_IS_RESET ;
-        (*mask_applied) = (M != NULL) ;
-        return (GrB_SUCCESS) ;
-    }
-
-#endif
-
-    //--------------------------------------------------------------------------
-    // user semirings created at compile time
-    //--------------------------------------------------------------------------
-
-    if (semiring->object_kind == GB_USER_COMPILED)
-    {
-        // determine the required type of A and B for the user semiring
-        GrB_Type atype_required, btype_required ;
-
-        if (flipxy)
-        { 
-            // A is passed as y, and B as x, in z = mult(x,y)
-            atype_required = mult->ytype ;
-            btype_required = mult->xtype ;
-        }
-        else
-        { 
-            // A is passed as x, and B as y, in z = mult(x,y)
-            atype_required = mult->xtype ;
-            btype_required = mult->ytype ;
-        }
-
-        if (A->type == atype_required && B->type == btype_required)
-        { 
-            info = GB_AxB_user (GxB_AxB_GUSTAVSON, semiring, Chandle, M, A, B,
-                flipxy,
-                /* heap: */ NULL, NULL, NULL, 0,
-                /* Gustavson: */ Sauna,
-                /* dot2: */ NULL, NULL, 1, 1, 1, NULL,
-                /* dot3: */ NULL, 0) ;
-            (*mask_applied) = (M != NULL) && (info == GrB_SUCCESS) ;
-            return (info) ;
-        }
-    }
-
-    info = GrB_SUCCESS ;
-
-    //--------------------------------------------------------------------------
-    // determine the required types of A and B, for typecasting
-    //--------------------------------------------------------------------------
-
-    GrB_Type atype_required, btype_required ;
-    if (flipxy)
-    { 
-        // A is passed as y, and B as x, in z = mult(x,y)
-        atype_required = A_is_pattern ? A->type : mult->ytype ;
-        btype_required = B_is_pattern ? B->type : mult->xtype ;
-    }
-    else
-    { 
-        // A is passed as x, and B as y, in z = mult(x,y)
-        atype_required = A_is_pattern ? A->type : mult->xtype ;
-        btype_required = B_is_pattern ? B->type : mult->ytype ;
-    }
-    bool no_typecasting = (A->type == atype_required)
-                       && (B->type == btype_required) ;
-
-    //--------------------------------------------------------------------------
-    // generic Gustavson, any semiring, with or without typecasting
-    //--------------------------------------------------------------------------
-
-    // Define operations for GB_AxB_Gustavson_mask and GB_AxB_Gustavson_nomask,
-    // whether or not typecasting is needed.
-
-    #define GB_IDENTITY      identity
-    #define GB_SAUNA_WORK(i) (Sauna_Work +((i)*zsize))
-    #define GB_CX(p)         (Cx +((p)*zsize))
-    #define GB_COPY_C(z,x)   memcpy (z, x, zsize) ;
-
-    size_t asize = A_is_pattern ? 0 : A->type->size ;
-    size_t bsize = B_is_pattern ? 0 : B->type->size ;
-
-    size_t xsize = mult->xtype->size ;
-    size_t ysize = mult->ytype->size ;
-
-    // scalar workspace (after typecasting, if needed)
-    // flipxy false: aik = (xtype) A(i,k) and bkj = (ytype) B(k,j)
-    // flipxy true:  aik = (ytype) A(i,k) and bkj = (xtype) B(k,j)
-    GB_void aik [GB_VLA(flipxy ? ysize : xsize)] ;
-    GB_void bkj [GB_VLA(flipxy ? xsize : ysize)] ;
-
-    GxB_binary_function fmult = mult->function ;
-    GxB_binary_function fadd  = add->op->function ;
-    GB_void *GB_RESTRICT identity = add->identity ;
-    GB_void *GB_RESTRICT Cx = C->x ;
-
-    #define GB_ATYPE GB_void
-    #define GB_BTYPE GB_void
-
-    // C(i,j) = A(i,k) * B(k,j)
-    #define GB_MULT(cij, aik, bkj)                                      \
-        GB_MULTIPLY (cij, aik, bkj) ;                                   \
-
-    // C(i,j) += A(i,k) * B(k,j)
-    #define GB_MULTADD(cij, aik, bkj)                                   \
-        GB_MULTIPLY (t, aik, bkj) ;                                     \
-        fadd (cij, cij, t) ;
-
-    #define GB_GENERIC
-
-    if (no_typecasting)
-    { 
-
-        //----------------------------------------------------------------------
-        // generic C=A*B or C<M>=A*B with any semiring, but no typecasting
-        //----------------------------------------------------------------------
-
-        // aik = &A(i,k), of size asize
-        #define GB_GETA(aik,Ax,pA)                                          \
-            const GB_void *aik = A_is_pattern ? NULL : (Ax +((pA)*asize)) ;
-
-        // bkj = B(k,j), of size bsize
-        #define GB_GETB(bkj,Bx,pB)                                          \
-            if (!B_is_pattern) memcpy (bkj, Bx +((pB)*bsize), bsize) ;
-
-        if (flipxy)
-        { 
-            #define GB_MULTIPLY(z,x,y) fmult (z,y,x)
-            #include "GB_AxB_Gustavson_meta.c"
-            #undef GB_MULTIPLY
-        }
-        else
-        { 
-            #define GB_MULTIPLY(z,x,y) fmult (z,x,y)
-            #include "GB_AxB_Gustavson_meta.c"
-            #undef GB_MULTIPLY
-        }
-
-    }
-    else
-    {
-
-        //----------------------------------------------------------------------
-        // generic C=A*B or C<M>=A*B with any semiring, with any typecasting
-        //----------------------------------------------------------------------
-
-        GB_cast_function cast_A, cast_B ;
-        if (flipxy)
-        { 
-            // A is typecasted to y, and B is typecasted to x
-            cast_A = A_is_pattern ? NULL : 
-                     GB_cast_factory (mult->ytype->code, A->type->code) ;
-            cast_B = B_is_pattern ? NULL : 
-                     GB_cast_factory (mult->xtype->code, B->type->code) ;
-        }
-        else
-        { 
-            // A is typecasted to x, and B is typecasted to y
-            cast_A = A_is_pattern ? NULL :
-                     GB_cast_factory (mult->xtype->code, A->type->code) ;
-            cast_B = B_is_pattern ? NULL :
-                     GB_cast_factory (mult->ytype->code, B->type->code) ;
-        }
-
-        // aik = A(i,k), of size asize
-        #undef  GB_GETA
-        #define GB_GETA(aik,Ax,pA)                                          \
-            if (!A_is_pattern) cast_A (aik, Ax +((pA)*asize), asize) ;
-
-        // bkj = B(k,j), of size bsize
-        #undef  GB_GETB
-        #define GB_GETB(bkj,Bx,pB)                                          \
-            if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize) ;
-
-        if (flipxy)
-        { 
-            #define GB_MULTIPLY(z,x,y) fmult (z,y,x)
-            #include "GB_AxB_Gustavson_meta.c"
-            #undef GB_MULTIPLY
-        }
-        else
-        { 
-            #define GB_MULTIPLY(z,x,y) fmult (z,x,y)
-            #include "GB_AxB_Gustavson_meta.c"
-            #undef GB_MULTIPLY
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // return result
-    //--------------------------------------------------------------------------
-
-    ASSERT_SAUNA_IS_RESET ;
-
-    // cannot fail since C->plen is the upper bound: # non-empty columns of B
-    ASSERT (info == GrB_SUCCESS) ;
-    // if it could fail, do this:
-    // GB_OK (info) ;     // check result and return if an error occurred
-
-    ASSERT_MATRIX_OK (C, "C output for Gustavson C=A*B", GB0) ;
-    ASSERT (*Chandle == C) ;
-    (*mask_applied) = (M != NULL) ;
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_AxB_Gustavson_builtin.c b/Source/GB_AxB_Gustavson_builtin.c
deleted file mode 100644
index 5f06efe98e..0000000000
--- a/Source/GB_AxB_Gustavson_builtin.c
+++ /dev/null
@@ -1,99 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_Gustavson_builtin:  hard-coded C=A*B for built-in types
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// This function computes C=A*B with hard-coded versions for all 1040 unique
-// built-in semirings that can be constructed with built-in operators.  It also
-// handles all non-unique built-in semirings, by renaming operators to
-// equivalent ones; if these are included, this function computes C=A*B for all
-// possible valid semirings that can be constructed from built-in operators.
-
-#include "GB_mxm.h"
-#ifndef GBCOMPACT
-#include "GB_AxB__include.h"
-
-GrB_Info GB_AxB_Gustavson_builtin
-(
-    GrB_Matrix C,                   // output matrix
-    const GrB_Matrix M,             // M matrix for C<M> (not complemented)
-    const GrB_Matrix A,             // input matrix
-    const bool A_is_pattern,        // true if only the pattern of A is used
-    const GrB_Matrix B,             // input matrix
-    const bool B_is_pattern,        // true if only the pattern of B is used
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    GB_Sauna Sauna                  // sparse accumulator
-)
-{ 
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_DEBUG
-    GB_Context Context = NULL ;
-    #endif
-    ASSERT (!GB_aliased (C, M)) ;
-    ASSERT (!GB_aliased (C, A)) ;
-    ASSERT (!GB_aliased (C, B)) ;
-
-    if (M == NULL)
-    {
-        // C contains the pattern of C=A*B
-        ASSERT_MATRIX_OK (C, "C pattern for Gustavson A*B", GB0) ;
-    }
-    ASSERT_MATRIX_OK (A, "A for Gustavson A*B", GB0) ;
-    ASSERT_MATRIX_OK (B, "B for Gustavson A*B", GB0) ;
-    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
-    ASSERT (!GB_PENDING (M)) ; ASSERT (!GB_ZOMBIES (M)) ;
-    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
-    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
-    ASSERT_SEMIRING_OK (semiring, "semiring for Gustavson", GB0) ;
-    ASSERT (C->type == semiring->add->op->ztype) ;
-
-    GB_Opcode mult_opcode, add_opcode ;
-    GB_Type_code xycode, zcode ;
-
-    GrB_Info info = GrB_NO_VALUE ;
-
-    // check if the semiring is builtin, and if so, get opcodes and type codes
-    if (!GB_AxB_semiring_builtin (A, A_is_pattern, B, B_is_pattern, semiring,
-        flipxy, &mult_opcode, &add_opcode, &xycode, &zcode))
-    { 
-        // no error condition, just not a built-in semiring.
-        return (GrB_NO_VALUE) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // define the worker for the switch factory
-    //--------------------------------------------------------------------------
-
-    #define GB_AXB(add,mult,xyname) GB_AgusB_ ## add ## mult ## xyname
-
-    #define GB_AxB_WORKER(add,mult,xyname)                      \
-    {                                                           \
-        info = GB_AXB (add,mult,xyname) (C, M,                  \
-            A, A_is_pattern, B, B_is_pattern, Sauna) ;          \
-    }                                                           \
-    break ;
-
-    //--------------------------------------------------------------------------
-    // launch the switch factory
-    //--------------------------------------------------------------------------
-
-    #include "GB_AxB_factory.c"
-
-    //--------------------------------------------------------------------------
-    // return result
-    //--------------------------------------------------------------------------
-
-    return (info) ;
-}
-
-#endif
-
diff --git a/Source/GB_AxB_alloc.c b/Source/GB_AxB_alloc.c
deleted file mode 100644
index 8a87be835c..0000000000
--- a/Source/GB_AxB_alloc.c
+++ /dev/null
@@ -1,163 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_alloc: estimate nnz(C) and allocate C for C=A*B or C<M>=A*B
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Does not log an error; returns GrB_SUCCESS, GrB_OUT_OF_MEMORY, or GrB_PANIC.
-// This used for Gustavon's method and the heap-based method, not the dot
-// product method.
-
-#include "GB_mxm.h"
-#include "GB_iterator.h"
-
-GrB_Info GB_AxB_alloc           // estimate nnz(C) and allocate C for C=A*B
-(
-    GrB_Matrix *Chandle,        // output matrix
-    const GrB_Type ctype,       // type of C
-    const GrB_Index cvlen,      // vector length of C
-    const GrB_Index cvdim,      // # of vectors of C
-    const GrB_Matrix M,         // optional mask
-    const GrB_Matrix A,         // input matrix A
-    const GrB_Matrix B,         // input matrix B
-    const bool numeric,         // if true, allocate A->x, else A->x is NULL
-    const int64_t cnz_extra     // added to the rough estimate (if M NULL)
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_DEBUG
-    GB_Context Context = NULL ;
-    #endif
-    ASSERT (Chandle != NULL) ;
-    ASSERT (*Chandle == NULL) ;
-    ASSERT_MATRIX_OK_OR_NULL (M, "M for alloc C=A*B", GB0) ;
-    ASSERT_MATRIX_OK (A, "A for alloc C=A*B", GB0) ;
-    ASSERT_MATRIX_OK (B, "B for alloc C=A*B", GB0) ;
-    ASSERT (M == NULL || !M->is_slice) ;
-
-    GrB_Info info ;
-
-    //--------------------------------------------------------------------------
-    // determine hypersparsity
-    //--------------------------------------------------------------------------
-
-    // C is hypersparse if any of A, B, and/or M are hypersparse
-    // or if A or B are slice or hyperslice.  M is never a slice or hyperslice.
-    bool C_is_hyper = (cvdim > 1) &&
-        (A->is_hyper || B->is_hyper || (M != NULL && M->is_hyper)) ;
-    C_is_hyper = C_is_hyper || (A->is_slice || B->is_slice) ;
-
-    //--------------------------------------------------------------------------
-    // estimate nnz(C) and the # of vectors in C
-    //--------------------------------------------------------------------------
-
-    int64_t cplen = 0 ;
-    int64_t cnz_guess = 0 ;
-
-    if (M != NULL)
-    {
-
-        //----------------------------------------------------------------------
-        // C<M>=A*B, pattern of C is a subset of M
-        //----------------------------------------------------------------------
-
-        // cnz_guess is a strict upper bound on nnz(C)
-
-        const int64_t *GB_RESTRICT Mp = M->p ;
-        const int64_t *GB_RESTRICT Mh = M->h ;
-        const int64_t mnvec = M->nvec ;
-        int64_t mpleft = 0 ;
-        int64_t mpright = mnvec - 1 ;
-        const bool M_is_hyper = M->is_hyper ;
-        int64_t bnvec_nonempty = 0 ;
-
-        GBI_for_each_vector (B)
-        { 
-
-            //------------------------------------------------------------------
-            // get B(:,j)
-            //------------------------------------------------------------------
-
-            GBI_jth_iteration (j, pB, pB_end) ;
-            int64_t bjnz = pB_end - pB ;
-            if (bjnz == 0) continue ;
-            bnvec_nonempty++ ;
-
-            //------------------------------------------------------------------
-            // get M(:,j)
-            //------------------------------------------------------------------
-
-            // find vector j in M
-            int64_t pM_start, pM_end ;
-            GB_lookup (M_is_hyper, Mh, Mp, &mpleft, mpright, j, &pM_start,
-                &pM_end) ;
-            int64_t mjnz = pM_end - pM_start ;
-            cnz_guess += mjnz ;
-
-            if (mjnz != 0) cplen++ ;
-        }
-
-        if (B->nvec_nonempty < 0) B->nvec_nonempty = bnvec_nonempty ;
-        ASSERT (B->nvec_nonempty == GB_nvec_nonempty (B, NULL)) ;
-
-    }
-    else
-    {
-
-        //----------------------------------------------------------------------
-        // C=A*B
-        //----------------------------------------------------------------------
-
-        // cnz_guess is a rough guess, just for allocating C->i for the
-        // symbolic phase for Gustavson, or during the combined symbolic/
-        // numeric phase for the heap-based method.  In either case, this space
-        // is reallocated if cnz_guess is too low, via GB_ix_realloc.
-
-        cnz_guess = cnz_extra + GB_NNZ (A) + GB_NNZ (B) ;
-
-        // abnzmax = cvlen * cvdim, but check for overflow
-        GrB_Index abnzmax ;
-        if (GB_Index_multiply (&abnzmax, cvlen, cvdim))
-        { 
-            // only do this if cvlen * cvdim does not overflow
-            cnz_guess = GB_IMIN (cnz_guess, abnzmax) ;
-        }
-
-        if (C_is_hyper)
-        {
-            if (B->nvec_nonempty < 0)
-            {
-                B->nvec_nonempty = GB_nvec_nonempty (B, NULL) ;
-            }
-            cplen = B->nvec_nonempty ;
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // allocate C
-    //--------------------------------------------------------------------------
-
-    // C->p and C->h are allocated but not initialized.  C->i is allocated.
-    // C->x is allocated if the numeric flag is true.
-
-    // add one to ensure cnz_guess > 0, and (cnz < C->nzmax) will always hold
-    // if cnz_guess is exact.
-
-    GB_CREATE (Chandle, ctype, cvlen, cvdim, GB_Ap_malloc, true,
-        GB_SAME_HYPER_AS (C_is_hyper), B->hyper_ratio, cplen,
-        cnz_guess + 1, numeric, NULL) ;
-
-    //--------------------------------------------------------------------------
-    // return result
-    //--------------------------------------------------------------------------
-
-    return (info) ;
-}
-
diff --git a/Source/GB_AxB_colscale.c b/Source/GB_AxB_colscale.c
index 70798eab3b..00a25d5c80 100644
--- a/Source/GB_AxB_colscale.c
+++ b/Source/GB_AxB_colscale.c
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
-// GB_AxB_colscale: C = A*D, column scale with diagonal matrix D
+// GB_AxB_colscale: C = A*D where D is diagonal
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -47,10 +47,8 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
 
     int64_t anz   = GB_NNZ (A) ;
     int64_t anvec = A->nvec ;
-
     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
     int nthreads = GB_nthreads (anz + anvec, chunk, nthreads_max) ;
-
     int ntasks = (nthreads == 1) ? 1 : (32 * nthreads) ;
     ntasks = GB_IMIN (ntasks, anz) ;
     ntasks = GB_IMAX (ntasks, 1) ;
@@ -65,7 +63,7 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
 
     int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
     if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, A, ntasks))
-    {
+    { 
         // out of memory
         return (GB_OUT_OF_MEMORY) ;
     }
@@ -79,14 +77,15 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
 
     bool op_is_first  = mult->opcode == GB_FIRST_opcode ;
     bool op_is_second = mult->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = mult->opcode == GB_PAIR_opcode ;
     bool A_is_pattern = false ;
     bool D_is_pattern = false ;
 
     if (flipxy)
     { 
         // z = fmult (b,a) will be computed
-        A_is_pattern = op_is_first  ;
-        D_is_pattern = op_is_second ;
+        A_is_pattern = op_is_first  || op_is_pair ;
+        D_is_pattern = op_is_second || op_is_pair ;
         ASSERT (GB_IMPLIES (!A_is_pattern,
             GB_Type_compatible (A->type, mult->ytype))) ;
         ASSERT (GB_IMPLIES (!D_is_pattern,
@@ -95,8 +94,8 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
     else
     { 
         // z = fmult (a,b) will be computed
-        A_is_pattern = op_is_second ;
-        D_is_pattern = op_is_first  ;
+        A_is_pattern = op_is_second || op_is_pair ;
+        D_is_pattern = op_is_first  || op_is_pair ;
         ASSERT (GB_IMPLIES (!A_is_pattern,
             GB_Type_compatible (A->type, mult->xtype))) ;
         ASSERT (GB_IMPLIES (!D_is_pattern,
@@ -148,8 +147,8 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
 
         GB_Opcode opcode ;
         GB_Type_code xycode, zcode ;
-        if (GB_binop_builtin (A, A_is_pattern, D, D_is_pattern, mult,
-            flipxy, &opcode, &xycode, &zcode))
+        if (GB_binop_builtin (A->type, A_is_pattern, D->type, D_is_pattern,
+            mult, flipxy, &opcode, &xycode, &zcode))
         { 
             // C=A*D, colscale with built-in operator
             #include "GB_binop_factory.c"
@@ -168,6 +167,8 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
         // get operators, functions, workspace, contents of A, D, and C
         //----------------------------------------------------------------------
 
+        GB_BURBLE_MATRIX (C, "generic ") ;
+
         GxB_binary_function fmult = mult->function ;
 
         size_t csize = C->type->size ;
@@ -231,6 +232,7 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
 
         // no vectorization
         #define GB_PRAGMA_VECTORIZE
+        #define GB_PRAGMA_VECTORIZE_DOT
 
         if (flipxy)
         { 
diff --git a/Source/GB_AxB_dot_parallel.c b/Source/GB_AxB_dot.c
similarity index 84%
rename from Source/GB_AxB_dot_parallel.c
rename to Source/GB_AxB_dot.c
index ec4702d4e5..f87ace8584 100644
--- a/Source/GB_AxB_dot_parallel.c
+++ b/Source/GB_AxB_dot.c
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
-// GB_AxB_dot_parallel: C<M>=A'*B, or C=A'*B using dot products
+// GB_AxB_dot: C<M>=A'*B using dot products
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -23,11 +23,15 @@
 // if C is m-by-n.  It is thus only suitable for cases when A and B are large,
 // and C is small.  GB_AxB_dot3 computes C<M>=A'*B, and it only needs to
 // examine entries in M, taking Omega(nnz(M)) time.  It can thus be used for
-// very large matrices C.
+// very large matrices C.  GB_AxB_dot4 computes C+=A'*B when C is dense.
 
 // The output matrix C = *Chandle has not been allocated, so C is NULL on
 // input.  The mask M is optional.
 
+// If C is computed in place, Chandle is ignored, and the result is computed
+// in C_in_place instead.  This case requires the accum operator to match
+// the monoid of the semiring.
+
 // The semiring defines C=A*B.  flipxy modifies how the semiring multiply
 // operator is applied.  If false, then fmult(aik,bkj) is computed.  If true,
 // then the operands are swapped, and fmult(bkj,aij) is done instead.
@@ -51,16 +55,19 @@
     GB_FREE_MEMORY (Aslice, naslice+1, sizeof (int64_t)) ;      \
 }
 
-GrB_Info GB_AxB_dot_parallel        // parallel dot product
+GrB_Info GB_AxB_dot                 // dot product (multiple methods)
 (
     GrB_Matrix *Chandle,            // output matrix, NULL on input
+    GrB_Matrix C_in_place,          // input/output matrix, if done in place
     GrB_Matrix M,                   // optional mask matrix
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,             // input matrix A
     const GrB_Matrix B,             // input matrix B
     const GrB_Semiring semiring,    // semiring that defines C=A*B
     const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
     bool *mask_applied,             // if true, mask was applied
+    bool *done_in_place,            // if true, C_in_place was computed in place
     GB_Context Context
 )
 {
@@ -92,8 +99,10 @@ GrB_Info GB_AxB_dot_parallel        // parallel dot product
         //======================================================================
 
         // use dot3 if M is present and not complemented
+        GBBURBLE ("dot3 ") ;
         (*mask_applied) = true ;
-        return (GB_AxB_dot3 (Chandle, M, A, B, semiring, flipxy, Context)) ;
+        return (GB_AxB_dot3 (Chandle, M, Mask_struct, A, B, semiring, flipxy,
+            Context)) ;
 
     }
     else
@@ -119,6 +128,21 @@ GrB_Info GB_AxB_dot_parallel        // parallel dot product
             A->nvec_nonempty = GB_nvec_nonempty (A, NULL) ;
         }
 
+        //======================================================================
+        // in place C+=A'*B
+        //======================================================================
+
+        if (C_in_place != NULL && M == NULL && !Mask_comp)
+        { 
+            GBBURBLE ("dense, C+=A'*B in place ") ;
+            (*done_in_place) = true ;
+            return (GB_AxB_dot4 (C_in_place, A, B, semiring, flipxy, Context)) ;
+        }
+
+        //----------------------------------------------------------------------
+        // determine the number of threads to use
+        //----------------------------------------------------------------------
+
         int64_t anvec = A->nvec ;
         int64_t anz   = GB_NNZ (A) ;
 
@@ -127,10 +151,6 @@ GrB_Info GB_AxB_dot_parallel        // parallel dot product
 
         ASSERT (A->vlen == B->vlen) ;
 
-        //----------------------------------------------------------------------
-        // determine the number of threads to use
-        //----------------------------------------------------------------------
-
         GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
         int nthreads = GB_nthreads (anz + bnz, chunk, nthreads_max) ;
 
@@ -139,10 +159,10 @@ GrB_Info GB_AxB_dot_parallel        // parallel dot product
         //======================================================================
 
         if (nthreads == 1)
-        { 
+        {
             // do the entire computation with a single thread
-            info = GB_AxB_dot2 (Chandle, M, &A, B, semiring, flipxy,
-                mask_applied, 1, 1, 1, NULL) ;
+            info = GB_AxB_dot2 (Chandle, M, Mask_struct, &A, B, semiring,
+                flipxy, mask_applied, 1, 1, 1, NULL) ;
             if (info == GrB_SUCCESS)
             { 
                 ASSERT_MATRIX_OK (*Chandle, "C for sequential A*B", GB0) ;
@@ -189,7 +209,7 @@ GrB_Info GB_AxB_dot_parallel        // parallel dot product
 
         GB_CALLOC_MEMORY (Aslice, naslice+1, sizeof (GrB_Matrix)) ;
         if (Aslice == NULL || !GB_pslice (&Slice, A->p, A->nvec, naslice))
-        {
+        { 
             // out of memory
             GB_FREE_ALL ;
             return (GB_OUT_OF_MEMORY) ;
@@ -205,8 +225,8 @@ GrB_Info GB_AxB_dot_parallel        // parallel dot product
         // compute each slice of C = A'*B or C<!M> = A'*B
         //----------------------------------------------------------------------
 
-        GB_OK (GB_AxB_dot2 (Chandle, M, Aslice, B, semiring, flipxy,
-            mask_applied, nthreads, naslice, nbslice, Context)) ;
+        GB_OK (GB_AxB_dot2 (Chandle, M, Mask_struct, Aslice, B, semiring,
+            flipxy, mask_applied, nthreads, naslice, nbslice, Context)) ;
 
         //----------------------------------------------------------------------
         // free workspace and return result
diff --git a/Source/GB_AxB_dot2.c b/Source/GB_AxB_dot2.c
index 2d4a163a25..a7da129b5d 100644
--- a/Source/GB_AxB_dot2.c
+++ b/Source/GB_AxB_dot2.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot2: compute C=A'*B or C<!M>=A'*B in parallel, in place
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -39,6 +39,7 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
     GrB_Matrix *Chandle,            // output matrix
     const GrB_Matrix M,             // mask matrix for C<!M>=A'*B
                                     // if present, the mask is complemented
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix *Aslice,       // input matrices (already sliced)
     const GrB_Matrix B,             // input matrix
     const GrB_Semiring semiring,    // semiring that defines C=A*B
@@ -94,14 +95,15 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
 
     bool op_is_first  = mult->opcode == GB_FIRST_opcode ;
     bool op_is_second = mult->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = mult->opcode == GB_PAIR_opcode ;
     bool A_is_pattern = false ;
     bool B_is_pattern = false ;
 
     if (flipxy)
     { 
         // z = fmult (b,a) will be computed
-        A_is_pattern = op_is_first  ;
-        B_is_pattern = op_is_second ;
+        A_is_pattern = op_is_first  || op_is_pair ;
+        B_is_pattern = op_is_second || op_is_pair ;
         ASSERT (GB_IMPLIES (!A_is_pattern,
             GB_Type_compatible (A->type, mult->ytype))) ;
         ASSERT (GB_IMPLIES (!B_is_pattern,
@@ -110,8 +112,8 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
     else
     { 
         // z = fmult (a,b) will be computed
-        A_is_pattern = op_is_second ;
-        B_is_pattern = op_is_first  ;
+        A_is_pattern = op_is_second || op_is_pair ;
+        B_is_pattern = op_is_first  || op_is_pair ;
         ASSERT (GB_IMPLIES (!A_is_pattern,
             GB_Type_compatible (A->type, mult->xtype))) ;
         ASSERT (GB_IMPLIES (!B_is_pattern,
@@ -125,7 +127,7 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
     //--------------------------------------------------------------------------
 
     if (!GB_pslice (&B_slice, /* B */ B->p, B->nvec, nbslice))
-    {
+    { 
         // out of memory
         GB_FREE_WORK ;
         return (GrB_OUT_OF_MEMORY) ;
@@ -146,7 +148,7 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
 
     GB_CALLOC_MEMORY (C_counts, naslice, sizeof (int64_t *)) ;
     if (C_counts == NULL)
-    {
+    { 
         // out of memory
         GB_FREE_WORK ;
         return (GrB_OUT_OF_MEMORY) ;
@@ -251,7 +253,7 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
 
     #define GB_AxB_WORKER(add,mult,xyname)                              \
     {                                                                   \
-        info = GB_Adot2B (add,mult,xyname) (C, M,                       \
+        info = GB_Adot2B (add,mult,xyname) (C, M, Mask_struct,          \
             Aslice, A_is_pattern, B, B_is_pattern, B_slice,             \
             C_counts, nthreads, naslice, nbslice) ;                     \
         done = (info != GrB_NO_VALUE) ;                                 \
@@ -274,47 +276,13 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
 
 #endif
 
-    //--------------------------------------------------------------------------
-    // user semirings created at compile time
-    //--------------------------------------------------------------------------
-
-    if (semiring->object_kind == GB_USER_COMPILED)
-    {
-        // determine the required type of A and B for the user semiring
-        GrB_Type atype_required, btype_required ;
-
-        if (flipxy)
-        { 
-            // A is passed as y, and B as x, in z = mult(x,y)
-            atype_required = mult->ytype ;
-            btype_required = mult->xtype ;
-        }
-        else
-        { 
-            // A is passed as x, and B as y, in z = mult(x,y)
-            atype_required = mult->xtype ;
-            btype_required = mult->ytype ;
-        }
-
-        if (A->type == atype_required && B->type == btype_required)
-        {
-            info = GB_AxB_user (GxB_AxB_DOT, semiring, Chandle, M, NULL, B,
-                flipxy,
-                /* heap: */ NULL, NULL, NULL, 0,
-                /* Gustavson: */ NULL,
-                /* dot2: */ Aslice, B_slice, nthreads, naslice, nbslice,
-                            C_counts,
-                /* dot3: */ NULL, 0) ;
-            done = true ;
-        }
-    }
-
     //--------------------------------------------------------------------------
     // C = A'*B, computing each entry with a dot product, with typecasting
     //--------------------------------------------------------------------------
 
     if (!done)
     {
+        GB_BURBLE_MATRIX (C, "generic ") ;
 
         //----------------------------------------------------------------------
         // get operators, functions, workspace, contents of A, B, C, and M
@@ -364,12 +332,12 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
         // aki = A(k,i), located in Ax [pA]
         #define GB_GETA(aki,Ax,pA)                                          \
             GB_void aki [GB_VLA(aki_size)] ;                                \
-            if (!A_is_pattern) cast_A (aki, Ax +((pA)*asize), asize) ;
+            if (!A_is_pattern) cast_A (aki, Ax +((pA)*asize), asize)
 
         // bkj = B(k,j), located in Bx [pB]
         #define GB_GETB(bkj,Bx,pB)                                          \
             GB_void bkj [GB_VLA(bkj_size)] ;                                \
-            if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize) ;
+            if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize)
 
         // break if cij reaches the terminal value
         #define GB_DOT_TERMINAL(cij)                                        \
@@ -380,24 +348,24 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
 
         // C(i,j) = A(i,k) * B(k,j)
         #define GB_MULT(cij, aki, bkj)                                      \
-            GB_MULTIPLY (cij, aki, bkj) ;                                   \
+            GB_MULTIPLY (cij, aki, bkj)
 
         // C(i,j) += A(i,k) * B(k,j)
         #define GB_MULTADD(cij, aki, bkj)                                   \
             GB_void zwork [GB_VLA(csize)] ;                                 \
             GB_MULTIPLY (zwork, aki, bkj) ;                                 \
-            fadd (cij, cij, zwork) ;
+            fadd (cij, cij, zwork)
 
         // define cij for each task
         #define GB_CIJ_DECLARE(cij)                                         \
-            GB_void cij [GB_VLA(csize)] ;
+            GB_void cij [GB_VLA(csize)]
 
         // address of Cx [p]
         #define GB_CX(p) Cx +((p)*csize)
 
         // save the value of C(i,j)
         #define GB_CIJ_SAVE(cij,p)                                          \
-            memcpy (GB_CX (p), cij, csize) ;
+            memcpy (GB_CX (p), cij, csize)
 
         #define GB_ATYPE GB_void
         #define GB_BTYPE GB_void
@@ -405,8 +373,9 @@ GrB_Info GB_AxB_dot2                // C=A'*B or C<!M>=A'*B, dot product method
 
         #define GB_PHASE_2_OF_2
 
-        // loops with function pointers cannot be vectorized
-        #define GB_DOT_SIMD ;
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+        #define GB_PRAGMA_VECTORIZE_DOT
 
         if (flipxy)
         { 
diff --git a/Source/GB_AxB_dot3.c b/Source/GB_AxB_dot3.c
index f1dd56087c..69ba06abfe 100644
--- a/Source/GB_AxB_dot3.c
+++ b/Source/GB_AxB_dot3.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot3: compute C<M> = A'*B in parallel
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -30,6 +30,7 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
 (
     GrB_Matrix *Chandle,            // output matrix
     const GrB_Matrix M,             // mask matrix
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,             // input matrix
     const GrB_Matrix B,             // input matrix
     const GrB_Semiring semiring,    // semiring that defines C=A*B
@@ -67,14 +68,15 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
 
     bool op_is_first  = mult->opcode == GB_FIRST_opcode ;
     bool op_is_second = mult->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = mult->opcode == GB_PAIR_opcode ;
     bool A_is_pattern = false ;
     bool B_is_pattern = false ;
 
     if (flipxy)
     { 
         // z = fmult (b,a) will be computed
-        A_is_pattern = op_is_first  ;
-        B_is_pattern = op_is_second ;
+        A_is_pattern = op_is_first  || op_is_pair ;
+        B_is_pattern = op_is_second || op_is_pair ;
         ASSERT (GB_IMPLIES (!A_is_pattern,
             GB_Type_compatible (A->type, mult->ytype))) ;
         ASSERT (GB_IMPLIES (!B_is_pattern,
@@ -83,8 +85,8 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
     else
     { 
         // z = fmult (a,b) will be computed
-        A_is_pattern = op_is_second ;
-        B_is_pattern = op_is_first  ;
+        A_is_pattern = op_is_second || op_is_pair ;
+        B_is_pattern = op_is_first  || op_is_pair ;
         ASSERT (GB_IMPLIES (!A_is_pattern,
             GB_Type_compatible (A->type, mult->xtype))) ;
         ASSERT (GB_IMPLIES (!B_is_pattern,
@@ -100,14 +102,13 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
     const int64_t *GB_RESTRICT Mp = M->p ;
     const int64_t *GB_RESTRICT Mh = M->h ;
     const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
     const size_t msize = M->type->size ;
     const int64_t mvlen = M->vlen ;
     const int64_t mvdim = M->vdim ;
     const int64_t mnz = GB_NNZ (M) ;
     const int64_t mnvec = M->nvec ;
     const bool M_is_hyper = M->is_hyper ;
-    GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
 
     const int64_t *GB_RESTRICT Ap = A->p ;
     const int64_t *GB_RESTRICT Ah = A->h ;
@@ -139,7 +140,7 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
 
     GB_CREATE (Chandle, ctype, cvlen, cvdim, GB_Ap_malloc, true,
         GB_SAME_HYPER_AS (M_is_hyper), M->hyper_ratio, cnvec,
-        cnz+1,  // add one to cnz for GB_cumsum
+        cnz+1,  // add one to cnz for GB_cumsum of Cwork in GB_AxB_dot3_slice
         true, Context) ;
     if (info != GrB_SUCCESS)
     { 
@@ -153,7 +154,6 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
     int64_t *GB_RESTRICT Cp = C->p ;
     int64_t *GB_RESTRICT Ch = C->h ;
     int64_t *GB_RESTRICT Cwork = C->i ;    // use C->i as workspace
-    // printf ("Ch is %p\n", (void *) Ch) ;
 
     //--------------------------------------------------------------------------
     // determine the # of threads to use
@@ -267,9 +267,7 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
                 for ( ; pM < pM_end ; pM++)
                 {
                     int64_t work = 1 ;
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         int64_t pA, pA_end, i = Mi [pM] ;
                         GB_lookup (A_is_hyper, Ah, Ap, &apleft, anvec-1, i,
@@ -291,7 +289,7 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
     GB_OK (GB_AxB_dot3_slice (&TaskList, &max_ntasks, &ntasks, &nthreads,
         C, Context)) ;
 
-    // if (ntasks > 1) printf ("ntasks %d\n", ntasks) ;
+    GBBURBLE ("nthreads %d ntasks %d ", nthreads, ntasks) ;
 
     //--------------------------------------------------------------------------
     // C<M> = A'*B, via masked dot product method and built-in semiring
@@ -309,7 +307,7 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
 
     #define GB_AxB_WORKER(add,mult,xyname)                              \
     {                                                                   \
-        info = GB_Adot3B (add,mult,xyname) (C, M,                       \
+        info = GB_Adot3B (add,mult,xyname) (C, M, Mask_struct,          \
             A, A_is_pattern, B, B_is_pattern,                           \
             TaskList, ntasks, nthreads) ;                               \
         done = (info != GrB_NO_VALUE) ;                                 \
@@ -331,46 +329,13 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
 
 #endif
 
-    //--------------------------------------------------------------------------
-    // user semirings created at compile time
-    //--------------------------------------------------------------------------
-
-    if (semiring->object_kind == GB_USER_COMPILED)
-    { 
-        // determine the required type of A and B for the user semiring
-        GrB_Type atype_required, btype_required ;
-
-        if (flipxy)
-        { 
-            // A is passed as y, and B as x, in z = mult(x,y)
-            atype_required = mult->ytype ;
-            btype_required = mult->xtype ;
-        }
-        else
-        { 
-            // A is passed as x, and B as y, in z = mult(x,y)
-            atype_required = mult->xtype ;
-            btype_required = mult->ytype ;
-        }
-
-        if (A->type == atype_required && B->type == btype_required)
-        {
-            info = GB_AxB_user (GxB_AxB_DOT, semiring, Chandle, M, A, B,
-                flipxy,
-                /* heap: */ NULL, NULL, NULL, 0,
-                /* Gustavson: */ NULL,
-                /* dot2: */ NULL, NULL, nthreads, 0, 0, NULL,
-                /* dot3: */ TaskList, ntasks) ;
-            done = true ;
-        }
-    }
-
     //--------------------------------------------------------------------------
     // C<M> = A'*B, via masked dot product method and typecasting
     //--------------------------------------------------------------------------
 
     if (!done)
     {
+        GB_BURBLE_MATRIX (C, "generic ") ;
 
         //----------------------------------------------------------------------
         // get operators, functions, workspace, contents of A, B, C, and M
@@ -420,12 +385,12 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
         // aki = A(k,i), located in Ax [pA]
         #define GB_GETA(aki,Ax,pA)                                          \
             GB_void aki [GB_VLA(aki_size)] ;                                \
-            if (!A_is_pattern) cast_A (aki, Ax +((pA)*asize), asize) ;
+            if (!A_is_pattern) cast_A (aki, Ax +((pA)*asize), asize)
 
         // bkj = B(k,j), located in Bx [pB]
         #define GB_GETB(bkj,Bx,pB)                                          \
             GB_void bkj [GB_VLA(bkj_size)] ;                                \
-            if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize) ;
+            if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize)
 
         // break if cij reaches the terminal value
         #define GB_DOT_TERMINAL(cij)                                        \
@@ -436,31 +401,32 @@ GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
 
         // C(i,j) = A(i,k) * B(k,j)
         #define GB_MULT(cij, aki, bkj)                                      \
-            GB_MULTIPLY (cij, aki, bkj) ;                                   \
+            GB_MULTIPLY (cij, aki, bkj)
 
         // C(i,j) += A(i,k) * B(k,j)
         #define GB_MULTADD(cij, aki, bkj)                                   \
             GB_void zwork [GB_VLA(csize)] ;                                 \
             GB_MULTIPLY (zwork, aki, bkj) ;                                 \
-            fadd (cij, cij, zwork) ;
+            fadd (cij, cij, zwork)
 
         // define cij for each task
         #define GB_CIJ_DECLARE(cij)                                         \
-            GB_void cij [GB_VLA(csize)] ;
+            GB_void cij [GB_VLA(csize)]
 
         // address of Cx [p]
         #define GB_CX(p) Cx +((p)*csize)
 
         // save the value of C(i,j)
         #define GB_CIJ_SAVE(cij,p)                                          \
-            memcpy (GB_CX (p), cij, csize) ;
+            memcpy (GB_CX (p), cij, csize)
 
         #define GB_ATYPE GB_void
         #define GB_BTYPE GB_void
         #define GB_CTYPE GB_void
 
-        // loops with function pointers cannot be vectorized
-        #define GB_DOT_SIMD ;
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+        #define GB_PRAGMA_VECTORIZE_DOT
 
         if (flipxy)
         { 
diff --git a/Source/GB_AxB_dot3_one_slice.c b/Source/GB_AxB_dot3_one_slice.c
index e733c94ace..77b0c75937 100644
--- a/Source/GB_AxB_dot3_one_slice.c
+++ b/Source/GB_AxB_dot3_one_slice.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot3_one_slice: slice the entries and vectors of a single matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -41,7 +41,7 @@ GrB_Info GB_AxB_dot3_one_slice
     const GrB_Matrix M,             // matrix to slice
     GB_Context Context
 )
-{ 
+{
 
     //--------------------------------------------------------------------------
     // check inputs
@@ -115,7 +115,7 @@ GrB_Info GB_AxB_dot3_one_slice
     //--------------------------------------------------------------------------
 
     if (!GB_pslice (&Coarse, Mp, mnvec, ntasks1))
-    {
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
diff --git a/Source/GB_AxB_dot3_slice.c b/Source/GB_AxB_dot3_slice.c
index b4ff8fbfa3..086a84190a 100644
--- a/Source/GB_AxB_dot3_slice.c
+++ b/Source/GB_AxB_dot3_slice.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot3_slice: slice the entries and vectors for C<M>=A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -130,6 +130,7 @@ GrB_Info GB_AxB_dot3_slice
     double target_task_size = total_work / (double) (ntasks0) ;
     target_task_size = GB_IMAX (target_task_size, chunk) ;
     ntasks1 = total_work / target_task_size ;
+    ntasks1 = GB_IMIN (ntasks1, cnz) ;
     ntasks1 = GB_IMAX (ntasks1, 1) ;
 
     //--------------------------------------------------------------------------
@@ -137,7 +138,7 @@ GrB_Info GB_AxB_dot3_slice
     //--------------------------------------------------------------------------
 
     if (!GB_pslice (&Coarse, Cwork, cnz, ntasks1))
-    {
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
diff --git a/Source/GB_AxB_dot4.c b/Source/GB_AxB_dot4.c
new file mode 100644
index 0000000000..0a0325e1f9
--- /dev/null
+++ b/Source/GB_AxB_dot4.c
@@ -0,0 +1,281 @@
+//------------------------------------------------------------------------------
+// GB_AxB_dot4: compute C+=A'*B in place
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// GB_AxB_dot4 does its computation in a single phase, computing its result in
+// the input matrix C, which is already dense.  The mask M is not handled by
+// this function.
+
+#include "GB_mxm.h"
+#ifndef GBCOMPACT
+#include "GB_AxB__include.h"
+#endif
+
+#define GB_FREE_WORK                                            \
+{                                                               \
+    GB_FREE_MEMORY (A_slice, naslice+1, sizeof (int64_t)) ;     \
+    GB_FREE_MEMORY (B_slice, nbslice+1, sizeof (int64_t)) ;     \
+}
+
+GrB_Info GB_AxB_dot4                // C+=A'*B, dot product method
+(
+    GrB_Matrix C,                   // input/output matrix, must be dense
+    const GrB_Matrix A,             // input matrix
+    const GrB_Matrix B,             // input matrix
+    const GrB_Semiring semiring,    // semiring that defines C+=A'*B
+    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
+    GB_Context Context
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT_MATRIX_OK (C, "C for dot in place += A'*B", GB0) ;
+    ASSERT_MATRIX_OK (A, "A for dot in place += A'*B", GB0) ;
+    ASSERT_MATRIX_OK (B, "B for dot in place += A'*B", GB0) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
+    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
+    ASSERT (GB_is_dense (C)) ;
+    ASSERT_SEMIRING_OK (semiring, "semiring for in place += A'*B", GB0) ;
+    ASSERT (A->vlen == B->vlen) ;
+
+    int64_t *GB_RESTRICT A_slice = NULL ;
+    int64_t *GB_RESTRICT B_slice = NULL ;
+
+    //--------------------------------------------------------------------------
+    // get the semiring operators
+    //--------------------------------------------------------------------------
+
+    GrB_BinaryOp mult = semiring->multiply ;
+    GrB_Monoid add = semiring->add ;
+    ASSERT (mult->ztype == add->op->ztype) ;
+    ASSERT (C->type     == add->op->ztype) ;
+
+    bool op_is_first  = mult->opcode == GB_FIRST_opcode ;
+    bool op_is_second = mult->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = mult->opcode == GB_PAIR_opcode ;
+    bool A_is_pattern = false ;
+    bool B_is_pattern = false ;
+
+    if (flipxy)
+    { 
+        // z = fmult (b,a) will be computed
+        A_is_pattern = op_is_first  || op_is_pair ;
+        B_is_pattern = op_is_second || op_is_pair ;
+        ASSERT (GB_IMPLIES (!A_is_pattern,
+            GB_Type_compatible (A->type, mult->ytype))) ;
+        ASSERT (GB_IMPLIES (!B_is_pattern,
+            GB_Type_compatible (B->type, mult->xtype))) ;
+    }
+    else
+    { 
+        // z = fmult (a,b) will be computed
+        A_is_pattern = op_is_second || op_is_pair ;
+        B_is_pattern = op_is_first  || op_is_pair ;
+        ASSERT (GB_IMPLIES (!A_is_pattern,
+            GB_Type_compatible (A->type, mult->xtype))) ;
+        ASSERT (GB_IMPLIES (!B_is_pattern,
+            GB_Type_compatible (B->type, mult->ytype))) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    int64_t anz = GB_NNZ (A) ;
+    int64_t bnz = GB_NNZ (B) ;
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (anz + bnz, chunk, nthreads_max) ;
+
+    //--------------------------------------------------------------------------
+    // slice A and B
+    //--------------------------------------------------------------------------
+
+    int64_t anvec = A->nvec ;
+    int64_t bnvec = B->nvec ;
+
+    int naslice = (nthreads == 1) ? 1 : (16 * nthreads) ;
+    int nbslice = (nthreads == 1) ? 1 : (16 * nthreads) ;
+
+    naslice = GB_IMIN (naslice, anvec) ;
+    nbslice = GB_IMIN (nbslice, bnvec) ;
+
+    if (!GB_pslice (&A_slice, A->p, anvec, naslice)  ||
+        !GB_pslice (&B_slice, B->p, bnvec, nbslice))
+    { 
+        // out of memory
+        GB_FREE_WORK ;
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // C += A'*B, computing each entry with a dot product, via builtin semiring
+    //--------------------------------------------------------------------------
+
+    bool done = false ; // set true if a worker returns other than GrB_NO_VALUE
+
+#ifndef GBCOMPACT
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    #define GB_Adot4B(add,mult,xyname) GB_Adot4B_ ## add ## mult ## xyname
+
+    #define GB_AxB_WORKER(add,mult,xyname)          \
+    {                                               \
+        info = GB_Adot4B (add,mult,xyname) (C,      \
+            A, A_is_pattern, A_slice, naslice,      \
+            B, B_is_pattern, B_slice, nbslice,      \
+            nthreads) ;                             \
+        done = (info != GrB_NO_VALUE) ;             \
+    }                                               \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    GB_Opcode mult_opcode, add_opcode ;
+    GB_Type_code xycode, zcode ;
+
+    if (GB_AxB_semiring_builtin (A, A_is_pattern, B, B_is_pattern, semiring,
+        flipxy, &mult_opcode, &add_opcode, &xycode, &zcode))
+    { 
+        #include "GB_AxB_factory.c"
+    }
+
+#endif
+
+    //--------------------------------------------------------------------------
+    // C += A'*B, computing each entry with a dot product, with typecasting
+    //--------------------------------------------------------------------------
+
+    if (!done)
+    {
+        GB_BURBLE_MATRIX (C, "generic ") ;
+
+        //----------------------------------------------------------------------
+        // get operators, functions, type sizes, and typecasting functions
+        //----------------------------------------------------------------------
+
+        GxB_binary_function fmult = mult->function ;
+        GxB_binary_function fadd  = add->op->function ;
+
+        size_t csize = C->type->size ;
+        size_t asize = A_is_pattern ? 0 : A->type->size ;
+        size_t bsize = B_is_pattern ? 0 : B->type->size ;
+
+        size_t xsize = mult->xtype->size ;
+        size_t ysize = mult->ytype->size ;
+
+        // scalar workspace: because of typecasting, the sizes of the x/y
+        // types need not be the same as the sizes of the A and B types.
+        // flipxy false: aki = (xtype) A(k,i) and bkj = (ytype) B(k,j)
+        // flipxy true:  aki = (ytype) A(k,i) and bkj = (xtype) B(k,j)
+        size_t aki_size = flipxy ? ysize : xsize ;
+        size_t bkj_size = flipxy ? xsize : ysize ;
+
+        GB_void *GB_RESTRICT terminal = add->terminal ;
+
+        GB_cast_function cast_A, cast_B ;
+        if (flipxy)
+        { 
+            // A is typecasted to y, and B is typecasted to x
+            cast_A = A_is_pattern ? NULL : 
+                     GB_cast_factory (mult->ytype->code, A->type->code) ;
+            cast_B = B_is_pattern ? NULL : 
+                     GB_cast_factory (mult->xtype->code, B->type->code) ;
+        }
+        else
+        { 
+            // A is typecasted to x, and B is typecasted to y
+            cast_A = A_is_pattern ? NULL :
+                     GB_cast_factory (mult->xtype->code, A->type->code) ;
+            cast_B = B_is_pattern ? NULL :
+                     GB_cast_factory (mult->ytype->code, B->type->code) ;
+        }
+
+        //----------------------------------------------------------------------
+        // C += A'*B via dot products, function pointers, and typecasting
+        //----------------------------------------------------------------------
+
+        // aki = A(k,i), located in Ax [pA]
+        #define GB_GETA(aki,Ax,pA)                                          \
+            GB_void aki [GB_VLA(aki_size)] ;                                \
+            if (!A_is_pattern) cast_A (aki, Ax +((pA)*asize), asize)
+
+        // bkj = B(k,j), located in Bx [pB]
+        #define GB_GETB(bkj,Bx,pB)                                          \
+            GB_void bkj [GB_VLA(bkj_size)] ;                                \
+            if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize)
+
+        // break if cij reaches the terminal value
+        #define GB_DOT_TERMINAL(cij)                                        \
+            if (terminal != NULL && memcmp (cij, terminal, csize) == 0)     \
+            {                                                               \
+                break ;                                                     \
+            }
+
+        // C(i,j) += A(k,i) * B(k,j)
+        #define GB_MULTADD(cij, aki, bkj)                                   \
+            GB_void zwork [GB_VLA(csize)] ;                                 \
+            GB_MULTIPLY (zwork, aki, bkj) ;                                 \
+            fadd (cij, cij, zwork)
+
+        // define cij for each task
+        #define GB_CIJ_DECLARE(cij)                                         \
+            GB_void cij [GB_VLA(csize)]
+
+        // address of Cx [p]
+        #define GB_CX(p) Cx +((p)*csize)
+
+        // cij = Cx [p]
+        #define GB_GETC(cij,pC)                                             \
+            memcpy (cij, GB_CX (pC), csize)
+
+        // Cx [p] = cij
+        #define GB_PUTC(cij,pC)                                             \
+            memcpy (GB_CX (pC), cij, csize)
+
+        #define GB_ATYPE GB_void
+        #define GB_BTYPE GB_void
+        #define GB_CTYPE GB_void
+
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+        #define GB_PRAGMA_VECTORIZE_DOT
+
+        if (flipxy)
+        { 
+            #define GB_MULTIPLY(z,x,y) fmult (z,y,x)
+            #include "GB_AxB_dot4_template.c"
+            #undef GB_MULTIPLY
+        }
+        else
+        { 
+            #define GB_MULTIPLY(z,x,y) fmult (z,x,y)
+            #include "GB_AxB_dot4_template.c"
+            #undef GB_MULTIPLY
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    GB_FREE_WORK ;
+    ASSERT_MATRIX_OK (C, "dot: C += A'*B output", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_AxB_flopcount.c b/Source/GB_AxB_flopcount.c
index bcd4c491e8..f8e190ce67 100644
--- a/Source/GB_AxB_flopcount.c
+++ b/Source/GB_AxB_flopcount.c
@@ -1,24 +1,21 @@
 //------------------------------------------------------------------------------
-// GB_AxB_flopcount:  compute flops for C<M>=A*B or C=A*B
+// GB_AxB_flopcount:  compute flops for C=A*B, C<M>=A*B, or C<!M>=A*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// On input, A and B are two matrices for C<M>=A*B or C=A*B.  The flop count
-// for each B(:,j) is computed, and returned as a cumulative sum.  This
-// function is CSR/CSC agnostic, but for simplicity of this description, assume
-// A and B are both CSC matrices, so that ncols(A) == nrows(B).  For both CSR
-// and CSC, A->vdim == B->vlen holds.  A and/or B may be hypersparse, in any
-// combination.
+// On input, A, B, and M (optional) are matrices for C=A*B, C<M>=A*B, or
+// C<!M>=A*B.  The flop count for each B(:,j) is computed, and returned as a
+// cumulative sum.  This function is CSR/CSC agnostic, but for simplicity of
+// this description, assume A and B are both CSC matrices, so that ncols(A) ==
+// nrows(B).  For both CSR and CSC, A->vdim == B->vlen holds.  A and/or B may
+// be hypersparse, in any combination.
 
-// The complemented mask is not handled, so the flops for C<!M>=A*B is not
-// computed.
-
-// If present, Bflops has size (B->nvec)+1, for both standard and hypersparse
-// B.  Let n = B->vdim be the column dimension of B (that is, B is m-by-n).
+// Bflops has size (B->nvec)+1, for both standard and hypersparse B.  Let
+// n=B->vdim be the column dimension of B (that is, B is m-by-n).
 
 // If B is a standard CSC matrix then Bflops has size n+1 == B->nvec+1, and on
 // output, Bflops [j] is the # of flops required to compute C (:, 0:j-1).  B->h
@@ -37,9 +34,9 @@
 // (by slicing Bflops).
 
 // This algorithm does not look at the values of M, A, or B, just their
-// patterns.  If the mask is present, it is assumed to not be complemented.
-// The flop count of C=A*B or C<M>=A*B is computed for a saxpy-based method;
-// the work for A'*B for the dot product method is not computed.
+// patterns.  The flop count of C=A*B, C<M>=A*B, or C<!M>=A*B is computed for a
+// saxpy-based method; the work for A'*B for the dot product method is not
+// computed.
 
 // The algorithm scans all nonzeros in B.  It only scans at most the min and
 // max (first and last) row indices in A and M (if M is present).  If A and M
@@ -51,31 +48,31 @@
 /*
     [m n] = size (B) ;
     Bflops = zeros (1,n+1) ;        % (set to zero in the caller)
+    Mwork = 0 ;
     for each column j in B:
         if (B (:,j) is empty) continue ;
-        if (M is present and M (:,j) is empty) continue ;
+        mjnz = nnz (M (:,j))
+        if (M is present, not complemented, and M (:,j) is empty) continue ;
         im_first = min row index of nonzeros in M(:,j)
         im_last  = max row index of nonzeros in M(:,j)
+        Bflops (j) = mjnz if M present, to scatter M(:,j) (M or !M case)
+        Mwork += mjnz
         for each k where B (k,j) is nonzero:
             aknz = nnz (A (:,k))
             if (aknz == 0) continue ;
             alo = min row index of nonzeros in A(:,k)
             ahi = max row index of nonzeros in A(:,k)
-            if (M is present)
+            if (M is present and not complemented)
                 if (intersection (alo:ahi, im_first:im_last) empty) continue
             end
-            % numerical phase will compute: C(:,j)<M(:,j)> += A(:,k)*B(k,j),
-            % which takes aknz flops, so:
+            % numerical phase will compute: C(:,j)<#M(:,j)> += A(:,k)*B(k,j)
+            % where #M is no mask, M, or !M.  This typically takes aknz flops,
+            % or with a binary search if nnz(M(:,j)) << nnz(A(:,k)).
             Bflops (j) += aknz
-            Bflops_per_entry (k,j) = aknz
         end
     end
 */ 
 
-// If Bflops and Bflops_per_entry are both NULL, then only the true/false
-// result of the test (total_flops <= floplimit) is returned.  This allows the
-// function to return early, once the total_flops exceeds the threshold.
-
 #include "GB_mxm.h"
 #include "GB_ek_slice.h"
 #include "GB_bracket.h"
@@ -85,18 +82,16 @@
     GB_ek_slice_free (&pstart_slice, &kfirst_slice, &klast_slice, ntasks) ; \
     GB_FREE_MEMORY (Wfirst, ntasks, sizeof (int64_t)) ;                     \
     GB_FREE_MEMORY (Wlast,  ntasks, sizeof (int64_t)) ;                     \
-    GB_FREE_MEMORY (Flops,  ntasks+1, sizeof (int64_t)) ;                   \
 }
 
 GrB_Info GB_AxB_flopcount
 (
-    bool *result,               // result of test (total_flops <= floplimit)
-    int64_t *Bflops,            // size B->nvec+1 and all zero, if present
-    int64_t *Bflops_per_entry,  // size nnz(B)+1 and all zero, if present
+    int64_t *Mwork,             // amount of work to handle the mask M
+    int64_t *Bflops,            // size B->nvec+1 and all zero
     const GrB_Matrix M,         // optional mask matrix
+    const bool Mask_comp,       // if true, mask is complemented
     const GrB_Matrix A,
     const GrB_Matrix B,
-    int64_t floplimit,          // maximum flops to compute if Bflops NULL
     GB_Context Context
 )
 {
@@ -112,6 +107,8 @@ GrB_Info GB_AxB_flopcount
     ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
     ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
     ASSERT (A->vdim == B->vlen) ;
+    ASSERT (Bflops != NULL) ;
+    ASSERT (Mwork != NULL) ;
 
     //--------------------------------------------------------------------------
     // determine the number of threads to use
@@ -123,28 +120,11 @@ GrB_Info GB_AxB_flopcount
     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
     int nthreads = GB_nthreads (bnz + bnvec, chunk, nthreads_max) ;
 
-    //--------------------------------------------------------------------------
-    // determine the kind of result to return
-    //--------------------------------------------------------------------------
-
-    bool check_quick_return = (Bflops == NULL) && (Bflops_per_entry == NULL) ;
-
     #ifdef GB_DEBUG
-    if (Bflops != NULL)
+    // Bflops must be set to zero in the caller
+    for (int64_t kk = 0 ; kk <= bnvec ; kk++)
     {
-        // Bflops is set to zero in the calller
-        for (int64_t kk = 0 ; kk <= bnvec ; kk++)
-        {
-            ASSERT (Bflops [kk] == 0) ;
-        }
-    }
-    if (Bflops_per_entry != NULL)
-    {
-        // Bflops_per_entry is set to zero in the calller
-        for (int64_t pB = 0 ; pB <= bnz ; pB++)
-        {
-            ASSERT (Bflops_per_entry [pB] == 0) ;
-        }
+        ASSERT (Bflops [kk] == 0) ;
     }
     #endif
 
@@ -152,6 +132,7 @@ GrB_Info GB_AxB_flopcount
     // get the mask, if present
     //--------------------------------------------------------------------------
 
+    bool mask_is_M = (M != NULL && !Mask_comp) ;
     const int64_t *GB_RESTRICT Mh = NULL ;
     const int64_t *GB_RESTRICT Mp = NULL ;
     const int64_t *GB_RESTRICT Mi = NULL ;
@@ -184,20 +165,19 @@ GrB_Info GB_AxB_flopcount
     // construct the parallel tasks
     //--------------------------------------------------------------------------
 
-    // Task tid does entries pstart_slice [tid] to pstart_slice [tid+1]-1
-    // and vectors kfirst_slice [tid] to klast_slice [tid].  The first and
-    // last vectors may be shared with prior slices and subsequent slices.
+    // taskid does entries pstart_slice [taskid] to pstart_slice [taskid+1]-1
+    // and vectors kfirst_slice [taskid] to klast_slice [taskid].  The first
+    // and last vectors may be shared with prior slices and subsequent slices.
 
     int64_t *GB_RESTRICT Wfirst = NULL ;       // size ntasks
     int64_t *GB_RESTRICT Wlast = NULL ;        // size ntasks
-    int64_t *GB_RESTRICT Flops = NULL ;        // size ntasks+1
 
     int ntasks = (nthreads == 1) ? 1 : (64 * nthreads) ;
     ntasks = GB_IMIN (ntasks, bnz) ;
     ntasks = GB_IMAX (ntasks, 1) ;
     int64_t *pstart_slice, *kfirst_slice, *klast_slice ;
     if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, B, ntasks))
-    {
+    { 
         // out of memory
         GB_FREE_WORK ;
         return (GB_OUT_OF_MEMORY) ;
@@ -209,95 +189,93 @@ GrB_Info GB_AxB_flopcount
 
     GB_MALLOC_MEMORY (Wfirst, ntasks, sizeof (int64_t)) ;
     GB_MALLOC_MEMORY (Wlast,  ntasks, sizeof (int64_t)) ;
-    GB_MALLOC_MEMORY (Flops,  ntasks+1, sizeof (int64_t)) ;
-    if (Wfirst == NULL || Wlast == NULL || Flops == NULL)
-    {
+    if (Wfirst == NULL || Wlast == NULL)
+    { 
         // out of memory
         GB_FREE_WORK ;
         return (GB_OUT_OF_MEMORY) ;
     }
 
     //--------------------------------------------------------------------------
-    // compute flop counts for C<M> = A*B
+    // compute flop counts for C=A*B, C<M>=A*B, or C<!M>=A*B
     //--------------------------------------------------------------------------
 
-    int64_t total_flops = 0 ;
-
-    int tid ;
-    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
-    for (tid = 0 ; tid < ntasks ; tid++)
+    int64_t total_Mwork = 0 ;
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
+        reduction(+:total_Mwork)
+    for (taskid = 0 ; taskid < ntasks ; taskid++)
     {
 
-        //----------------------------------------------------------------------
-        // skip this task if limit already reached
-        //----------------------------------------------------------------------
-
-        bool quick_return = false ;
-        int64_t flops_so_far = 0 ;
-        if (check_quick_return)
-        {
-            { 
-                #if GB_MICROSOFT
-                    #pragma omp critical (GB_AxB_flopcount)
-                    flops_so_far = total_flops ;
-                #else
-                    #pragma omp atomic read
-                    flops_so_far = total_flops ;
-                #endif
-            }
-            if (flops_so_far > floplimit) continue ;
-        }
-
         //----------------------------------------------------------------------
         // get the task descriptor
         //----------------------------------------------------------------------
 
-        int64_t kfirst = kfirst_slice [tid] ;
-        int64_t klast  = klast_slice  [tid] ;
-        int64_t task_flops = 0 ;
-        Wfirst [tid] = 0 ;
-        Wlast  [tid] = 0 ;
+        int64_t kfirst = kfirst_slice [taskid] ;
+        int64_t klast  = klast_slice  [taskid] ;
+        Wfirst [taskid] = 0 ;
+        Wlast  [taskid] = 0 ;
         int64_t mpleft = 0 ;     // for GB_lookup of the mask M
+        int64_t task_Mwork = 0 ;
 
         //----------------------------------------------------------------------
         // count flops for vectors kfirst to klast of B
         //----------------------------------------------------------------------
 
-        for (int64_t kk = kfirst ; !quick_return && (kk <= klast) ; kk++)
+        for (int64_t kk = kfirst ; kk <= klast ; kk++)
         {
 
+            // nnz (B (:,j)), for all tasks
+            int64_t bjnz = Bp [kk+1] - Bp [kk] ;
+            // C(:,j) is empty if the entire vector B(:,j) is empty
+            if (bjnz == 0) continue ;
+
             //------------------------------------------------------------------
             // find the part of B(:,j) to be computed by this task
             //------------------------------------------------------------------
 
             int64_t pB, pB_end ;
             GB_get_pA_and_pC (&pB, &pB_end, NULL,
-                tid, kk, kfirst, klast, pstart_slice, NULL, NULL, Bp) ;
-
+                taskid, kk, kfirst, klast, pstart_slice, NULL, NULL, Bp) ;
+            int64_t my_bjnz = pB_end - pB ;
             int64_t j = (B_is_hyper) ? Bh [kk] : kk ;
 
-            // C(:,j) is empty if B(:,j) is empty
-            int64_t bjnz = pB_end - pB ;
-            if (bjnz == 0) continue ;
-
             //------------------------------------------------------------------
             // see if M(:,j) is present and non-empty
             //------------------------------------------------------------------
 
+            int64_t bjflops = 0 ;
             int64_t im_first = -1, im_last = -1 ;
+            int64_t mjnz = 0 ;
             if (M != NULL)
-            { 
+            {
                 int64_t mpright = mnvec - 1 ;
                 int64_t pM, pM_end ;
                 GB_lookup (M_is_hyper, Mh, Mp, &mpleft, mpright, j,
                     &pM, &pM_end) ;
-                int64_t mjnz = pM_end - pM ;
-                // C(:,j) is empty if M(:,j) is empty
-                if (mjnz == 0) continue ;
-                // M(:,j) has at least 1 entry; get 1st and last index in M(:,j)
-                im_first = Mi [pM] ;
-                im_last  = Mi [pM_end-1] ;
+                mjnz = pM_end - pM ;
+                // If M not complemented: C(:,j) is empty if M(:,j) is empty.
+                if (mjnz == 0 && !Mask_comp) continue ;
+                if (mjnz > 0)
+                {
+                    // M(:,j) not empty; get 1st and last index in M(:,j)
+                    im_first = Mi [pM] ;
+                    im_last  = Mi [pM_end-1] ;
+                    if (pB == Bp [kk])
+                    { 
+                        // this task owns the top part of B(:,j), so it can
+                        // account for the work to access M(:,j), without the
+                        // work being duplicated by other tasks working on
+                        // B(:,j)
+                        bjflops = mjnz ;
+                        // keep track of total work spent examining the mask.
+                        // If any B(:,j) is empty, M(:,j) can be ignored.  So
+                        // total_Mwork will be <= nnz (M).
+                        task_Mwork += mjnz ;
+                    }
+                }
             }
+            int64_t mjnz_much = 64 * mjnz ;
 
             //------------------------------------------------------------------
             // trim Ah on right
@@ -313,17 +291,17 @@ GrB_Info GB_AxB_flopcount
 
             int64_t pleft = 0 ;
             int64_t pright = anvec-1 ;
-            if (A_is_hyper && bjnz > 2)
+            if (A_is_hyper && my_bjnz > 2)
             { 
                 // trim Ah [0..pright] to remove any entries past last B(:,j)
                 GB_bracket_right (Bi [pB_end-1], Ah, 0, &pright) ;
             }
 
             //------------------------------------------------------------------
-            // count the flops to compute C(:,j)<M(:,j)> = A*B(:,j)
+            // count the flops to compute C(:,j)<#M(:,j)> = A*B(:,j)
             //------------------------------------------------------------------
 
-            int64_t bjflops = 0 ;
+            // where #M is either not present, M, or !M
 
             for ( ; pB < pB_end ; pB++)
             {
@@ -338,206 +316,149 @@ GrB_Info GB_AxB_flopcount
                 int64_t aknz = pA_end - pA ;
                 if (aknz == 0) continue ;
 
+                double bkjflops ;
+
                 // skip if intersection of A(:,k) and M(:,j) is empty
-                if (M != NULL)
-                { 
+                // and mask is not complemented (C<M>=A*B)
+                if (mask_is_M)
+                {
                     // A(:,k) is non-empty; get first and last index of A(:,k)
                     int64_t alo = Ai [pA] ;
                     int64_t ahi = Ai [pA_end-1] ;
                     if (ahi < im_first || alo > im_last) continue ;
+                    if (aknz > 256 && mjnz_much < aknz)
+                    { 
+                        // scan M(:,j), and do binary search for A(i,k)
+                        bkjflops = mjnz * (1 + 4 * log2 ((double) aknz)) ;
+                    }
+                    else
+                    { 
+                        // scan A(:,k), and lookup M(i,j)
+                        bkjflops = aknz ;
+                    }
                 }
-
-                // increment by flops for the single entry B(k,j)
-                // C(:,j)<M(:,j)> += A(:,k)*B(k,j).
-                bjflops += aknz ;
-
-                if (Bflops_per_entry != NULL)
+                else
                 { 
-                    // flops for the single entry, B(k,j)
-                    Bflops_per_entry [pB] = aknz ;
+                    // A(:,k)*B(k,j) requires aknz flops
+                    bkjflops = aknz ;
                 }
 
-                // check for a quick return
-                if (check_quick_return)
-                {
-                    flops_so_far += aknz ;
-                    if (flops_so_far > floplimit)
-                    { 
-                        // flop limit has been reached; terminate this and all
-                        // other tasks
-                        quick_return = true ;
-                        break ;
-                    }
-                }
+                // increment by flops for the single entry B(k,j)
+                // C(:,j)<#M(:,j)> += A(:,k)*B(k,j).
+                bjflops += bkjflops ;
             }
 
-            //------------------------------------------------------------------
-            // sum up the flops for this task
-            //------------------------------------------------------------------
-
-            task_flops += bjflops ;
-
             //------------------------------------------------------------------
             // log the flops for B(:,j)
             //------------------------------------------------------------------
 
-            if (Bflops != NULL)
+            if (kk == kfirst)
             { 
-                if (kk == kfirst)
-                { 
-                    Wfirst [tid] = bjflops ;
-                }
-                else if (kk == klast)
-                { 
-                    Wlast [tid] = bjflops ;
-                }
-                else
-                { 
-                    Bflops [kk] = bjflops ;
-                }
+                Wfirst [taskid] = bjflops ;
+            }
+            else if (kk == klast)
+            { 
+                Wlast [taskid] = bjflops ;
+            }
+            else
+            { 
+                Bflops [kk] = bjflops ;
             }
         }
 
-        //----------------------------------------------------------------------
-        // log the flops for this task
-        //----------------------------------------------------------------------
-
-        Flops [tid] = task_flops ;
-        if (check_quick_return)
-        { 
-            #if GB_MICROSOFT
-                #pragma omp critical (GB_AxB_flopcount)
-                total_flops += task_flops ;
-            #else
-                #pragma omp atomic update
-                total_flops += task_flops ;
-            #endif
-        }
+        // compute the total work to access the mask, which is <= nnz (M)
+        total_Mwork += task_Mwork ;
     }
 
     //--------------------------------------------------------------------------
-    // finalize the results
+    // reduce the first and last vector of each slice
     //--------------------------------------------------------------------------
 
-    if (check_quick_return)
-    { 
+    // See also Template/GB_reduce_each_vector.c
 
-        // The only output of this function is the result of this test:
-        (*result) = (total_flops <= floplimit) ;
+    int64_t kprior = -1 ;
 
-    }
-    else
+    for (int taskid = 0 ; taskid < ntasks ; taskid++)
     {
 
         //----------------------------------------------------------------------
-        // cumulative sum of Bflops and Bflops_per_entry
+        // sum up the partial flops that taskid computed for kfirst
         //----------------------------------------------------------------------
 
-        GB_cumsum (Flops, ntasks, NULL, 1) ;
-        int64_t total_flops = Flops [ntasks] ;
-        (*result) = (total_flops <= floplimit) ;
+        int64_t kfirst = kfirst_slice [taskid] ;
+        int64_t klast  = klast_slice  [taskid] ;
 
-        if (Bflops != NULL)
+        if (kfirst <= klast)
         {
-
-            //------------------------------------------------------------------
-            // reduce the first and last vector of each slice
-            //------------------------------------------------------------------
-
-            // See also Template/GB_reduce_each_vector.c
-
-            int64_t kprior = -1 ;
-
-            for (int tid = 0 ; tid < ntasks ; tid++)
+            int64_t pB = pstart_slice [taskid] ;
+            int64_t pB_end =
+                GB_IMIN (Bp [kfirst+1], pstart_slice [taskid+1]) ;
+            if (pB < pB_end)
             {
-
-                //--------------------------------------------------------------
-                // sum up the partial flops that task tid computed for kfirst
-                //--------------------------------------------------------------
-
-                int64_t kfirst = kfirst_slice [tid] ;
-                int64_t klast  = klast_slice  [tid] ;
-
-                if (kfirst <= klast)
-                {
-                    int64_t pB = pstart_slice [tid] ;
-                    int64_t pB_end =
-                        GB_IMIN (Bp [kfirst+1], pstart_slice [tid+1]) ;
-                    if (pB < pB_end)
-                    {
-                        if (kprior < kfirst)
-                        { 
-                            // This task is the first one that did work on
-                            // B(:,kfirst), so use it to start the reduction.
-                            Bflops [kfirst] = Wfirst [tid] ;
-                        }
-                        else
-                        { 
-                            // subsequent task for B(:,kfirst)
-                            Bflops [kfirst] += Wfirst [tid] ;
-                        }
-                        kprior = kfirst ;
-                    }
+                if (kprior < kfirst)
+                { 
+                    // This task is the first one that did work on
+                    // B(:,kfirst), so use it to start the reduction.
+                    Bflops [kfirst] = Wfirst [taskid] ;
+                }
+                else
+                { 
+                    // subsequent task for B(:,kfirst)
+                    Bflops [kfirst] += Wfirst [taskid] ;
                 }
+                kprior = kfirst ;
+            }
+        }
 
-                //--------------------------------------------------------------
-                // sum up the partial flops that task tid computed for klast
-                //--------------------------------------------------------------
+        //----------------------------------------------------------------------
+        // sum up the partial flops that taskid computed for klast
+        //----------------------------------------------------------------------
 
-                if (kfirst < klast)
+        if (kfirst < klast)
+        {
+            int64_t pB = Bp [klast] ;
+            int64_t pB_end   = pstart_slice [taskid+1] ;
+            if (pB < pB_end)
+            {
+                /* if */ ASSERT (kprior < klast) ;
+                { 
+                    // This task is the first one that did work on
+                    // B(:,klast), so use it to start the reduction.
+                    Bflops [klast] = Wlast [taskid] ;
+                }
+                /*
+                else
                 {
-                    int64_t pB = Bp [klast] ;
-                    int64_t pB_end   = pstart_slice [tid+1] ;
-                    if (pB < pB_end)
-                    {
-                        /* if */ ASSERT (kprior < klast) ;
-                        { 
-                            // This task is the first one that did work on
-                            // B(:,klast), so use it to start the reduction.
-                            Bflops [klast] = Wlast [tid] ;
-                        }
-                        /*
-                        else
-                        {
-                            // If kfirst < klast and B(:,klast) is not empty,
-                            // then this task is always the first one to do
-                            // work on B(:,klast), so this case is never used.
-                            ASSERT (GB_DEAD_CODE) ;
-                            // subsequent task to work on B(:,klast)
-                            Bflops [klast] += Wlast [tid] ;
-                        }
-                        */
-                        kprior = klast ;
-                    }
+                    // If kfirst < klast and B(:,klast) is not empty,
+                    // then this task is always the first one to do
+                    // work on B(:,klast), so this case is never used.
+                    ASSERT (GB_DEAD_CODE) ;
+                    // subsequent task to work on B(:,klast)
+                    Bflops [klast] += Wlast [taskid] ;
                 }
+                */
+                kprior = klast ;
             }
-
-            //------------------------------------------------------------------
-            // cumulative sum of Bflops
-            //------------------------------------------------------------------
-
-            // Bflops = cumsum ([0 Bflops]) ;
-            ASSERT (Bflops [bnvec] == 0) ;
-            GB_cumsum (Bflops, bnvec, NULL, nthreads) ;
-            // Bflops [bnvec] is now the total flop count
-            ASSERT (total_flops == Bflops [bnvec]) ;
-        }
-
-        if (Bflops_per_entry != NULL)
-        { 
-            // Bflops_per_entry = cumsum ([0 Bflops_per_entry]) ;
-            ASSERT (Bflops_per_entry [bnz] == 0) ;
-            GB_cumsum (Bflops_per_entry, bnz, NULL, nthreads) ;
-            // Bflops_per_entry [bnz] is now the total flop count
-            ASSERT (total_flops == Bflops_per_entry [bnz]) ;
         }
     }
 
+    //--------------------------------------------------------------------------
+    // cumulative sum of Bflops
+    //--------------------------------------------------------------------------
+
+    // Bflops = cumsum ([0 Bflops]) ;
+    ASSERT (Bflops [bnvec] == 0) ;
+    GB_cumsum (Bflops, bnvec, NULL, nthreads) ;
+    // Bflops [bnvec] is now the total flop count, including the time to
+    // compute A*B and to handle the mask.  total_Mwork is part of this total
+    // flop count, but is also returned separately.
+
     //--------------------------------------------------------------------------
     // free workspace and return result
     //--------------------------------------------------------------------------
 
     GB_FREE_WORK ;
+    (*Mwork) = total_Mwork ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GB_AxB_heap.c b/Source/GB_AxB_heap.c
deleted file mode 100644
index 4a45e2191e..0000000000
--- a/Source/GB_AxB_heap.c
+++ /dev/null
@@ -1,336 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_heap: compute C<M> = A*B using a heap-based method
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Does not log an error; returns GrB_SUCCESS, GrB_OUT_OF_MEMORY, or GrB_PANIC.
-
-#include "GB_mxm.h"
-#include "GB_heap.h"
-#include "GB_jappend.h"
-#include "GB_bracket.h"
-#include "GB_iterator.h"
-#ifndef GBCOMPACT
-#include "GB_AxB__include.h"
-#endif
-
-GrB_Info GB_AxB_heap                // C<M>=A*B or C=A*B using a heap
-(
-    GrB_Matrix *Chandle,            // output matrix
-    const GrB_Matrix M_in,          // mask matrix for C<M>=A*B
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix
-    const GrB_Matrix B,             // input matrix
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    bool *mask_applied,             // if true, mask was applied
-    const int64_t bjnz_max          // max # entries in any vector of B
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_DEBUG
-    // only one thread does this entire function
-    GB_Context Context = NULL ;
-    #endif
-    ASSERT (Chandle != NULL) ;
-    ASSERT_MATRIX_OK_OR_NULL (M_in, "M_in for heap A*B", GB0) ;
-    ASSERT_MATRIX_OK (A, "A for heap A*B", GB0) ;
-    ASSERT_MATRIX_OK (B, "B for heap A*B", GB0) ;
-    ASSERT (!GB_PENDING (M_in)) ; ASSERT (!GB_ZOMBIES (M_in)) ;
-    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
-    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
-    ASSERT_SEMIRING_OK (semiring, "semiring for heap A*B", GB0) ;
-    ASSERT (A->vdim == B->vlen) ;
-    ASSERT (mask_applied != NULL) ;
-
-    //--------------------------------------------------------------------------
-    // get the semiring operators
-    //--------------------------------------------------------------------------
-
-    GrB_BinaryOp mult = semiring->multiply ;
-    GrB_Monoid add = semiring->add ;
-    ASSERT (mult->ztype == add->op->ztype) ;
-
-    bool op_is_first  = mult->opcode == GB_FIRST_opcode ;
-    bool op_is_second = mult->opcode == GB_SECOND_opcode ;
-    bool A_is_pattern = false ;
-    bool B_is_pattern = false ;
-
-    if (flipxy)
-    { 
-        // z = fmult (b,a) will be computed
-        A_is_pattern = op_is_first  ;
-        B_is_pattern = op_is_second ;
-        ASSERT (GB_IMPLIES (!A_is_pattern,
-            GB_Type_compatible (A->type, mult->ytype))) ;
-        ASSERT (GB_IMPLIES (!B_is_pattern,
-            GB_Type_compatible (B->type, mult->xtype))) ;
-    }
-    else
-    { 
-        // z = fmult (a,b) will be computed
-        A_is_pattern = op_is_second ;
-        B_is_pattern = op_is_first  ;
-        ASSERT (GB_IMPLIES (!A_is_pattern,
-            GB_Type_compatible (A->type, mult->xtype))) ;
-        ASSERT (GB_IMPLIES (!B_is_pattern,
-            GB_Type_compatible (B->type, mult->ytype))) ;
-    }
-
-    (*Chandle) = NULL ;
-
-    // the heap method does not handle a complemented mask
-    GrB_Matrix M = (Mask_comp ? NULL : M_in) ;
-
-    //--------------------------------------------------------------------------
-    // allocate workspace
-    //--------------------------------------------------------------------------
-
-    // int64_t List [0..bjnz_max-1] ;
-    // GB_pointer_pair pA_pair [0..bjnz_max-1] ;
-    // GB_Element Heap [0..bjnz_max] ;              // Heap [0] unused
-
-    int64_t *List = NULL ;
-    GB_MALLOC_MEMORY (List, bjnz_max, sizeof (int64_t)) ;
-
-    GB_pointer_pair *pA_pair = NULL ;
-    GB_MALLOC_MEMORY (pA_pair, bjnz_max, sizeof (GB_pointer_pair)) ;
-
-    GB_Element *Heap = NULL ;
-    GB_MALLOC_MEMORY (Heap, bjnz_max + 1, sizeof (GB_Element)) ;
-
-    #define GB_HEAP_FREE_WORK                                           \
-    {                                                                   \
-        GB_FREE_MEMORY (List, bjnz_max, sizeof (int64_t)) ;             \
-        GB_FREE_MEMORY (pA_pair, bjnz_max, sizeof (GB_pointer_pair)) ;  \
-        GB_FREE_MEMORY (Heap, bjnz_max + 1, sizeof (GB_Element)) ;      \
-    }
-
-    if (List == NULL || pA_pair == NULL || Heap == NULL)
-    { 
-        // out of memory
-        GB_HEAP_FREE_WORK ;
-        return (GrB_OUT_OF_MEMORY) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // esimate nnz(C) and allocate C (both pattern and values)
-    //--------------------------------------------------------------------------
-
-    int64_t cvlen = A->vlen ;
-    int64_t cvdim = B->vdim ;
-    GrB_Type ctype = semiring->add->op->ztype ;
-
-    GrB_Info info = GB_AxB_alloc (Chandle, ctype, cvlen, cvdim, M, A, B, true,
-        15) ;
-
-    if (info != GrB_SUCCESS)
-    { 
-        // out of memory
-        GB_HEAP_FREE_WORK ;
-        return (info) ;
-    }
-
-    GrB_Matrix C = (*Chandle) ;
-
-    //--------------------------------------------------------------------------
-    // C = A*B with a heap and builtin semiring
-    //--------------------------------------------------------------------------
-
-    bool done = false ;
-
-#ifndef GBCOMPACT
-
-    //--------------------------------------------------------------------------
-    // define the worker for the switch factory
-    //--------------------------------------------------------------------------
-
-    #define GB_AheapB(add,mult,xyname) GB_AheapB_ ## add ## mult ## xyname
-
-    #define GB_AxB_WORKER(add,mult,xyname)                  \
-    {                                                       \
-        info = GB_AheapB (add,mult,xyname) (Chandle, M,     \
-            A, A_is_pattern, B, B_is_pattern,               \
-            List, pA_pair, Heap, bjnz_max) ;                \
-        done = (info != GrB_NO_VALUE) ;                     \
-    }                                                       \
-    break ;
-
-    //--------------------------------------------------------------------------
-    // launch the switch factory
-    //--------------------------------------------------------------------------
-
-    GB_Opcode mult_opcode, add_opcode ;
-    GB_Type_code xycode, zcode ;
-
-    if (GB_AxB_semiring_builtin (A, A_is_pattern, B, B_is_pattern, semiring,
-        flipxy, &mult_opcode, &add_opcode, &xycode, &zcode))
-    { 
-        #include "GB_AxB_factory.c"
-    }
-
-    if (! (info == GrB_SUCCESS || info == GrB_NO_VALUE))
-    { 
-        // out of memory
-        GB_HEAP_FREE_WORK ;
-        return (info) ;
-    }
-
-#endif
-
-    //--------------------------------------------------------------------------
-    // user semirings created at compile time
-    //--------------------------------------------------------------------------
-
-    if (semiring->object_kind == GB_USER_COMPILED)
-    {
-        // determine the required type of A and B for the user semiring
-        GrB_Type atype_required, btype_required ;
-
-        if (flipxy)
-        { 
-            // A is passed as y, and B as x, in z = mult(x,y)
-            atype_required = mult->ytype ;
-            btype_required = mult->xtype ;
-        }
-        else
-        { 
-            // A is passed as x, and B as y, in z = mult(x,y)
-            atype_required = mult->xtype ;
-            btype_required = mult->ytype ;
-        }
-
-        if (A->type == atype_required && B->type == btype_required)
-        {
-            info = GB_AxB_user (GxB_AxB_HEAP, semiring, Chandle, M, A, B,
-                flipxy,
-                /* heap: */ List, pA_pair, Heap, bjnz_max,
-                /* Gustavson: */ NULL,
-                /* dot2: */ NULL, NULL, 1, 1, 1, NULL,
-                /* dot3: */ NULL, 0) ;
-            done = true ;
-            if (info != GrB_SUCCESS)
-            { 
-                // out of memory
-                GB_HEAP_FREE_WORK ;
-                return (info) ;
-            }
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // C = A*B, with a heap, and typecasting
-    //--------------------------------------------------------------------------
-
-    if (!done)
-    {
-
-        //----------------------------------------------------------------------
-        // get operators, functions, workspace, contents of A, B, C, and M
-        //----------------------------------------------------------------------
-
-        GxB_binary_function fmult = mult->function ;
-        GxB_binary_function fadd  = add->op->function ;
-
-        size_t csize = C->type->size ;
-        size_t asize = A_is_pattern ? 0 : A->type->size ;
-        size_t bsize = B_is_pattern ? 0 : B->type->size ;
-
-        size_t xsize = mult->xtype->size ;
-        size_t ysize = mult->ytype->size ;
-
-        // scalar workspace
-        // flipxy false: aik = (xtype) A(i,k) and bkj = (ytype) B(k,j)
-        // flipxy true:  aik = (ytype) A(i,k) and bkj = (xtype) B(k,j)
-        GB_void aik [GB_VLA(flipxy ? ysize : xsize)] ;
-        GB_void bkj [GB_VLA(flipxy ? xsize : ysize)] ;
-        GB_void t [GB_VLA(csize)] ;
-
-        GB_void *GB_RESTRICT Cx = C->x ;
-        GB_void *cij = Cx ;        // advances through each entry of C
-
-        // GB_void *identity = add->identity ;
-
-        GB_cast_function cast_A, cast_B ;
-        if (flipxy)
-        { 
-            // A is typecasted to y, and B is typecasted to x
-            cast_A = A_is_pattern ? NULL : 
-                     GB_cast_factory (mult->ytype->code, A->type->code) ;
-            cast_B = B_is_pattern ? NULL : 
-                     GB_cast_factory (mult->xtype->code, B->type->code) ;
-        }
-        else
-        { 
-            // A is typecasted to x, and B is typecasted to y
-            cast_A = A_is_pattern ? NULL :
-                     GB_cast_factory (mult->xtype->code, A->type->code) ;
-            cast_B = B_is_pattern ? NULL :
-                     GB_cast_factory (mult->ytype->code, B->type->code) ;
-        }
-
-        //----------------------------------------------------------------------
-        // C = A*B via the heap, function pointers, and typecasting
-        //----------------------------------------------------------------------
-
-        // aik = A(i,k), of size asize
-        #define GB_GETA(aik,Ax,pA)                                          \
-            if (!A_is_pattern) cast_A (aik, Ax +((pA)*asize), asize) ;
-
-        // bkj = B(k,j), of size bsize
-        #define GB_GETB(bkj,Bx,pB)                                          \
-            if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize) ;
-
-        // C(i,j) = A(i,k) * B(k,j)
-        #define GB_MULT(cij, aik, bkj)                                      \
-            GB_MULTIPLY (cij, aik, bkj) ;                                   \
-
-        // C(i,j) += A(i,k) * B(k,j)
-        #define GB_MULTADD(cij, aik, bkj)                                   \
-            GB_MULTIPLY (t, aik, bkj) ;                                     \
-            fadd (cij, cij, t) ;
-
-        // C->x or cnz has moved so the pointer to cij needs to be recomputed
-        #define GB_CIJ_REACQUIRE(cij, cnz)  cij = Cx + cnz * csize ;
-
-        // save the value of C(i,j) by advancing cij pointer to next value
-        #define GB_CIJ_SAVE(cij,p)          cij += csize ;
-
-        #define GB_ATYPE GB_void
-        #define GB_BTYPE GB_void
-
-        if (flipxy)
-        { 
-            #define GB_MULTIPLY(z,x,y) fmult (z,y,x)
-            #include "GB_AxB_heap_meta.c"
-            #undef GB_MULTIPLY
-        }
-        else
-        { 
-            #define GB_MULTIPLY(z,x,y) fmult (z,x,y)
-            #include "GB_AxB_heap_meta.c"
-            #undef GB_MULTIPLY
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // trim the size of C: this cannot fail
-    //--------------------------------------------------------------------------
-
-    GB_HEAP_FREE_WORK ;
-    info = GB_ix_realloc (C, GB_NNZ (C), true, NULL) ;
-    ASSERT (info == GrB_SUCCESS) ;
-    ASSERT_MATRIX_OK (C, "heap: C = A*B output", GB0) ;
-    ASSERT (*Chandle == C) ;
-    (*mask_applied) = (M != NULL) ;
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_AxB_meta.c b/Source/GB_AxB_meta.c
index dcbaf6a677..7f31d5c211 100644
--- a/Source/GB_AxB_meta.c
+++ b/Source/GB_AxB_meta.c
@@ -2,25 +2,29 @@
 // GB_AxB_meta: C<M>=A*B meta algorithm
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// C or C<M> = A*B, A'*B, A*B', or A'*B' : both symbolic and numeric, with the
-// optional mask matrix.  This function is called by GB_mxm only.  If the mask
-// matrix is present, it is not complemented, since this function can only
-// handle a non-complemented mask matrix.  A complemented mask is handled in
-// GB_accum_mask, after this matrix C is computed, in GB_mxm.  The result of
-// this matrix is the T matrix in GB_mxm.
+// C, C<M>, C<!M> = A*B, A'*B, A*B', or A'*B' : both symbolic and numeric, with
+// the optional mask matrix.  This function is called by GB_mxm only.  If the
+// mask matrix is present, it can be regular or complemented, and either valued
+// or structural.
+
+// This algorithm may decide that it is more efficient to apply the mask later,
+// in GB_accum_mask, after this matrix C is computed, in GB_mxm.  The result is
+// either the T matrix in GB_mxm, or (if done in-place), the final output
+// matrix C passed in from the user (C_in_place).
 
 // The method is chosen automatically:  a gather/scatter saxpy method
-// (Gustavson), a heap-based saxpy method, or a dot product method.
+// (Gustavson), a hash-based saxpy method, or a dot product method.  The
+// AxB_method can modify this automatic choice, if set to a non-default value.
+// AxB_method_used is DOT, SAXPY, or DEFAULT (the latter denotes the row/col
+// scaling methods).
 
 // FUTURE:: an outer-product method for C=A*B'
 
-// FUTURE:: a hash-based method for C=A*B
-
 #define GB_FREE_ALL             \
 {                               \
     GB_MATRIX_FREE (Chandle) ;  \
@@ -34,11 +38,15 @@
 
 GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
 (
-    GrB_Matrix *Chandle,            // output matrix C
+    GrB_Matrix *Chandle,            // output matrix (if not done in place)
+    GrB_Matrix C_in_place,          // input/output matrix, if done in place
+    bool C_replace,                 // C matrix descriptor
     const bool C_is_csc,            // desired CSR/CSC format of C
     GrB_Matrix *MT_handle,          // return MT = M' to caller, if computed
     const GrB_Matrix M_in,          // mask for C<M> (not complemented)
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
+    const GrB_BinaryOp accum,       // accum operator for C_in_place += A*B
     const GrB_Matrix A_in,          // input matrix
     const GrB_Matrix B_in,          // input matrix
     const GrB_Semiring semiring,    // semiring that defines C=A*B
@@ -46,7 +54,8 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
     bool B_transpose,               // if true, use B', else B
     bool flipxy,                    // if true, do z=fmult(b,a) vs fmult(a,b)
     bool *mask_applied,             // if true, mask was applied
-    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
+    bool *done_in_place,            // if true, C was computed in place
+    GrB_Desc_Value AxB_method,      // for auto vs user selection of methods
     GrB_Desc_Value *AxB_method_used,// method selected
     GB_Context Context
 )
@@ -56,6 +65,7 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
     // check inputs
     //--------------------------------------------------------------------------
 
+    ASSERT_MATRIX_OK_OR_NULL (C_in_place, "C_in_place for meta A*B", GB0) ;
     ASSERT_MATRIX_OK_OR_NULL (M_in, "M for meta A*B", GB0) ;
     ASSERT_MATRIX_OK (A_in, "A_in for meta A*B", GB0) ;
     ASSERT_MATRIX_OK (B_in, "B_in for meta A*B", GB0) ;
@@ -80,8 +90,50 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
     GrB_Matrix MT = NULL ;
 
     (*mask_applied) = false ;
+    (*done_in_place) = false ;
     (*AxB_method_used) = GxB_DEFAULT ;
 
+    if (AxB_method == GxB_AxB_HEAP)
+    { 
+        // FUTURE::: Heap method not yet reinstalled; using Hash instead
+        AxB_method = GxB_AxB_HASH ;
+    }
+
+    //--------------------------------------------------------------------------
+    // see if the work can be done in place
+    //--------------------------------------------------------------------------
+
+    // C can be computed in place if it is already dense, and if it is
+    // guaranteed to remain dense after the computation is done.  This case
+    // requires the accum operator to be present and it must match the monoid
+    // of the semiring.  C_replace must be false, or effectively false.
+    // Finally, C must not be transposed on output.
+
+    bool can_do_in_place = false ;
+    if (C_in_place != NULL && accum != NULL)
+    { 
+        // check if C_in_place is completely dense:  all entries present and no
+        // pending work
+        bool C_is_dense = !GB_PENDING_OR_ZOMBIES (C_in_place)
+            && GB_is_dense (C_in_place) ;
+
+        // accum must be present, and must match the monoid of the semiring,
+        // and the ztype of the monoid must match the type of C
+        bool accum_is_monoid = (accum == semiring->add->op) 
+            && (C_in_place->type == accum->ztype) ;
+
+        // C += A*B with C_replace ignored (effectively false)
+        // C<M> += A*B with C_replace false
+        // C<!M> += A*B with C_replace false
+        can_do_in_place =
+            C_is_dense
+            && accum_is_monoid
+            && ((M_in == NULL) || (M_in != NULL && !C_replace)) ;
+
+        // C must also not be transposed on output; see below.
+        // Nor can it be aliased with any input matrix.
+    }
+
     //--------------------------------------------------------------------------
     // handle the CSR/CSC formats of C, M, A, and B
     //--------------------------------------------------------------------------
@@ -195,11 +247,6 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
         B = B_in ; btrans = B_transpose ;
     }
 
-    // Assuming the swap_rule == C_transpose, C no longer needs to be
-    // transposed, but the following assertion only holds if swap_rule ==
-    // C_transpose.
-    ASSERT (!C_transpose) ;
-
     ASSERT_MATRIX_OK (A, "final A for A*B", GB0) ;
     ASSERT_MATRIX_OK (B, "final B for A*B", GB0) ;
 
@@ -211,6 +258,7 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
     // transpose: typecast, no op, not in place
 
     GrB_Matrix M ;
+    bool M_transposed ;
 
     if (M_transpose && M_in != NULL)
     { 
@@ -218,21 +266,57 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
         // unless an error occurs, but is returned to the caller.
         GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M_in, NULL, Context)) ;
         M = MT ;
+        M_transposed = true ;
     }
     else
     { 
         // M_in can be used as-is; it may be NULL
         M = M_in ;
+        M_transposed = false ;
     }
 
     ASSERT_MATRIX_OK_OR_NULL (M, "final M for A*B", GB0) ;
 
+    //--------------------------------------------------------------------------
+    // check additional conditions for in-place computation of C
+    //--------------------------------------------------------------------------
+
+    if (can_do_in_place)
+    {
+        // C cannot be done in place if it is aliased with any input matrix.
+        // Also cannot compute C in place (yet) if it is to be transposed.
+        bool C_aliased =
+            GB_aliased (C_in_place, M) ||
+            GB_aliased (C_in_place, A) ||
+            GB_aliased (C_in_place, B) ;
+        if (C_transpose || C_aliased)
+        { 
+            can_do_in_place = false ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // burble
+    //--------------------------------------------------------------------------
+
+    #if GB_BURBLE
+    char *M_str = (M == NULL) ? "" : (Mask_comp ?  "<!M>" : "<M>") ;
+    #define GB_PROP_LEN (GB_LEN+128)
+    char A_str [GB_PROP_LEN+1] ;
+    char B_str [GB_PROP_LEN+1] ;
+    snprintf (A_str, GB_PROP_LEN, "A: "GBd"-by-"GBd", %s, "GBd" entries",
+        GB_NROWS (A), GB_NCOLS (A), A->type->name, GB_NNZ (A)) ;
+    snprintf (B_str, GB_PROP_LEN, "B: "GBd"-by-"GBd", %s, "GBd" entries",
+        GB_NROWS (B), GB_NCOLS (B), B->type->name, GB_NNZ (B)) ;
+    #endif
+
     //--------------------------------------------------------------------------
     // typecast A and B when transposing them, if needed
     //--------------------------------------------------------------------------
 
     bool op_is_first  = semiring->multiply->opcode == GB_FIRST_opcode ;
     bool op_is_second = semiring->multiply->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = semiring->multiply->opcode == GB_PAIR_opcode ;
     bool A_is_pattern = false ;
     bool B_is_pattern = false ;
 
@@ -240,16 +324,16 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
     if (flipxy)
     { 
         // A is passed as y, and B as x, in z = mult(x,y)
-        A_is_pattern = op_is_first  ;
-        B_is_pattern = op_is_second ;
+        A_is_pattern = op_is_first  || op_is_pair ;
+        B_is_pattern = op_is_second || op_is_pair ;
         atype_required = A_is_pattern ? A->type : semiring->multiply->ytype ;
         btype_required = B_is_pattern ? B->type : semiring->multiply->xtype ;
     }
     else
     { 
         // A is passed as x, and B as y, in z = mult(x,y)
-        A_is_pattern = op_is_second ;
-        B_is_pattern = op_is_first  ;
+        A_is_pattern = op_is_second || op_is_pair ;
+        B_is_pattern = op_is_first  || op_is_pair ;
         atype_required = A_is_pattern ? A->type : semiring->multiply->xtype ;
         btype_required = B_is_pattern ? B->type : semiring->multiply->ytype ;
     }
@@ -287,7 +371,7 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
         }
 
         //----------------------------------------------------------------------
-        // select the method for C<M> = A'*B
+        // select the method for C<M>=A'*B
         //----------------------------------------------------------------------
 
         // A'*B is being computed: use the dot product without computing A'
@@ -299,7 +383,7 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
         // which M(i,j)=1 is computed via a dot product, C(i,j) =
         // A(:,i)'*B(:,j).  If the mask is not present, the dot-product method
         // is very slow in general, and thus the saxpy method is usually used
-        // instead (via Gustavson or heap).
+        // instead.
 
         bool do_rowscale = false ;
         bool do_colscale = false ;
@@ -320,7 +404,7 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
             // auto selection for A'*B
             if (M != NULL && !Mask_comp)
             { 
-                // C<M> = A'*B always uses the dot product method
+                // C<M>=A'*B uses the masked dot product method
                 do_adotb = true ;
             }
             else if (A->vdim == 1 || B->vdim == 1)
@@ -334,12 +418,7 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
                 // or B are dense, since the dot product method requires no
                 // workspace in that case and can exploit dense vectors of A
                 // and/or B.
-                GrB_Index bnzmax, anzmax ;
-                bool A_is_dense = GB_Index_multiply (&anzmax, A->vlen, A->vdim)
-                                  && (anzmax == GB_NNZ (A)) ;
-                bool B_is_dense = GB_Index_multiply (&bnzmax, B->vlen, B->vdim)
-                                  && (bnzmax == GB_NNZ (B)) ;
-                do_adotb = A_is_dense || B_is_dense ;
+                do_adotb = GB_is_dense (A) || GB_is_dense (B) ;
             }
         }
         else
@@ -349,34 +428,40 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
         }
 
         //----------------------------------------------------------------------
-        // C<M> = A'*B
+        // C<M>=A'*B
         //----------------------------------------------------------------------
 
         if (do_rowscale)
         { 
             // C = D*B
+            GBBURBLE ("C%s=A'*B, rowscale ", M_str) ;
             GB_OK (GB_AxB_rowscale (Chandle, A, B, semiring, flipxy, Context)) ;
         }
         else if (do_colscale)
         { 
             // C = A'*D
+            GBBURBLE ("C%s=A'*B, colscale (transposed %s) ", M_str, A_str) ;
             GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
             GB_OK (GB_AxB_colscale (Chandle, AT, B, semiring, flipxy, Context));
         }
         else if (do_adotb)
         { 
-            // C<M> = A'*B via dot product method
-            GB_OK (GB_AxB_dot_parallel (Chandle, M, Mask_comp, A, B, semiring,
-                flipxy, mask_applied, Context)) ;
+            // C<M>=A'*B via dot product, or C_in_place<M>+=A'*B if in place
+            GBBURBLE ("C%s=A'*B, %sdot_product ", M_str,
+                (M != NULL && !Mask_comp) ? "masked_" : "") ;
+            GB_OK (GB_AxB_dot (Chandle, (can_do_in_place) ? C_in_place : NULL,
+                M, Mask_comp, Mask_struct, A, B, semiring, flipxy,
+                mask_applied, done_in_place, Context)) ;
             (*AxB_method_used) = GxB_AxB_DOT ;
         }
         else
         { 
-            // C<M> = A'*B via saxpy: Gustavson or heap method
+            // C = A'*B via saxpy3: Gustavson + Hash method
+            GBBURBLE ("C%s=A'*B, saxpy (transposed %s) ", M_str, A_str) ;
             GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
-            GB_OK (GB_AxB_saxpy_parallel (Chandle, M, Mask_comp, AT, B,
-                semiring, flipxy, AxB_method, AxB_method_used, mask_applied,
-                Context)) ;
+            GB_OK (GB_AxB_saxpy3 (Chandle, M, Mask_comp, Mask_struct,
+                AT, B, semiring, flipxy, mask_applied, AxB_method, Context)) ;
+            (*AxB_method_used) = GxB_AxB_SAXPY ;
         }
 
     }
@@ -390,30 +475,36 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
         if (M == NULL && GB_is_diagonal (B, Context))
         { 
             // C = A*D
+            GBBURBLE ("C%s=A*B', colscale ", M_str) ;
             GB_OK (GB_AxB_colscale (Chandle, A, B, semiring, flipxy, Context)) ;
         }
         else if (M == NULL && GB_is_diagonal (A, Context))
         { 
             // C = D*B'
+            GBBURBLE ("C%s=A*B', rowscale (transposed %s) ", M_str, B_str) ;
             GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
             GB_OK (GB_AxB_rowscale (Chandle, A, BT, semiring, flipxy, Context));
         }
         else if (AxB_method == GxB_AxB_DOT)
         { 
-            // C<M> = A*B' via dot product
+            // C<M>=A*B' via dot product, or C_in_place<M>+=A*B' if in place
+            GBBURBLE ("C%s=A*B', dot_product (transposed %s) (transposed %s) ",
+                M_str, A_str, B_str) ;
             GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
             GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
-            GB_OK (GB_AxB_dot_parallel (Chandle, M, Mask_comp, AT, BT, semiring,
-                flipxy, mask_applied, Context)) ;
+            GB_OK (GB_AxB_dot (Chandle, (can_do_in_place) ? C_in_place : NULL,
+                M, Mask_comp, Mask_struct, AT, BT, semiring, flipxy,
+                mask_applied, done_in_place, Context)) ;
             (*AxB_method_used) = GxB_AxB_DOT ;
         }
         else
         { 
-            // C<M> = A*B' via saxpy: Gustavson or heap method
+            // C = A*B' via saxpy3: Gustavson + Hash method
+            GBBURBLE ("C%s=A*B', saxpy (transposed %s) ", M_str, B_str) ;
             GB_OK (GB_transpose (&BT, btype_required, true, B, NULL, Context)) ;
-            GB_OK (GB_AxB_saxpy_parallel (Chandle, M, Mask_comp, A, BT,
-                semiring, flipxy, AxB_method, AxB_method_used, mask_applied,
-                Context)) ;
+            GB_OK (GB_AxB_saxpy3 (Chandle, M, Mask_comp, Mask_struct,
+                A, BT, semiring, flipxy, mask_applied, AxB_method, Context)) ;
+            (*AxB_method_used) = GxB_AxB_SAXPY ;
         }
 
     }
@@ -427,30 +518,38 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
         if (M == NULL && GB_is_diagonal (B, Context))
         { 
             // C = A*D, column scale
+            GBBURBLE ("C%s=A*B, colscale ", M_str) ;
             GB_OK (GB_AxB_colscale (Chandle, A, B, semiring, flipxy, Context)) ;
         }
         else if (M == NULL && GB_is_diagonal (A, Context))
         { 
             // C = D*B, row scale
+            GBBURBLE ("C%s=A*B, rowscale ", M_str) ;
             GB_OK (GB_AxB_rowscale (Chandle, A, B, semiring, flipxy, Context)) ;
         }
         else if (AxB_method == GxB_AxB_DOT)
         { 
-            // C<M> = A*B via dot product
+            // C<M>=A*B via dot product, or C_in_place<M>+=A*B if in place
+            GBBURBLE ("C%s=A*B, dot_product (transposed %s) ", M_str, A_str) ;
             GB_OK (GB_transpose (&AT, atype_required, true, A, NULL, Context)) ;
-            GB_OK (GB_AxB_dot_parallel (Chandle, M, Mask_comp, AT, B, semiring,
-                flipxy, mask_applied, Context)) ;
+            GB_OK (GB_AxB_dot (Chandle, (can_do_in_place) ? C_in_place : NULL,
+                M, Mask_comp, Mask_struct, AT, B, semiring, flipxy,
+                mask_applied, done_in_place, Context)) ;
             (*AxB_method_used) = GxB_AxB_DOT ;
         }
         else
         { 
-            // C<M> = A*B via saxpy: Gustavson or heap method
-            GB_OK (GB_AxB_saxpy_parallel (Chandle, M, Mask_comp, A, B,
-                semiring, flipxy, AxB_method, AxB_method_used, mask_applied,
-                Context)) ;
+            // C = A*B via saxpy3: Gustavson + Hash method
+            GBBURBLE ("C%s=A*B, saxpy ", M_str) ;
+            GB_OK (GB_AxB_saxpy3 (Chandle, M, Mask_comp, Mask_struct,
+                A, B, semiring, flipxy, mask_applied, AxB_method, Context)) ;
+            (*AxB_method_used) = GxB_AxB_SAXPY ;
         }
     }
 
+    if (M_transposed) { GBBURBLE ("(M transposed) ") ; }
+    if ((M != NULL) && !(*mask_applied)) { GBBURBLE ("(mask later) ") ; }
+
     //--------------------------------------------------------------------------
     // handle C_transpose and assign the CSR/CSC format
     //--------------------------------------------------------------------------
@@ -460,9 +559,19 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
     // applying the accum operator and/or writing the result back to the user's
     // C.
 
-    GrB_Matrix C = (*Chandle) ;
-    ASSERT (C != NULL) ;
-    C->is_csc = C_transpose ? !C_is_csc : C_is_csc ;
+    if (*done_in_place)
+    { 
+        // C can be done in place only if C is not transposed on output
+        ASSERT_MATRIX_OK (C_in_place, "C_in_place output for all C=A*B", GB0) ;
+        ASSERT (C_in_place->is_csc == C_is_csc) ;
+    }
+    else
+    { 
+        GrB_Matrix C = (*Chandle) ;
+        ASSERT (C != NULL) ;
+        C->is_csc = C_transpose ? !C_is_csc : C_is_csc ;
+        ASSERT_MATRIX_OK (C, "C output for all C=A*B", GB0) ;
+    }
 
     //--------------------------------------------------------------------------
     // free workspace and return result
@@ -470,9 +579,7 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
 
     GB_MATRIX_FREE (&AT) ;
     GB_MATRIX_FREE (&BT) ;
-    ASSERT_MATRIX_OK (C, "C output for all C=A*B", GB0) ;
     ASSERT_MATRIX_OK_OR_NULL (MT, "MT if computed", GB0) ;
-
     if (MT_handle != NULL)
     { 
         // return MT to the caller, if computed and the caller wants it
diff --git a/Source/GB_AxB_rowscale.c b/Source/GB_AxB_rowscale.c
index ed41338d27..0014399a30 100644
--- a/Source/GB_AxB_rowscale.c
+++ b/Source/GB_AxB_rowscale.c
@@ -2,7 +2,7 @@
 // GB_AxB_rowscale: C = D*B, row scale with diagonal matrix D
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -54,14 +54,15 @@ GrB_Info GB_AxB_rowscale            // C = D*B, row scale with diagonal D
 
     bool op_is_first  = mult->opcode == GB_FIRST_opcode ;
     bool op_is_second = mult->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = mult->opcode == GB_PAIR_opcode ;
     bool D_is_pattern = false ;
     bool B_is_pattern = false ;
 
     if (flipxy)
     { 
         // z = fmult (b,a) will be computed
-        D_is_pattern = op_is_first  ;
-        B_is_pattern = op_is_second ;
+        D_is_pattern = op_is_first  || op_is_pair ;
+        B_is_pattern = op_is_second || op_is_pair ;
         ASSERT (GB_IMPLIES (!D_is_pattern,
             GB_Type_compatible (D->type, mult->ytype))) ;
         ASSERT (GB_IMPLIES (!B_is_pattern,
@@ -70,8 +71,8 @@ GrB_Info GB_AxB_rowscale            // C = D*B, row scale with diagonal D
     else
     { 
         // z = fmult (a,b) will be computed
-        D_is_pattern = op_is_second ;
-        B_is_pattern = op_is_first  ;
+        D_is_pattern = op_is_second || op_is_pair ;
+        B_is_pattern = op_is_first  || op_is_pair ;
         ASSERT (GB_IMPLIES (!D_is_pattern,
             GB_Type_compatible (D->type, mult->xtype))) ;
         ASSERT (GB_IMPLIES (!B_is_pattern,
@@ -122,8 +123,8 @@ GrB_Info GB_AxB_rowscale            // C = D*B, row scale with diagonal D
         GB_Opcode opcode ;
         GB_Type_code xycode, zcode ;
 
-        if (GB_binop_builtin (D, D_is_pattern, B, B_is_pattern, mult,
-            flipxy, &opcode, &xycode, &zcode))
+        if (GB_binop_builtin (D->type, D_is_pattern, B->type, B_is_pattern,
+            mult, flipxy, &opcode, &xycode, &zcode))
         { 
             #include "GB_binop_factory.c"
         }
@@ -136,6 +137,7 @@ GrB_Info GB_AxB_rowscale            // C = D*B, row scale with diagonal D
 
     if (!done)
     {
+        GB_BURBLE_MATRIX (C, "generic ") ;
 
         //----------------------------------------------------------------------
         // get operators, functions, workspace, contents of D, B, and C
@@ -204,6 +206,7 @@ GrB_Info GB_AxB_rowscale            // C = D*B, row scale with diagonal D
 
         // no vectorization
         #define GB_PRAGMA_VECTORIZE
+        #define GB_PRAGMA_VECTORIZE_DOT
 
         if (flipxy)
         { 
diff --git a/Source/GB_AxB_saxpy3.c b/Source/GB_AxB_saxpy3.c
new file mode 100644
index 0000000000..ad0a87dfcd
--- /dev/null
+++ b/Source/GB_AxB_saxpy3.c
@@ -0,0 +1,1107 @@
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3: compute C=A*B, C<M>=A*B, or C<!M>=A*B in parallel
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// GB_AxB_saxpy3 computes C=A*B, C<M>=A*B, or C<!M>=A*B in parallel.  If the
+// mask matrix M has too many entries compared to the work to compute A*B, then
+// it is not applied.  Instead, M is ignored and C=A*B is computed.  The mask
+// is applied later, in GB_mxm.
+
+// For simplicity, this discussion and all comments in this code assume that
+// all matrices are in CSC format, but the algorithm is CSR/CSC agnostic.
+
+// The matrix B is split into two kinds of tasks: coarse and fine.  A coarse
+// task computes C(:,j1:j2) = A*B(:,j1:j2), for a unique set of vectors j1:j2.
+// Those vectors are not shared with any other tasks.  A fine task works with a
+// team of other fine tasks to compute C(:,j) for a single vector j.  Each fine
+// task computes A*B(k1:k2,j) for a unique range k1:k2, and sums its results
+// into C(:,j) via atomic operations.
+
+// Each coarse or fine task uses either Gustavson's method [1] or the Hash
+// method [2].  There are 4 kinds of tasks:
+
+//      fine Gustavson task
+//      fine hash task
+//      coarse Gustavson task
+//      coarse hash task
+
+// Each of the 4 kinds of tasks is then subdivided into 3 variants, for C=A*B,
+// C<M>=A*B, and C<!M>=A*B, giving a total of 12 different types of tasks.
+
+// Fine tasks are used when there would otherwise be too much work for a single
+// task to compute the single vector C(:,j).  Fine tasks share all of their
+// workspace with the team of fine tasks computing C(:,j).  Coarse tasks are
+// preferred since they require less synchronization, but fine tasks allow for
+// better parallelization when B has only a few vectors.  If B consists of a
+// single vector (for GrB_mxv if A is in CSC format and not transposed, or
+// for GrB_vxm if A is in CSR format and not transposed), then the only way to
+// get parallelism is via fine tasks.  If a single thread is used for this
+// case, a single-vector coarse task is used.
+
+// To select between the Hash method or Gustavson's method for each task, the
+// hash table size is first found.  The hash table size for a hash task depends
+// on the maximum flop count for any vector in that task (which is just one
+// vector for the fine tasks).  It is set to twice the smallest power of 2 that
+// is greater than the flop count to compute that vector (plus the # of entries
+// in M(:,j) for tasks that compute C<M>=A*B or C<!M>=A*B).  This size ensures
+// the results will fit in the hash table, and with hopefully only a modest
+// number of collisions.  If the hash table size exceeds a threshold (currently
+// m/16 if C is m-by-n), then Gustavson's method is used instead, and the hash
+// table size is set to m, to serve as the gather/scatter workspace for
+// Gustavson's method.
+
+// The workspace allocated depends on the type of task.  Let s be the hash
+// table size for the task, and C is m-by-n (assuming all matrices are CSC; if
+// CSR, then m is replaced with n).
+//
+//      fine Gustavson task (shared):   uint8_t Hf [m] ; ctype Hx [m] ;
+//      fine hash task (shared):        int64_t Hf [s] ; ctype Hx [s] ;
+//      coarse Gustavson task:          int64_t Hf [m] ; ctype Hx [m] ;
+//      coarse hash task:               int64_t Hf [s] ; ctype Hx [s] ;
+//                                      int64_t Hi [s] ; 
+//
+// Note that the Hi array is needed only for the coarse hash task.  Additional
+// workspace is allocated to construct the list of tasks, but this is freed
+// before C is constructed.
+
+// References:
+
+// [1] Fred G. Gustavson. 1978. Two Fast Algorithms for Sparse Matrices:
+// Multiplication and Permuted Transposition. ACM Trans. Math. Softw.  4, 3
+// (Sept. 1978), 250–269. DOI:https://doi.org/10.1145/355791.355796
+
+// [2] Yusuke Nagasaka, Satoshi Matsuoka, Ariful Azad, and Aydın Buluç. 2018.
+// High-Performance Sparse Matrix-Matrix Products on Intel KNL and Multicore
+// Architectures. In Proc. 47th Intl. Conf. on Parallel Processing (ICPP '18).
+// Association for Computing Machinery, New York, NY, USA, Article 34, 1–10.
+// DOI:https://doi.org/10.1145/3229710.3229720
+
+//------------------------------------------------------------------------------
+
+#include "GB_mxm.h"
+#include "GB_AxB_saxpy3.h"
+#ifndef GBCOMPACT
+#include "GB_AxB__include.h"
+#endif
+
+//------------------------------------------------------------------------------
+// control parameters for generating parallel tasks
+//------------------------------------------------------------------------------
+
+#define GB_NTASKS_PER_THREAD 2
+#define GB_COSTLY 1.2
+#define GB_FINE_WORK 2
+#define GB_MWORK_ALPHA 0.01
+
+//------------------------------------------------------------------------------
+// free workspace
+//------------------------------------------------------------------------------
+
+// This workspace is not needed in the GB_Asaxpy3B* worker functions.
+#define GB_FREE_INITIAL_WORK                                                \
+{                                                                           \
+    GB_FREE_MEMORY (Bflops2, max_bjnz+1, sizeof (int64_t)) ;                \
+    GB_FREE_MEMORY (Coarse_initial, ntasks_initial+1, sizeof (int64_t)) ;   \
+    GB_FREE_MEMORY (Fine_slice, ntasks+1, sizeof (int64_t)) ;               \
+}
+
+#define GB_FREE_WORK                                                        \
+{                                                                           \
+    GB_FREE_INITIAL_WORK ;                                                  \
+    GB_FREE_MEMORY (TaskList, ntasks, sizeof (GB_saxpy3task_struct)) ;      \
+    GB_FREE_MEMORY (Hi_all, Hi_size_total, sizeof (int64_t)) ;              \
+    GB_FREE_MEMORY (Hf_all, Hf_size_total, sizeof (int64_t)) ;              \
+    GB_FREE_MEMORY (Hx_all, Hx_size_total, 1) ;                             \
+}
+
+#define GB_FREE_ALL                                                         \
+{                                                                           \
+    GB_FREE_WORK ;                                                          \
+    GB_MATRIX_FREE (Chandle) ;                                              \
+}
+
+//------------------------------------------------------------------------------
+// GB_hash_table_size
+//------------------------------------------------------------------------------
+
+// flmax is the max flop count for computing A*B(:,j), for any vector j that
+// this task computes.  If the mask M is present, flmax also includes the
+// number of entries in M(:,j).  GB_hash_table_size determines the hash table
+// size for this task, which is twice the smallest power of 2 larger than
+// flmax.  If flmax is large enough, the hash_size is returned as cvlen, so
+// that Gustavson's method will be used instead of the Hash method.
+
+// By default, Gustavson vs Hash is selected automatically.  AxB_method can be
+// selected via the descriptor or a global setting, as the non-default
+// GxB_AxB_GUSTAVSON or GxB_AxB_HASH settings, to enforce the selection of
+// either of those methods.  However, if Hash is selected but the hash table
+// size is cvlen or larger, then Gustavson's method is used instead.
+
+static inline int64_t GB_hash_table_size
+(
+    int64_t flmax,      // max flop count for any vector computed by this task
+    int64_t cvlen,      // vector length of C
+    const GrB_Desc_Value AxB_method     // Default, Gustavson, or Hash
+)
+{
+    // hash_size = 2 * (smallest power of 2 > flmax); assumes flmax >= 1
+    double hlog = log2 ((double) flmax) ;
+    int64_t hash_size = ((int64_t) 2) << ((int64_t) floor (hlog) + 1) ;
+    bool use_Gustavson ;
+
+    if (AxB_method == GxB_AxB_GUSTAVSON)
+    { 
+        // always use Gustavson's method
+        use_Gustavson = true ;
+    }
+    else if (AxB_method == GxB_AxB_HASH)
+    { 
+        // always use Hash method, unless the hash_size >= cvlen
+        use_Gustavson = (hash_size >= cvlen) ;
+    }
+    else
+    { 
+        // default: auto selection:
+        // use Gustavson's method if hash_size is too big
+        use_Gustavson = (hash_size >= cvlen/16) ;
+    }
+
+    if (use_Gustavson)
+    { 
+        hash_size = cvlen ;
+    }
+    return (hash_size) ;
+}
+
+//------------------------------------------------------------------------------
+// GB_create_coarse_task: create a single coarse task
+//------------------------------------------------------------------------------
+
+// Compute the max flop count for any vector in a coarse task, determine the
+// hash table size, and construct the coarse task.
+
+static inline void GB_create_coarse_task
+(
+    int64_t kfirst,     // coarse task consists of vectors kfirst:klast
+    int64_t klast,
+    GB_saxpy3task_struct *TaskList,
+    int taskid,         // taskid for this coarse task
+    int64_t *Bflops,    // size bnvec+1; cum sum of flop counts for vectors of B
+    int64_t cvlen,      // vector length of C
+    double chunk,
+    int nthreads_max,
+    const GrB_Desc_Value AxB_method     // Default, Gustavson, or Hash
+)
+{
+    // find the max # of flops for any vector in this task
+    int64_t flmax = 1 ;
+    int nth = GB_nthreads (klast-kfirst+1, chunk, nthreads_max) ;
+    int64_t kk ;
+    #pragma omp parallel for num_threads(nth) schedule(static) \
+        reduction(max:flmax)
+    for (kk = kfirst ; kk <= klast ; kk++)
+    { 
+        int64_t fl = Bflops [kk+1] - Bflops [kk] ;
+        flmax = GB_IMAX (flmax, fl) ;
+    }
+    // define the coarse task
+    TaskList [taskid].start   = kfirst ;
+    TaskList [taskid].end     = klast ;
+    TaskList [taskid].vector  = -1 ;
+    TaskList [taskid].hsize   = GB_hash_table_size (flmax, cvlen, AxB_method) ;
+    TaskList [taskid].Hi      = NULL ;      // assigned later
+    TaskList [taskid].Hf      = NULL ;      // assigned later
+    TaskList [taskid].Hx      = NULL ;      // assigned later
+    TaskList [taskid].my_cjnz = 0 ;         // unused
+    TaskList [taskid].flops   = Bflops [klast+1] - Bflops [kfirst] ;
+    TaskList [taskid].master  = taskid ;
+    TaskList [taskid].team_size = 1 ;
+}
+
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3: compute C=A*B, C<M>=A*B, or C<!M>=A*B in parallel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxB_saxpy3              // C = A*B using Gustavson+Hash
+(
+    GrB_Matrix *Chandle,            // output matrix
+    const GrB_Matrix M_input,       // optional mask matrix
+    const bool Mask_comp_input,     // if true, use !M
+    const bool Mask_struct,         // if true, use the only structure of M
+    const GrB_Matrix A,             // input matrix A
+    const GrB_Matrix B,             // input matrix B
+    const GrB_Semiring semiring,    // semiring that defines C=A*B
+    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
+    bool *mask_applied,             // if true, then mask was applied
+    const GrB_Desc_Value AxB_method,    // Default, Gustavson, or Hash
+    GB_Context Context
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+
+    GrB_Matrix M = M_input ;        // use the mask M, until deciding otherwise
+    bool Mask_comp = Mask_comp_input ;
+
+    (*mask_applied) = false ;
+    ASSERT (Chandle != NULL) ;
+    ASSERT (*Chandle == NULL) ;
+    ASSERT_MATRIX_OK_OR_NULL (M, "M for saxpy3 A*B", GB0) ;
+    ASSERT_MATRIX_OK (A, "A for saxpy3 A*B", GB0) ;
+    ASSERT_MATRIX_OK (B, "B for saxpy3 A*B", GB0) ;
+    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
+    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
+    ASSERT_SEMIRING_OK (semiring, "semiring for saxpy3 A*B", GB0) ;
+    ASSERT (A->vdim == B->vlen) ;
+
+    int64_t *GB_RESTRICT Hi_all = NULL ;
+    int64_t *GB_RESTRICT Hf_all = NULL ;
+    GB_void *GB_RESTRICT Hx_all = NULL ;
+    int64_t *GB_RESTRICT Coarse_initial = NULL ;    // initial coarse tasks
+    GB_saxpy3task_struct *GB_RESTRICT TaskList = NULL ;
+    int64_t *GB_RESTRICT Fine_slice = NULL ;
+    int64_t *GB_RESTRICT Bflops2 = NULL ;
+
+    int ntasks = 0 ;
+    int ntasks_initial = 0 ;
+    size_t Hi_size_total = 0 ;
+    size_t Hf_size_total = 0 ;
+    size_t Hx_size_total = 0 ;
+    int64_t max_bjnz = 0 ;
+
+    //--------------------------------------------------------------------------
+    // get the semiring operators
+    //--------------------------------------------------------------------------
+
+    GrB_BinaryOp mult = semiring->multiply ;
+    GrB_Monoid add = semiring->add ;
+    ASSERT (mult->ztype == add->op->ztype) ;
+
+    bool op_is_first  = mult->opcode == GB_FIRST_opcode ;
+    bool op_is_second = mult->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = mult->opcode == GB_PAIR_opcode ;
+    bool A_is_pattern = false ;
+    bool B_is_pattern = false ;
+
+    if (flipxy)
+    { 
+        // z = fmult (b,a) will be computed
+        A_is_pattern = op_is_first  || op_is_pair ;
+        B_is_pattern = op_is_second || op_is_pair ;
+        ASSERT (GB_IMPLIES (!A_is_pattern,
+            GB_Type_compatible (A->type, mult->ytype))) ;
+        ASSERT (GB_IMPLIES (!B_is_pattern,
+            GB_Type_compatible (B->type, mult->xtype))) ;
+    }
+    else
+    { 
+        // z = fmult (a,b) will be computed
+        A_is_pattern = op_is_second || op_is_pair ;
+        B_is_pattern = op_is_first  || op_is_pair ;
+        ASSERT (GB_IMPLIES (!A_is_pattern,
+            GB_Type_compatible (A->type, mult->xtype))) ;
+        ASSERT (GB_IMPLIES (!B_is_pattern,
+            GB_Type_compatible (B->type, mult->ytype))) ;
+    }
+
+    #ifdef GBCOMPACT
+    bool is_any_pair_semiring = false ;
+    #else
+    GB_Opcode mult_opcode, add_opcode ;
+    GB_Type_code xycode, zcode ;
+    bool builtin_semiring = GB_AxB_semiring_builtin (A, A_is_pattern, B,
+        B_is_pattern, semiring, flipxy, &mult_opcode, &add_opcode, &xycode,
+        &zcode) ;
+    bool is_any_pair_semiring = builtin_semiring
+        && (add_opcode == GB_ANY_opcode)
+        && (mult_opcode == GB_PAIR_opcode) ;
+    #endif
+
+    (*Chandle) = NULL ;
+
+    //--------------------------------------------------------------------------
+    // get A, and B
+    //--------------------------------------------------------------------------
+
+    const int64_t *GB_RESTRICT Ap = A->p ;
+    const int64_t *GB_RESTRICT Ah = A->h ;
+    const int64_t *GB_RESTRICT Ai = A->i ;
+    const int64_t avlen = A->vlen ;
+    const int64_t anvec = A->nvec ;
+    const bool A_is_hyper = A->is_hyper ;
+
+    const int64_t *GB_RESTRICT Bp = B->p ;
+    const int64_t *GB_RESTRICT Bh = B->h ;
+    const int64_t *GB_RESTRICT Bi = B->i ;
+    const int64_t bvdim = B->vdim ;
+    const int64_t bnz = GB_NNZ (B) ;
+    const int64_t bnvec = B->nvec ;
+    const bool B_is_hyper = B->is_hyper ;
+
+    //--------------------------------------------------------------------------
+    // determine the # of threads to use
+    //--------------------------------------------------------------------------
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+
+    //--------------------------------------------------------------------------
+    // allocate C (just C->p and C->h, but not C->i or C->x)
+    //--------------------------------------------------------------------------
+
+    GrB_Type ctype = add->op->ztype ;
+    size_t csize = ctype->size ;
+    int64_t cvlen = avlen ;
+    int64_t cvdim = bvdim ;
+    int64_t cnvec = bnvec ;
+
+    // calloc Cp so it can be used as the Bflops workspace
+    GB_NEW (Chandle, ctype, cvlen, cvdim, GB_Ap_calloc, true,
+        GB_SAME_HYPER_AS (B_is_hyper), B->hyper_ratio, cnvec, Context) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        GB_FREE_ALL ;
+        return (info) ;
+    }
+
+    GrB_Matrix C = (*Chandle) ;
+
+    int64_t *GB_RESTRICT Cp = C->p ;
+    int64_t *GB_RESTRICT Ch = C->h ;
+    if (B_is_hyper)
+    { 
+        // C has the same set of vectors as B
+        int nth = GB_nthreads (cnvec, chunk, nthreads_max) ;
+        GB_memcpy (Ch, Bh, cnvec * sizeof (int64_t), nth) ;
+        C->nvec = bnvec ;
+    }
+
+    //==========================================================================
+    // phase0: create parallel tasks
+    //==========================================================================
+
+    //--------------------------------------------------------------------------
+    // compute flop counts for each vector of B and C
+    //--------------------------------------------------------------------------
+
+    int64_t Mwork = 0 ;
+    int64_t *GB_RESTRICT Bflops = Cp ;  // Cp is used as workspace for Bflops
+    GB_OK (GB_AxB_flopcount (&Mwork, Bflops, M, Mask_comp, A, B, Context)) ;
+    int64_t total_flops = Bflops [bnvec] ;
+
+    //--------------------------------------------------------------------------
+    // determine if the mask M should be applied, or done later
+    //--------------------------------------------------------------------------
+
+    // If M is very large as compared to A*B, then it is too costly to apply
+    // during the computation of A*B.  In this case, compute C=A*B, ignoring
+    // the mask.  Tell the caller that the mask was not applied, so that it
+    // will be applied later in GB_mxm.
+
+    double axbflops = total_flops - Mwork ;
+    GBBURBLE ("axbflops %g Mwork %g ", axbflops, (double) Mwork) ;
+
+    if ((M != NULL) && (axbflops < ((double) Mwork * GB_MWORK_ALPHA)))
+    {
+        // M is present but costly to use.  Do not use it during the
+        // computation of A*B.  Instead, compute C=A*B and then apply the mask
+        // later.
+
+        M = NULL ;
+        Mask_comp = false ;
+
+        int nth = GB_nthreads (bnvec, chunk, nthreads_max) ;
+        int64_t kk ;
+        // GB_AxB_flopcount requires Bflops be set to zero here
+        #pragma omp parallel for num_threads(nth) schedule(static)
+        for (kk = 0 ; kk <= bnvec ; kk++)
+        { 
+            Bflops [kk] = 0 ;
+        }
+
+        // redo the flop count analysis, without the mask
+        GB_OK (GB_AxB_flopcount (&Mwork, Bflops, NULL, false, A, B, Context)) ;
+        total_flops = Bflops [bnvec] ;
+        GBBURBLE ("(discard mask) ") ;
+    }
+    else if (M != NULL)
+    { 
+        GBBURBLE ("(use mask) ") ;
+    }
+
+    //--------------------------------------------------------------------------
+    // get M
+    //--------------------------------------------------------------------------
+
+    bool mask_is_M = (M != NULL && !Mask_comp) ;
+    const int64_t *GB_RESTRICT Mp = NULL ;
+    const int64_t *GB_RESTRICT Mh = NULL ;
+    const int64_t *GB_RESTRICT Mi = NULL ;
+    // const GB_void *GB_RESTRICT Mx = NULL ;
+    // size_t msize = 0 ;
+    int64_t mnvec = 0 ;
+    bool M_is_hyper = false ;
+    if (M != NULL)
+    { 
+        Mp = M->p ;
+        Mh = M->h ;
+        Mi = M->i ;
+        // Mx = M->x ;
+        // msize = M->type->size ;
+        mnvec = M->nvec ;
+        M_is_hyper = M->is_hyper ;
+    }
+
+    //--------------------------------------------------------------------------
+    // determine # of threads and # of initial coarse tasks
+    //--------------------------------------------------------------------------
+
+    int nthreads = GB_nthreads ((double) total_flops, chunk, nthreads_max) ;
+    ntasks_initial = (nthreads == 1) ?  1 : (GB_NTASKS_PER_THREAD * nthreads) ;
+
+    double target_task_size = ((double) total_flops) / ntasks_initial ;
+    target_task_size = GB_IMAX (target_task_size, chunk) ;
+    double target_fine_size = target_task_size / GB_FINE_WORK ;
+    target_fine_size = GB_IMAX (target_fine_size, chunk) ;
+
+    //--------------------------------------------------------------------------
+    // determine # of parallel tasks
+    //--------------------------------------------------------------------------
+
+    int nfine = 0 ;         // # of fine tasks
+    int ncoarse = 0 ;       // # of coarse tasks
+    max_bjnz = 0 ;          // max (nnz (B (:,j))) of fine tasks
+
+    // FUTURE: also use ultra-fine tasks that compute A(i1:i2,k)*B(k,j)
+
+    if (ntasks_initial > 1)
+    {
+
+        //----------------------------------------------------------------------
+        // construct initial coarse tasks
+        //----------------------------------------------------------------------
+
+        if (!GB_pslice (&Coarse_initial, Bflops, bnvec, ntasks_initial))
+        { 
+            // out of memory
+            GB_FREE_ALL ;
+            return (GB_OUT_OF_MEMORY) ;
+        }
+
+        //----------------------------------------------------------------------
+        // split the work into coarse and fine tasks
+        //----------------------------------------------------------------------
+
+        for (int taskid = 0 ; taskid < ntasks_initial ; taskid++)
+        {
+            // get the initial coarse task
+            int64_t kfirst = Coarse_initial [taskid] ;
+            int64_t klast  = Coarse_initial [taskid+1] ;
+            int64_t task_ncols = klast - kfirst ;
+            int64_t task_flops = Bflops [klast] - Bflops [kfirst] ;
+
+            if (task_ncols == 0)
+            { 
+                // This coarse task is empty, having been squeezed out by
+                // costly vectors in adjacent coarse tasks.
+            }
+            else if (task_flops > 2 * GB_COSTLY * target_task_size)
+            {
+                // This coarse task is too costly, because it contains one or
+                // more costly vectors.  Split its vectors into a mixture of
+                // coarse and fine tasks.
+
+                int64_t kcoarse_start = kfirst ;
+
+                for (int64_t kk = kfirst ; kk < klast ; kk++)
+                {
+                    // jflops = # of flops to compute a single vector A*B(:,j)
+                    // where j == (Bh == NULL) ? kk : Bh [kk].
+                    double jflops = Bflops [kk+1] - Bflops [kk] ;
+                    // bjnz = nnz (B (:,j))
+                    int64_t bjnz = Bp [kk+1] - Bp [kk] ;
+
+                    if (jflops > GB_COSTLY * target_task_size && bjnz > 1)
+                    {
+                        // A*B(:,j) is costly; split it into 2 or more fine
+                        // tasks.  First flush the prior coarse task, if any.
+                        if (kcoarse_start < kk)
+                        { 
+                            // vectors kcoarse_start to kk-1 form a single
+                            // coarse task
+                            ncoarse++ ;
+                        }
+
+                        // next coarse task (if any) starts at kk+1
+                        kcoarse_start = kk+1 ;
+
+                        // vector kk will be split into multiple fine tasks
+                        max_bjnz = GB_IMAX (max_bjnz, bjnz) ;
+                        int team_size = ceil (jflops / target_fine_size) ;
+                        nfine += team_size ;
+                    }
+                }
+
+                // flush the last coarse task, if any
+                if (kcoarse_start < klast)
+                { 
+                    // vectors kcoarse_start to klast-1 form a single
+                    // coarse task
+                    ncoarse++ ;
+                }
+
+            }
+            else
+            { 
+                // This coarse task is OK as-is.
+                ncoarse++ ;
+            }
+        }
+    }
+    else
+    {
+
+        //----------------------------------------------------------------------
+        // entire computation in a single fine or coarse task
+        //----------------------------------------------------------------------
+
+        if (bnvec == 1)
+        { 
+            // If B is a single vector, and is computed by a single thread,
+            // then a single fine task is used.
+            nfine = 1 ;
+            ncoarse = 0 ;
+        }
+        else
+        { 
+            // One thread uses a single coarse task if B is not a vector.
+            nfine = 0 ;
+            ncoarse = 1 ;
+        }
+    }
+
+    ntasks = ncoarse + nfine ;
+
+    //--------------------------------------------------------------------------
+    // allocate the tasks, and workspace to construct fine tasks
+    //--------------------------------------------------------------------------
+
+    GB_CALLOC_MEMORY (TaskList, ntasks, sizeof (GB_saxpy3task_struct)) ;
+    if (max_bjnz > 0)
+    { 
+        // also allocate workspace to construct fine tasks
+        GB_MALLOC_MEMORY (Fine_slice, ntasks+1, sizeof (int64_t)) ;
+        GB_MALLOC_MEMORY (Bflops2, max_bjnz+1, sizeof (int64_t)) ;
+    }
+
+    if (TaskList == NULL ||
+        (max_bjnz > 0 && (Fine_slice == NULL || Bflops2 == NULL)))
+    { 
+        // out of memory
+        GB_FREE_ALL ;
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // create the tasks
+    //--------------------------------------------------------------------------
+
+    if (ntasks_initial > 1)
+    {
+
+        //----------------------------------------------------------------------
+        // create the coarse and fine tasks
+        //----------------------------------------------------------------------
+
+        int nf = 0 ;        // fine tasks have task id 0:nfine-1
+        int nc = nfine ;    // coarse task ids are nfine:ntasks-1
+
+        for (int taskid = 0 ; taskid < ntasks_initial ; taskid++)
+        {
+            // get the initial coarse task
+            int64_t kfirst = Coarse_initial [taskid] ;
+            int64_t klast  = Coarse_initial [taskid+1] ;
+            int64_t task_ncols = klast - kfirst ;
+            int64_t task_flops = Bflops [klast] - Bflops [kfirst] ;
+
+            if (task_ncols == 0)
+            { 
+                // This coarse task is empty, having been squeezed out by
+                // costly vectors in adjacent coarse tasks.
+            }
+            else if (task_flops > 2 * GB_COSTLY * target_task_size)
+            {
+                // This coarse task is too costly, because it contains one or
+                // more costly vectors.  Split its vectors into a mixture of
+                // coarse and fine tasks.
+
+                int64_t kcoarse_start = kfirst ;
+
+                for (int64_t kk = kfirst ; kk < klast ; kk++)
+                {
+                    // jflops = # of flops to compute a single vector A*B(:,j)
+                    double jflops = Bflops [kk+1] - Bflops [kk] ;
+                    // bjnz = nnz (B (:,j))
+                    int64_t bjnz = Bp [kk+1] - Bp [kk] ;
+
+                    if (jflops > GB_COSTLY * target_task_size && bjnz > 1)
+                    {
+                        // A*B(:,j) is costly; split it into 2 or more fine
+                        // tasks.  First flush the prior coarse task, if any.
+                        if (kcoarse_start < kk)
+                        { 
+                            // kcoarse_start:kk-1 form a single coarse task
+                            GB_create_coarse_task (kcoarse_start, kk-1,
+                                TaskList, nc++, Bflops, cvlen,
+                                chunk, nthreads_max, AxB_method) ;
+                        }
+
+                        // next coarse task (if any) starts at kk+1
+                        kcoarse_start = kk+1 ;
+
+                        // get the mask M(:,j), for C<M>=A*B
+                        int64_t im_first = -1, im_last = -1 ;
+                        if (mask_is_M)
+                        {
+                            int64_t j = (Bh == NULL) ? kk : Bh [kk] ;
+                            int64_t mpleft = 0 ;
+                            int64_t mpright = mnvec-1 ;
+                            int64_t pM, pM_end ;
+                            GB_lookup (M_is_hyper, Mh, Mp, &mpleft, mpright, j,
+                                &pM, &pM_end) ;
+                            int64_t mjnz = pM_end - pM ;    // nnz (M (:,j))
+                            // For C<M>=A*B, if M(:,j) is empty, then there
+                            // would be no flops to compute C(:,j), and thus
+                            // no fine tasks constructed for C(:,j).
+                            // Thus mjnz > 0 must hold.
+                            ASSERT (mjnz > 0) ;
+                            if (mjnz > 0)   // but check anyway, just to be safe
+                            { 
+                                im_first = Mi [pM] ;
+                                im_last  = Mi [pM_end-1] ;
+                            }
+                        }
+
+                        // count the work for each entry B(k,j).  Do not
+                        // include the work to scan M(:,j), since that will
+                        // be evenly divided between all tasks in this team.
+                        // Do check if M(:,j) and A(:,k) are disjoint, for
+                        // C<M>=A*B, when accounting for the flops for B(k,j).
+                        int64_t pB_start = Bp [kk] ;
+                        int nth = GB_nthreads (bjnz, chunk, nthreads_max) ;
+                        int64_t s ;
+                        #pragma omp parallel for num_threads(nth) \
+                            schedule(static)
+                        for (s = 0 ; s < bjnz ; s++)
+                        {
+                            // get B(k,j)
+                            int64_t k = Bi [pB_start + s] ;
+                            // fl = flop count for just A(:,k)*B(k,j)
+                            int64_t pA, pA_end ;
+                            int64_t pleft = 0 ;
+                            GB_lookup (A_is_hyper, Ah, Ap, &pleft, anvec-1, k,
+                                &pA, &pA_end) ;
+                            int64_t fl = pA_end - pA ;
+                            if (mask_is_M && fl > 0)
+                            { 
+                                // no work if A(:,k) and M(:,j) disjoint
+                                int64_t alo = Ai [pA] ;      // get first A(:,k)
+                                int64_t ahi = Ai [pA_end-1] ;// get last A(:,k)
+                                if (ahi < im_first || alo > im_last) fl = 0 ;
+                            }
+                            Bflops2 [s] = fl ;
+                            ASSERT (fl >= 0) ;
+                        }
+
+                        // cumulative sum of flops to compute A*B(:,j)
+                        GB_cumsum (Bflops2, bjnz, NULL, nth) ;
+
+                        // slice B(:,j) into fine tasks
+                        int team_size = ceil (jflops / target_fine_size) ;
+                        ASSERT (Fine_slice != NULL) ;
+                        GB_pslice (&Fine_slice, Bflops2, bjnz, team_size) ;
+
+                        // shared hash table for all fine tasks for A*B(:,j)
+                        int64_t hsize = 
+                            GB_hash_table_size (jflops, cvlen, AxB_method) ;
+
+                        // construct the fine tasks for C(:,j)=A*B(:,j)
+                        int master = nf ;
+                        for (int fid = 0 ; fid < team_size ; fid++)
+                        { 
+                            int64_t pstart = Fine_slice [fid] ;
+                            int64_t pend   = Fine_slice [fid+1] ;
+                            int64_t fl = Bflops2 [pend] - Bflops2 [pstart] ;
+                            TaskList [nf].start  = pB_start + pstart ;
+                            TaskList [nf].end    = pB_start + pend - 1 ;
+                            TaskList [nf].vector = kk ;
+                            TaskList [nf].hsize  = hsize ;
+                            TaskList [nf].Hi = NULL ;   // assigned later
+                            TaskList [nf].Hf = NULL ;   // assigned later
+                            TaskList [nf].Hx = NULL ;   // assigned later
+                            TaskList [nf].my_cjnz = 0 ;
+                            TaskList [nf].flops = fl ;
+                            TaskList [nf].master = master ;
+                            TaskList [nf].team_size = team_size ;
+                            nf++ ;
+                        }
+                    }
+                }
+
+                // flush the last coarse task, if any
+                if (kcoarse_start < klast)
+                { 
+                    // kcoarse_start:klast-1 form a single coarse task
+                    GB_create_coarse_task (kcoarse_start, klast-1, TaskList,
+                        nc++, Bflops, cvlen, chunk, nthreads_max, AxB_method) ;
+                }
+
+            }
+            else
+            { 
+                // This coarse task is OK as-is.
+                GB_create_coarse_task (kfirst, klast-1, TaskList, nc++, Bflops,
+                    cvlen, chunk, nthreads_max, AxB_method) ;
+            }
+        }
+
+    }
+    else
+    {
+
+        //----------------------------------------------------------------------
+        // entire computation in a single fine or coarse task
+        //----------------------------------------------------------------------
+
+        // create a single coarse task
+        GB_create_coarse_task (0, bnvec-1, TaskList, 0, Bflops, cvlen, 1, 1,
+            AxB_method) ;
+
+        if (bnvec == 1)
+        { 
+            // convert the single coarse task into a single fine task
+            TaskList [0].start  = 0 ;           // first entry in B(:,0)
+            TaskList [0].end    = bnz - 1 ;     // last entry in B(:,0)
+            TaskList [0].vector = 0 ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace used to create the tasks
+    //--------------------------------------------------------------------------
+
+    // Frees Bflops2, Coarse_initial, and Fine_slice.  These do not need to
+    // be freed in the GB_Asaxpy3B worker below.
+
+    GB_FREE_INITIAL_WORK ;
+
+    //--------------------------------------------------------------------------
+
+    #if GB_BURBLE
+    int nfine_hash = 0 ;
+    int nfine_gus = 0 ;
+    int ncoarse_hash = 0 ;
+    int ncoarse_1hash = 0 ;
+    int ncoarse_gus = 0 ;
+    for (int taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+        int64_t hash_size = TaskList [taskid].hsize ;
+        bool is_fine = (taskid < nfine) ;
+        bool use_Gustavson = (hash_size == cvlen) ;
+        if (is_fine)
+        {
+            // fine task
+            if (use_Gustavson)
+            {
+                // fine Gustavson task
+                nfine_gus++ ;
+            }
+            else
+            {
+                // fine hash task
+                nfine_hash++ ;
+            }
+        }
+        else
+        {
+            // coarse task
+            int64_t kfirst = TaskList [taskid].start ;
+            int64_t klast = TaskList [taskid].end ;
+            if (use_Gustavson)
+            {
+                // coarse Gustavson task
+                ncoarse_gus++ ;
+            }
+            else
+            {
+                // hash task
+                ncoarse_hash++ ;
+            }
+        }
+    }
+
+    GBBURBLE ("nthreads %d ntasks %d coarse: (gus: %d hash: %d)"
+        " fine: (gus: %d hash: %d) ", nthreads, ntasks,
+        ncoarse_gus, ncoarse_hash, nfine_gus, nfine_hash) ;
+    #endif
+
+    // Bflops is no longer needed as an alias for Cp
+    Bflops = NULL ;
+
+    //--------------------------------------------------------------------------
+    // allocate the hash tables
+    //--------------------------------------------------------------------------
+
+    // If Gustavson's method is used (coarse tasks):
+    //
+    //      hash_size is cvlen.
+    //      Hi is not allocated.
+    //      Hf and Hx are both of size hash_size.
+    //
+    //      (Hf [i] == mark) is true if i is in the hash table.
+    //      Hx [i] is the value of C(i,j) during the numeric phase.
+    //
+    //      Gustavson's method is used if the hash_size for the Hash method
+    //      is a significant fraction of cvlen. 
+    //
+    // If the Hash method is used (coarse tasks):
+    //
+    //      hash_size is 2 times the smallest power of 2 that is larger than
+    //      the # of flops required for any column C(:,j) being computed.  This
+    //      ensures that all entries have space in the hash table, and that the
+    //      hash occupancy will never be more than 50%.  It is always smaller
+    //      than cvlen (otherwise, Gustavson's method is used).
+    //
+    //      A hash function is used for the ith entry:
+    //          hash = (i * GB_HASH_FACTOR) & (hash_size-1)
+    //      If a collision occurs, linear probing is used:
+    //          hash = (hash + 1) & (hash_size-1)
+    //
+    //      (Hf [hash] == mark) is true if the position is occupied.
+    //      i = Hi [hash] gives the row index i that occupies that position.
+    //      Hx [hash] is the value of C(i,j) during the numeric phase.
+    //
+    // For both coarse methods:
+    //
+    //      Hf starts out all zero (via calloc), and mark starts out as 1.  To
+    //      clear all of Hf, mark is incremented, so that all entries in Hf are
+    //      not equal to mark.
+
+    // add some padding to the end of each hash table, to avoid false
+    // sharing of cache lines between the hash tables.
+    size_t hx_pad = 64 ;
+    size_t hi_pad = 64 / sizeof (int64_t) ;
+
+    Hi_size_total = 0 ;
+    Hf_size_total = 0 ;
+    Hx_size_total = 0 ;
+
+    // determine the total size of all hash tables
+    for (int taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+        if (taskid != TaskList [taskid].master)
+        { 
+            // allocate a single shared hash table for all fine
+            // tasks that compute a single C(:,j)
+            continue ;
+        }
+
+        int64_t hash_size = TaskList [taskid].hsize ;
+        int64_t k = TaskList [taskid].vector ;
+        bool is_fine = (k >= 0) ;
+        bool use_Gustavson = (hash_size == cvlen) ;
+        // int64_t kfirst = TaskList [taskid].start ;
+        // int64_t klast = TaskList [taskid].end ;
+
+        if (is_fine && use_Gustavson)
+        { 
+            // Hf is uint8_t for the fine Gustavson tasks, but round up
+            // to the nearest number of int64_t values.
+            Hf_size_total += GB_CEIL ((hash_size + hi_pad), sizeof (int64_t)) ;
+        }
+        else
+        { 
+            // all other methods use Hf as int64_t
+            Hf_size_total += (hash_size + hi_pad) ;
+        }
+        if (!is_fine && !use_Gustavson)
+        { 
+            // only coarse hash tasks need Hi
+            Hi_size_total += (hash_size + hi_pad) ;
+        }
+        // all tasks use an Hx array of size hash_size
+        if (!is_any_pair_semiring)
+        { 
+            // except that the ANY_PAIR semiring does not use Hx
+            Hx_size_total += (hash_size * csize + hx_pad) ;
+        }
+    }
+
+    // allocate space for all hash tables
+    if (Hi_size_total > 0)
+    { 
+        GB_MALLOC_MEMORY (Hi_all, Hi_size_total, sizeof (int64_t)) ;
+    }
+    if (Hf_size_total > 0)
+    { 
+        GB_CALLOC_MEMORY (Hf_all, Hf_size_total, sizeof (int64_t)) ;
+    }
+    if (Hx_size_total > 0)
+    { 
+        GB_MALLOC_MEMORY (Hx_all, Hx_size_total, 1) ;
+    }
+
+    if ((Hi_size_total > 0 && Hi_all == NULL) ||
+        (Hf_size_total > 0 && Hf_all == NULL) || 
+        (Hx_size_total > 0 && Hx_all == NULL))
+    { 
+        // out of memory
+        GB_FREE_ALL ;
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    // split the space into separate hash tables
+    int64_t *GB_RESTRICT Hi_split = Hi_all ;
+    int64_t *GB_RESTRICT Hf_split = Hf_all ;
+    GB_void *GB_RESTRICT Hx_split = Hx_all ;
+
+    for (int taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+        if (taskid != TaskList [taskid].master)
+        { 
+            // allocate a single hash table for all fine
+            // tasks that compute a single C(:,j)
+            continue ;
+        }
+
+        TaskList [taskid].Hi = Hi_split ;
+        TaskList [taskid].Hf = (void *) Hf_split ;
+        TaskList [taskid].Hx = Hx_split ;
+
+        int64_t hash_size = TaskList [taskid].hsize ;
+        int64_t k = TaskList [taskid].vector ;
+        bool is_fine = (k >= 0) ;
+        bool use_Gustavson = (hash_size == cvlen) ;
+        // int64_t kfirst = TaskList [taskid].start ;
+        // int64_t klast = TaskList [taskid].end ;
+
+        if (is_fine && use_Gustavson)
+        { 
+            // Hf is uint8_t for the fine Gustavson method
+            Hf_split += GB_CEIL ((hash_size + hi_pad), sizeof (int64_t)) ;
+        }
+        else
+        { 
+            // Hf is int64_t for all other methods
+            Hf_split += (hash_size + hi_pad) ;
+        }
+        if (!is_fine && !use_Gustavson)
+        { 
+            // only coarse hash tasks need Hi
+            Hi_split += (hash_size + hi_pad) ;
+        }
+        // all tasks use an Hx array of size hash_size
+        if (!is_any_pair_semiring)
+        { 
+            Hx_split += (hash_size * csize + hx_pad) ;
+        }
+    }
+
+    // assign shared hash tables to fine task teams
+    for (int taskid = 0 ; taskid < nfine ; taskid++)
+    {
+        int master = TaskList [taskid].master ;
+        ASSERT (TaskList [master].vector >= 0) ;
+        if (taskid != master)
+        { 
+            // this fine task (Gustavson or hash) shares its hash table
+            // with all other tasks in its team, for a single vector C(:,j).
+            ASSERT (TaskList [taskid].vector == TaskList [master].vector) ;
+            TaskList [taskid].Hf = TaskList [master].Hf ;
+            TaskList [taskid].Hx = TaskList [master].Hx ;
+        }
+    }
+
+    //==========================================================================
+    // phase1: symbolic analysis
+    //==========================================================================
+
+    GB_AxB_saxpy3_symbolic (C, M, Mask_comp, Mask_struct, A, B, TaskList,
+        ntasks, nfine, nthreads) ;
+
+    //==========================================================================
+    // C = A*B, via saxpy3 method and built-in semiring
+    //==========================================================================
+
+    bool done = false ;
+
+#ifndef GBCOMPACT
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    #define GB_Asaxpy3B(add,mult,xyname) GB_Asaxpy3B_ ## add ## mult ## xyname
+
+    #define GB_AxB_WORKER(add,mult,xyname)                              \
+    {                                                                   \
+        info = GB_Asaxpy3B (add,mult,xyname) (C, M, Mask_comp,          \
+            Mask_struct, A, A_is_pattern, B, B_is_pattern,              \
+            TaskList, ntasks, nfine, nthreads, Context) ;               \
+        done = (info != GrB_NO_VALUE) ;                                 \
+    }                                                                   \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    if (builtin_semiring)
+    { 
+        #include "GB_AxB_factory.c"
+    }
+
+#endif
+
+    //==========================================================================
+    // C = A*B, via the generic saxpy3 method, with typecasting
+    //==========================================================================
+
+    if (!done)
+    { 
+        GB_BURBLE_MATRIX (C, "generic ") ;
+        info = GB_AxB_saxpy3_generic (C, M, Mask_comp, Mask_struct,
+            A, A_is_pattern, B, B_is_pattern, semiring, flipxy,
+            TaskList, ntasks, nfine, nthreads, Context) ;
+    }
+
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        GB_FREE_ALL ;
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    //==========================================================================
+    // prune empty vectors, free workspace, and return result
+    //==========================================================================
+
+    GB_FREE_WORK ;
+    info = GB_hypermatrix_prune (C, Context) ;
+    if (info == GrB_SUCCESS) { ASSERT_MATRIX_OK (C, "saxpy3: output", GB0) ; }
+    ASSERT (*Chandle == C) ;
+    ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (!GB_PENDING (C)) ;
+    (*mask_applied) = (M != NULL) ;
+    return (info) ;
+}
+
diff --git a/Source/GB_AxB_saxpy3.h b/Source/GB_AxB_saxpy3.h
new file mode 100644
index 0000000000..dab079ae6b
--- /dev/null
+++ b/Source/GB_AxB_saxpy3.h
@@ -0,0 +1,136 @@
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3.h: definitions for C=A*B saxpy3 method
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// GB_AxB_saxpy3 method uses a mix of Gustavson's method and the Hash method,
+// combining the two for any given C=A*B computation.
+
+#ifndef GB_AXB_SAXPY3_H
+#define GB_AXB_SAXPY3_H
+#include "GB.h"
+
+//------------------------------------------------------------------------------
+// functions for the Hash method for C=A*B
+//------------------------------------------------------------------------------
+
+#define GB_HASH_FACTOR 107
+
+// initial hash function, for where to place the integer i in the hash table.
+// hash_bits must be in scope at the point of use; it is a bit mask to compute
+// the result modulo the hash table size, which is always a power of 2.
+#define GB_HASH_FUNCTION(i) (((i) * GB_HASH_FACTOR) & (hash_bits))
+
+// rehash function, for subsequent hash lookups if the initial hash function
+// refers to a hash entry that is already occupied.  Linear probing is used,
+// so the function does not currently depend on i.  On input, hash is equal
+// to the current value of the hash function, and on output, hash is set to
+// the new hash value.  The macro arguments are parenthesized where evaluated.
+#define GB_REHASH(hash,i) hash = (((hash) + 1) & (hash_bits))
+
+// The hash functions and their parameters are from this paper:
+
+// [2] Yusuke Nagasaka, Satoshi Matsuoka, Ariful Azad, and Aydın Buluç. 2018.
+// High-Performance Sparse Matrix-Matrix Products on Intel KNL and Multicore
+// Architectures. In Proc. 47th Intl. Conf. on Parallel Processing (ICPP '18).
+// Association for Computing Machinery, New York, NY, USA, Article 34, 1–10.
+// DOI:https://doi.org/10.1145/3229710.3229720
+
+//------------------------------------------------------------------------------
+// GB_saxpy3task_struct: task descriptor for GB_AxB_saxpy3
+//------------------------------------------------------------------------------
+
+// A coarse task computes C(:,j1:j2) = A*B(:,j1:j2), for a contiguous set of
+// vectors j1:j2.  A coarse taskid is denoted by TaskList [taskid].vector == -1,
+// kfirst = TaskList [taskid].start, and klast = TaskList [taskid].end, and
+// where j1 = (Bh == NULL) ? kfirst : Bh [kfirst] and likewise for j2.  No
+// summation is needed for the final result of each coarse task.
+
+// A fine task computes A*B(k1:k2,j) for a single vector C(:,j), for a
+// contiguous range k1:k2, where kk = TaskList [taskid].vector (which is >= 0),
+// k1 = Bi [TaskList [taskid].start], k2 = Bi [TaskList [taskid].end].  It sums
+// its computations in a hash table shared by all fine tasks that compute
+// C(:,j), via atomics.  The vector index j is either kk if B is standard, or j
+// = B->h [kk] if B is hypersparse.
+
+// Both tasks use a hash table allocated uniquely for the task, in Hi, Hf, and
+// Hx.  The size of the hash table is determined by the maximum # of flops
+// needed to compute any vector in C(:,j1:j2) for a coarse task, or the entire
+// computation of the single vector in a fine task.  For the Hash method, the
+// table has a size that is twice the smallest power of 2 larger than the
+// flop count.  If this size is a significant fraction of C->vlen, then the
+// Hash method is not used, and Gustavson's method is used instead, with the
+// hash size set to C->vlen.
+
+typedef struct
+{
+    int64_t start ;     // starting vector for coarse task, p for fine task
+    int64_t end ;       // ending vector for coarse task, p for fine task
+    int64_t vector ;    // -1 for coarse task, vector kk (>= 0) for fine task
+    int64_t hsize ;     // size of hash table (C->vlen if Gustavson's method)
+    int64_t *Hi ;       // Hi array for hash table (coarse hash tasks only)
+    GB_void *Hf ;       // Hf array for hash table (uint8_t or int64_t)
+    GB_void *Hx ;       // Hx array for hash table
+    int64_t my_cjnz ;   // # entries in C(:,j) found by this fine task
+    int64_t flops ;     // # of flops in this task
+    int master ;        // taskid of the master fine task for vector C(:,j)
+    int team_size ;     // # of fine tasks in the team for vector C(:,j)
+}
+GB_saxpy3task_struct ;
+
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_symbolic: symbolic analysis for GB_AxB_saxpy3
+//------------------------------------------------------------------------------
+
+void GB_AxB_saxpy3_symbolic
+(
+    GrB_Matrix C,               // Cp [k] is computed for coarse tasks
+    const GrB_Matrix M,         // mask matrix M, or NULL if no mask
+    bool Mask_comp,             // M complemented, or not
+    bool Mask_struct,           // M structural (values not used), or not
+    const GrB_Matrix A,         // A matrix; only the pattern is accessed
+    const GrB_Matrix B,         // B matrix; only the pattern is accessed
+    GB_saxpy3task_struct *TaskList,     // list of tasks, and workspace
+    int ntasks,                 // total number of tasks (fine, then coarse)
+    int nfine,                  // number of fine tasks (tasks 0:nfine-1)
+    int nthreads                // number of threads
+) ;
+
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_cumsum: cumulative sum of C->p for GB_AxB_saxpy3
+//------------------------------------------------------------------------------
+
+int64_t GB_AxB_saxpy3_cumsum    // return cjnz_max for fine hash tasks
+(
+    GrB_Matrix C,               // finalize C->p
+    GB_saxpy3task_struct *TaskList, // list of tasks, and workspace
+    int nfine,                  // number of fine tasks (tasks 0:nfine-1)
+    double chunk,               // chunk size, passed to GB_nthreads
+    int nthreads                // number of threads
+) ;
+
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_generic: for any types and operators
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxB_saxpy3_generic
+(
+    GrB_Matrix C,                   // output matrix, C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask
+    const GrB_Matrix A, bool A_is_pattern,  // if true, A values are not used
+    const GrB_Matrix B, bool B_is_pattern,  // if true, B values are not used
+    const GrB_Semiring semiring,    // semiring that defines C=A*B
+    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // list of tasks, workspace
+    const int ntasks,               // total number of tasks
+    const int nfine,                // number of fine tasks
+    const int nthreads,             // number of threads
+    GB_Context Context
+) ;
+
+#endif
+
diff --git a/Source/GB_AxB_saxpy3_cumsum.c b/Source/GB_AxB_saxpy3_cumsum.c
new file mode 100644
index 0000000000..3faf1919e7
--- /dev/null
+++ b/Source/GB_AxB_saxpy3_cumsum.c
@@ -0,0 +1,156 @@
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_cumsum: finalize nnz(C(:,j)) and find cumulative sum of Cp
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// phase3: fine tasks finalize their computation nnz(C(:,j))
+// phase4: cumulative sum of C->p
+
+#include "GB_AxB_saxpy3.h"
+
+int64_t GB_AxB_saxpy3_cumsum    // return cjnz_max for fine hash tasks
+(
+    GrB_Matrix C,               // finalize C->p
+    GB_saxpy3task_struct *TaskList, // list of tasks, and workspace
+    int nfine,                  // number of fine tasks (tasks 0:nfine-1)
+    double chunk,               // chunk size, passed to GB_nthreads
+    int nthreads                // number of threads
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get C
+    //--------------------------------------------------------------------------
+
+    int64_t *GB_RESTRICT Cp = C->p ;
+    const int64_t cvlen = C->vlen ;
+    const int64_t cnvec = C->nvec ;
+
+    //==========================================================================
+    // phase3: count nnz(C(:,j)) for fine tasks
+    //==========================================================================
+
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < nfine ; taskid++)
+    {
+
+        //----------------------------------------------------------------------
+        // get the task descriptor
+        //----------------------------------------------------------------------
+
+        // my_teamid: position of this task in its team (0 for the master task)
+        int64_t hash_size = TaskList [taskid].hsize ;
+        bool use_Gustavson = (hash_size == cvlen) ;
+        int team_size = TaskList [taskid].team_size ;
+        int master    = TaskList [taskid].master ;
+        int my_teamid = taskid - master ;
+        int64_t my_cjnz = 0 ;
+
+        if (use_Gustavson)
+        {
+
+            //------------------------------------------------------------------
+            // phase3: fine Gustavson task, C=A*B, C<M>=A*B, or C<!M>=A*B
+            //------------------------------------------------------------------
+
+            // Hf [i] == 2 if C(i,j) is an entry in C(:,j).  Hf is uint8_t here.
+
+            uint8_t *GB_RESTRICT Hf = (uint8_t *) TaskList [taskid].Hf ;
+            int64_t istart, iend ;
+            GB_PARTITION (istart, iend, cvlen, my_teamid, team_size) ;
+            for (int64_t i = istart ; i < iend ; i++)
+            {
+                if (Hf [i] == 2)
+                { 
+                    my_cjnz++ ;
+                }
+            }
+
+        }
+        else
+        {
+
+            //------------------------------------------------------------------
+            // phase3: fine hash task, C=A*B, C<M>=A*B, or C<!M>=A*B
+            //------------------------------------------------------------------
+
+            // (Hf [hash] & 3) == 2 if C(i,j) is an entry in C(:,j), and the
+            // index i of the entry is (Hf [hash] >> 2) - 1.  Hf is int64_t here.
+
+            int64_t *GB_RESTRICT Hf = (int64_t *) TaskList [taskid].Hf ;
+            int64_t mystart, myend ;
+            GB_PARTITION (mystart, myend, hash_size, my_teamid, team_size) ;
+            for (int64_t hash = mystart ; hash < myend ; hash++)
+            {
+                if ((Hf [hash] & 3) == 2)
+                { 
+                    my_cjnz++ ;
+                }
+            }
+        }
+
+        TaskList [taskid].my_cjnz = my_cjnz ;   // count my nnz(C(:,j))
+    }
+
+    //==========================================================================
+    // phase4: compute Cp with cumulative sum
+    //==========================================================================
+
+    // TaskList [taskid].my_cjnz is the # of unique entries found in C(:,j) by
+    // that task.  Sum these terms to compute total # of entries in C(:,j).
+
+    for (taskid = 0 ; taskid < nfine ; taskid++)
+    { 
+        int64_t kk = TaskList [taskid].vector ;
+        Cp [kk] = 0 ;
+    }
+
+    for (taskid = 0 ; taskid < nfine ; taskid++)
+    { 
+        int64_t kk = TaskList [taskid].vector ;
+        int64_t my_cjnz = TaskList [taskid].my_cjnz ;
+        Cp [kk] += my_cjnz ;
+        ASSERT (my_cjnz <= cvlen) ;
+    }
+
+    // Cp [kk] is now nnz (C (:,j)), for all vectors j, whether computed by
+    // fine tasks or coarse tasks, and where j == (Bh == NULL) ? kk : Bh [kk].
+
+    int nth = GB_nthreads (cnvec, chunk, nthreads) ;
+    GB_cumsum (Cp, cnvec, &(C->nvec_nonempty), nth) ;
+
+    // my_cjnz becomes the sum of the counts of the prior tasks in the same team
+    int64_t cjnz_sum = 0 ;
+    int64_t cjnz_max = 0 ;
+    for (taskid = 0 ; taskid < nfine ; taskid++)
+    {
+        if (taskid == TaskList [taskid].master)
+        {
+            cjnz_sum = 0 ;
+            // also find the max nnz (C (:,j)) for any fine hash tasks
+            int64_t hash_size = TaskList [taskid].hsize ;
+            bool use_Gustavson = (hash_size == cvlen) ;
+            if (!use_Gustavson)
+            { 
+                int64_t kk = TaskList [taskid].vector ;
+                int64_t cjnz = Cp [kk+1] - Cp [kk] ;
+                cjnz_max = GB_IMAX (cjnz_max, cjnz) ;
+            }
+        }
+        int64_t my_cjnz = TaskList [taskid].my_cjnz ;
+        TaskList [taskid].my_cjnz = cjnz_sum ;
+        cjnz_sum += my_cjnz ;
+    }
+
+    //--------------------------------------------------------------------------
+    // return result
+    //--------------------------------------------------------------------------
+
+    return (cjnz_max) ;
+}
+
diff --git a/Source/GB_AxB_saxpy3_generic.c b/Source/GB_AxB_saxpy3_generic.c
new file mode 100644
index 0000000000..26af2a22c9
--- /dev/null
+++ b/Source/GB_AxB_saxpy3_generic.c
@@ -0,0 +1,178 @@
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_generic: compute C=A*B, C<M>=A*B, or C<!M>=A*B in parallel
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// GB_AxB_saxpy3_generic computes C=A*B, C<M>=A*B, or C<!M>=A*B in parallel,
+// with arbitrary types and operators.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mxm.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_bracket.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+
+GrB_Info GB_AxB_saxpy3_generic
+(
+    GrB_Matrix C,                   // output matrix, C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask
+    const GrB_Matrix A, bool A_is_pattern,  // if true, A values are not used
+    const GrB_Matrix B, bool B_is_pattern,  // if true, B values are not used
+    const GrB_Semiring semiring,    // semiring that defines C=A*B
+    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // list of tasks, workspace
+    const int ntasks,               // total number of tasks
+    const int nfine,                // number of fine tasks
+    const int nthreads,             // number of threads
+    GB_Context Context
+)
+{
+
+    //----------------------------------------------------------------------
+    // get operators, functions, workspace, contents of A, B, and C
+    //----------------------------------------------------------------------
+
+    GrB_BinaryOp mult = semiring->multiply ;
+    GrB_Monoid add = semiring->add ;
+    ASSERT (mult->ztype == add->op->ztype) ;
+
+    GxB_binary_function fmult = mult->function ;
+    GxB_binary_function fadd  = add->op->function ;
+
+    size_t csize = C->type->size ;
+    size_t asize = A_is_pattern ? 0 : A->type->size ;
+    size_t bsize = B_is_pattern ? 0 : B->type->size ;
+
+    size_t xsize = mult->xtype->size ;
+    size_t ysize = mult->ytype->size ;
+
+    // scalar workspace: because of typecasting, the sizes of the x/y types
+    // need not be the same as the sizes of the A and B types.
+    // flipxy false: aik = (xtype) A(i,k) and bkj = (ytype) B(k,j)
+    // flipxy true:  aik = (ytype) A(i,k) and bkj = (xtype) B(k,j)
+    size_t aik_size = flipxy ? ysize : xsize ;
+    size_t bkj_size = flipxy ? xsize : ysize ;
+
+    GB_void *GB_RESTRICT terminal = add->terminal ;
+    GB_void *GB_RESTRICT identity = add->identity ;
+
+    GB_cast_function cast_A, cast_B ;
+    if (flipxy)
+    { 
+        // A is typecasted to y, and B is typecasted to x
+        cast_A = A_is_pattern ? NULL : 
+                 GB_cast_factory (mult->ytype->code, A->type->code) ;
+        cast_B = B_is_pattern ? NULL : 
+                 GB_cast_factory (mult->xtype->code, B->type->code) ;
+    }
+    else
+    { 
+        // A is typecasted to x, and B is typecasted to y
+        cast_A = A_is_pattern ? NULL :
+                 GB_cast_factory (mult->xtype->code, A->type->code) ;
+        cast_B = B_is_pattern ? NULL :
+                 GB_cast_factory (mult->ytype->code, B->type->code) ;
+    }
+
+    //----------------------------------------------------------------------
+    // C = A*B via saxpy3 method, function pointers, and typecasting
+    //----------------------------------------------------------------------
+
+    #define GB_IDENTITY identity
+
+    // aik = A(i,k), located in Ax [pA]; left uninitialized if A_is_pattern
+    #define GB_GETA(aik,Ax,pA)                                          \
+        GB_void aik [GB_VLA(aik_size)] ;                                \
+        if (!A_is_pattern) cast_A (aik, Ax +((pA)*asize), asize)
+
+    // bkj = B(k,j), located in Bx [pB]; left uninitialized if B_is_pattern
+    #define GB_GETB(bkj,Bx,pB)                                          \
+        GB_void bkj [GB_VLA(bkj_size)] ;                                \
+        if (!B_is_pattern) cast_B (bkj, Bx +((pB)*bsize), bsize)
+
+    // t = A(i,k) * B(k,j); GB_MULTIPLY is defined below, just before each
+    // inclusion of the template
+    #define GB_MULT(t, aik, bkj)                                        \
+        GB_MULTIPLY (t, aik, bkj)
+
+    // define t for each task
+    #define GB_CIJ_DECLARE(t)                                           \
+        GB_void t [GB_VLA(csize)]
+
+    // address of Cx [p]
+    #define GB_CX(p) (Cx +((p)*csize))
+
+    // Cx [p] = t
+    #define GB_CIJ_WRITE(p,t)                                           \
+        memcpy (GB_CX (p), t, csize)
+
+    // Cx [p] += t
+    #define GB_CIJ_UPDATE(p,t)                                          \
+        fadd (GB_CX (p), GB_CX (p), t)
+
+    // address of Hx [i]
+    #define GB_HX(i) (Hx +((i)*csize))
+
+    // atomic update not available for function pointers
+    #define GB_HAS_ATOMIC 0
+
+    // normal Hx [i] += t
+    #define GB_HX_UPDATE(i, t)                                          \
+        fadd (GB_HX (i), GB_HX (i), t)
+
+    // normal Hx [i] = t
+    #define GB_HX_WRITE(i, t)                                           \
+        memcpy (GB_HX (i), t, csize)
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i)                                          \
+        memcpy (GB_CX (p), GB_HX(i), csize)
+
+    // memcpy (&(Cx [pC]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(pC,i,len) \
+        memcpy (Cx +((pC)*csize), Hx +((i)*csize), (len) * csize)
+
+    // 1 if monoid update can be skipped entirely (the ANY monoid)
+    #define GB_IS_ANY_MONOID 0
+
+    // user-defined monoid update cannot be done with an OpenMP atomic
+    #define GB_HAS_OMP_ATOMIC 0
+
+    // not an ANY_PAIR semiring
+    #define GB_IS_ANY_PAIR_SEMIRING 0
+
+    // not a PAIR multiply operator
+    #define GB_IS_PAIR_MULTIPLIER 0
+
+    #define GB_ATYPE GB_void
+    #define GB_BTYPE GB_void
+    #define GB_CTYPE GB_void
+
+    // no vectorization
+    #define GB_PRAGMA_VECTORIZE
+    #define GB_PRAGMA_VECTORIZE_DOT
+
+    // definitions for GB_AxB_saxpy3_template.c
+    #include "GB_AxB_saxpy3_template.h"
+
+    if (flipxy)
+    { 
+        #define GB_MULTIPLY(z,x,y) fmult (z,y,x)
+        #include "GB_AxB_saxpy3_template.c"
+        #undef GB_MULTIPLY
+    }
+    else
+    { 
+        #define GB_MULTIPLY(z,x,y) fmult (z,x,y)
+        #include "GB_AxB_saxpy3_template.c"
+        #undef GB_MULTIPLY
+    }
+
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_AxB_saxpy3_symbolic.c b/Source/GB_AxB_saxpy3_symbolic.c
new file mode 100644
index 0000000000..b8bfa41453
--- /dev/null
+++ b/Source/GB_AxB_saxpy3_symbolic.c
@@ -0,0 +1,634 @@
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_symbolic: symbolic analysis for GB_AxB_saxpy3
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Symbolic analysis for C=A*B, C<M>=A*B or C<!M>=A*B, via GB_AxB_saxpy3.
+// Coarse tasks compute nnz (C (:,j)) for each of their vectors j.  Fine tasks
+// just scatter the mask M into the hash table.  This phase does not depend on
+// the semiring, nor does it depend on the type of C, A, or B.  It does access
+// the values of M, if the mask matrix M is present and not structural.
+
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB_saxpy3_template.h"
+#include "GB_atomics.h"
+#include "GB_bracket.h"
+// GB_GET_A_k and GB_GET_M_j declare aknz and mjnz, but these are unused here.
+#include "GB_unused.h"
+
+void GB_AxB_saxpy3_symbolic
+(
+    GrB_Matrix C,               // Cp [k] is computed for coarse tasks
+    const GrB_Matrix M,         // mask matrix M
+    bool Mask_comp,             // M complemented, or not
+    bool Mask_struct,           // M structural, or not
+    const GrB_Matrix A,         // A matrix; only the pattern is accessed
+    const GrB_Matrix B,         // B matrix; only the pattern is accessed
+    GB_saxpy3task_struct *TaskList,     // list of tasks, and workspace
+    int ntasks,                 // total number of tasks
+    int nfine,                  // number of fine tasks
+    int nthreads                // number of threads
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get M, A, B, and C
+    //--------------------------------------------------------------------------
+
+    int64_t *GB_RESTRICT Cp = C->p ;
+    // const int64_t *GB_RESTRICT Ch = C->h ;
+    const int64_t cvlen = C->vlen ;
+    // const int64_t cnvec = C->nvec ;
+
+    const int64_t *GB_RESTRICT Bp = B->p ;
+    const int64_t *GB_RESTRICT Bh = B->h ;
+    const int64_t *GB_RESTRICT Bi = B->i ;
+    // const GB_BTYPE *GB_RESTRICT Bx = B_is_pattern ? NULL : B->x ;
+    // const int64_t bvlen = B->vlen ;
+    // const int64_t bnvec = B->nvec ;
+    // const bool B_is_hyper = B->is_hyper ;
+
+    const int64_t *GB_RESTRICT Ap = A->p ;
+    const int64_t *GB_RESTRICT Ah = A->h ;
+    const int64_t *GB_RESTRICT Ai = A->i ;
+    const int64_t anvec = A->nvec ;
+    const bool A_is_hyper = GB_IS_HYPER (A) ;
+    // const GB_ATYPE *GB_RESTRICT Ax = A_is_pattern ? NULL : A->x ;
+
+    const int64_t *GB_RESTRICT Mp = NULL ;
+    const int64_t *GB_RESTRICT Mh = NULL ;
+    const int64_t *GB_RESTRICT Mi = NULL ;
+    const GB_void *GB_RESTRICT Mx = NULL ;
+    size_t msize = 0 ;
+    int64_t mnvec = 0 ;
+    bool M_is_hyper = false ;
+    if (M != NULL)
+    { 
+        Mp = M->p ;
+        Mh = M->h ;
+        Mi = M->i ;
+        Mx = (Mask_struct ? NULL : (M->x)) ;
+        msize = M->type->size ;
+        mnvec = M->nvec ;
+        M_is_hyper = M->is_hyper ;
+    }
+
+    // 3 cases:
+    //      M not present and Mask_comp false: compute C=A*B
+    //      M present     and Mask_comp false: compute C<M>=A*B
+    //      M present     and Mask_comp true : compute C<!M>=A*B
+    // If M is NULL on input, then Mask_comp is also false on input.
+
+    bool mask_is_M = (M != NULL && !Mask_comp) ;
+
+    //==========================================================================
+    // phase1: count nnz(C(:,j)) for coarse tasks, scatter M for fine tasks
+    //==========================================================================
+
+    // At this point, all of Hf [...] is zero, for all tasks.
+    // Hi and Hx are not initialized.
+
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+
+        //----------------------------------------------------------------------
+        // get the task descriptor
+        //----------------------------------------------------------------------
+
+        int64_t hash_size = TaskList [taskid].hsize ;
+        bool use_Gustavson = (hash_size == cvlen) ;
+
+        if (taskid < nfine)
+        {
+
+            //------------------------------------------------------------------
+            // no work for fine tasks in phase1 if M is not present
+            //------------------------------------------------------------------
+
+            if (M == NULL) continue ;
+
+            //------------------------------------------------------------------
+            // get the task descriptor
+            //------------------------------------------------------------------
+        
+            int64_t kk = TaskList [taskid].vector ;
+            // partition M(:,j)
+            GB_GET_M_j ;        // get M(:,j)
+            int team_size = TaskList [taskid].team_size ;
+            int master    = TaskList [taskid].master ;
+            int my_teamid = taskid - master ;
+            int64_t mystart, myend ;
+            GB_PARTITION (mystart, myend, mjnz, my_teamid, team_size) ;
+            mystart += pM_start ;
+            myend   += pM_start ;
+
+            if (use_Gustavson)
+            { 
+
+                //--------------------------------------------------------------
+                // phase1: fine Gustavson task, C<M>=A*B or C<!M>=A*B
+                //--------------------------------------------------------------
+
+                // Scatter the values of M(:,j) into Hf.  No atomics needed
+                // since all indices i in M(;,j) are unique.
+
+                uint8_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+                GB_SCATTER_M_j (mystart, myend, 1) ;
+
+            }
+            else
+            {
+
+                //--------------------------------------------------------------
+                // phase1: fine hash task, C<M>=A*B or C<!M>=A*B
+                //--------------------------------------------------------------
+
+                // The least significant 2 bits of Hf [hash] is the flag f, and
+                // the upper bits contain h, as (h,f).  After this phase1, if
+                // M(i,j)=1 then the hash table contains ((i+1),1) in Hf [hash]
+                // at some location.
+
+                // Later, the flag values of f = 2 and 3 are also used.
+                // Only f=1 is set in this phase.
+
+                // h == 0,   f == 0: unoccupied and unlocked
+                // h == i+1, f == 1: occupied with M(i,j)=1
+
+                int64_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+                int64_t hash_bits = (hash_size-1) ;
+                for (int64_t pM = mystart ; pM < myend ; pM++) // scan my M(:,j)
+                {
+                    GB_GET_M_ij ;                   // get M(i,j)
+                    if (!mij) continue ;            // skip if M(i,j)=0
+                    int64_t i = Mi [pM] ;
+                    int64_t i_mine = ((i+1) << 2) + 1 ;  // ((i+1),1)
+                    for (GB_HASH (i))
+                    { 
+                        int64_t hf ;
+                        // swap my hash entry into the hash table
+                        GB_ATOMIC_CAPTURE
+                        {
+                            hf = Hf [hash] ; Hf [hash] = i_mine ;
+                        }
+                        if (hf == 0) break ;        // success
+                        // i_mine has been inserted, but a prior entry was
+                        // already there.  It needs to be replaced, so take
+                        // ownership of this displaced entry, and keep
+                        // looking until a new empty slot is found for it.
+                        i_mine = hf ;
+                    }
+                }
+            }
+
+        }
+        else
+        {
+
+            //------------------------------------------------------------------
+            // coarse tasks: compute nnz in each vector of A*B(:,kfirst:klast)
+            //------------------------------------------------------------------
+
+            int64_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+            int64_t kfirst = TaskList [taskid].start ;
+            int64_t klast  = TaskList [taskid].end ;
+            int64_t mark = 0 ;
+            // int64_t nk = klast - kfirst + 1 ;
+
+            if (use_Gustavson)
+            {
+
+                //--------------------------------------------------------------
+                // phase1: coarse Gustavson task
+                //--------------------------------------------------------------
+
+                if (M == NULL)
+                {
+
+                    //----------------------------------------------------------
+                    // phase1: coarse Gustavson task, C=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all Hf.
+                    // Hf [i] is set to mark when C(i,j) is found.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        GB_GET_B_j ;            // get B(:,j)
+                        if (bjnz == 0)
+                        { 
+                            Cp [kk] = 0 ;
+                            continue ;
+                        }
+                        if (bjnz == 1)
+                        { 
+                            int64_t k = Bi [pB] ;   // get B(k,j)
+                            GB_GET_A_k ;            // get A(:,k)
+                            Cp [kk] = aknz ;        // nnz(C(:,j)) = nnz(A(:,k))
+                            continue ;
+                        }
+                        mark++ ;
+                        int64_t cjnz = 0 ;
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        {
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            if (aknz == cvlen)
+                            { 
+                                cjnz = cvlen ;  // A(:,k) is dense
+                                break ;         // so nnz(C(:,j)) = cvlen
+                            }
+                            // scan A(:,k)
+                            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                            {
+                                int64_t i = Ai [pA] ;    // get A(i,k)
+                                if (Hf [i] != mark)     // if true, i is new
+                                { 
+                                    Hf [i] = mark ; // mark C(i,j) as seen
+                                    cjnz++ ;        // C(i,j) is a new entry
+                                }
+                            }
+                        }
+                        Cp [kk] = cjnz ;    // count the entries in C(:,j)
+                    }
+
+                }
+                else if (mask_is_M)
+                {
+
+                    //----------------------------------------------------------
+                    // phase1: coarse Gustavson task, C<M>=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+
+                    // Hf [i] < mark    : M(i,j)=0, C(i,j) is ignored.
+                    // Hf [i] == mark   : M(i,j)=1, and C(i,j) not yet seen.
+                    // Hf [i] == mark+1 : M(i,j)=1, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        GB_GET_B_j ;            // get B(:,j)
+                        if (bjnz == 0)
+                        { 
+                            Cp [kk] = 0 ;
+                            continue ;
+                        }
+                        GB_GET_M_j ;            // get M(:,j)
+                        if (mjnz == 0)
+                        { 
+                            Cp [kk] = 0 ;
+                            continue ;
+                        }
+                        GB_GET_M_j_RANGE (64) ; // get first and last in M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        // scatter M(:,j)
+                        GB_SCATTER_M_j (pM_start, pM_end, mark) ;
+                        int64_t cjnz = 0 ;
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        { 
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            GB_SKIP_IF_A_k_DISJOINT_WITH_M_j ;
+                            #define GB_IKJ_VECTORIZE GB_PRAGMA_VECTORIZE
+                            #define GB_IKJ_IVDEP     GB_PRAGMA_IVDEP
+                            #define GB_IKJ                                     \
+                            {                                                  \
+                                if (Hf [i] == mark)   /* if true, M(i,j) is 1*/\
+                                {                                              \
+                                    Hf [i] = mark1 ;  /* mark C(i,j) as seen */\
+                                    cjnz++ ;          /* C(i,j) is new */      \
+                                }                                              \
+                            }
+                            GB_SCAN_M_j_OR_A_k ;
+                            #undef GB_IKJ_VECTORIZE
+                            #undef GB_IKJ_IVDEP
+                            #undef GB_IKJ
+                        }
+                        Cp [kk] = cjnz ;    // count the entries in C(:,j)
+                    }
+
+                }
+                else
+                {
+
+                    //----------------------------------------------------------
+                    // phase1: coarse Gustavson task, C<!M>=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+
+                    // Hf [i] < mark    : M(i,j)=0, C(i,j) is not yet seen.
+                    // Hf [i] == mark   : M(i,j)=1, so C(i,j) is ignored.
+                    // Hf [i] == mark+1 : M(i,j)=0, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        GB_GET_B_j ;                    // get B(:,j)
+                        if (bjnz == 0)
+                        { 
+                            Cp [kk] = 0 ;
+                            continue ;
+                        }
+                        GB_GET_M_j ;            // get M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        // scatter M(:,j)
+                        GB_SCATTER_M_j (pM_start, pM_end, mark) ;
+                        int64_t cjnz = 0 ;
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        {
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            // scan A(:,k)
+                            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                            {
+                                int64_t i = Ai [pA] ;   // get A(i,k)
+                                if (Hf [i] < mark)      // if true, M(i,j) is 0
+                                { 
+                                    Hf [i] = mark1 ;    // mark C(i,j) as seen
+                                    cjnz++ ;            // C(i,j) is a new entry
+                                }
+                            }
+                        }
+                        Cp [kk] = cjnz ;    // count the entries in C(:,j)
+                    }
+                }
+
+            }
+            else
+            {
+
+                //--------------------------------------------------------------
+                // phase1: coarse hash task
+                //--------------------------------------------------------------
+
+                int64_t *GB_RESTRICT Hi = TaskList [taskid].Hi ;
+                int64_t hash_bits = (hash_size-1) ;
+
+                if (M == NULL)
+                {
+
+                    //----------------------------------------------------------
+                    // phase1: coarse hash task, C=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+                    // Let f = Hf [hash] and h = Hi [hash]
+
+                    // f < mark          : unoccupied.
+                    // h == i, f == mark : occupied with C(i,j)
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        GB_GET_B_j ;            // get B(:,j)
+                        if (bjnz == 0)
+                        { 
+                            Cp [kk] = 0 ; continue ;
+                        }
+                        if (bjnz == 1)
+                        { 
+                            int64_t k = Bi [pB] ;   // get B(k,j)
+                            GB_GET_A_k ;            // get A(:,k)
+                            Cp [kk] = aknz ;        // nnz(C(:,j)) = nnz(A(:,k))
+                            continue ;
+                        }
+                        mark++ ;
+                        int64_t cjnz = 0 ;
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        {
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            // scan A(:,k)
+                            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                            {
+                                int64_t i = Ai [pA] ;   // get A(i,k)
+                                for (GB_HASH (i))       // find i in hash
+                                {
+                                    if (Hf [hash] < mark)
+                                    { 
+                                        Hf [hash] = mark ; // insert C(i,j)
+                                        Hi [hash] = i ;
+                                        cjnz++ ;  // C(i,j) is a new entry.
+                                        break ;
+                                    }
+                                    if (Hi [hash] == i) break ;
+                                }
+                            }
+                        }
+                        Cp [kk] = cjnz ;    // count the entries in C(:,j)
+                    }
+
+                }
+                else if (mask_is_M)
+                {
+
+                    //----------------------------------------------------------
+                    // phase1: coarse hash task, C<M>=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+                    // Let h = Hi [hash] and f = Hf [hash].
+
+                    // f < mark: unoccupied, M(i,j)=0, C(i,j) ignored if
+                    //           this case occurs while scanning A(:,k)
+                    // h == i, f == mark   : M(i,j)=1, and C(i,j) not yet seen.
+                    // h == i, f == mark+1 : M(i,j)=1, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        GB_GET_B_j ;            // get B(:,j)
+                        if (bjnz == 0)
+                        { 
+                            Cp [kk] = 0 ;
+                            continue ;
+                        }
+                        GB_GET_M_j ;            // get M(:,j)
+                        if (mjnz == 0)
+                        { 
+                            Cp [kk] = 0 ;
+                            continue ;
+                        }
+                        GB_GET_M_j_RANGE (64) ; // get first and last in M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        GB_HASH_M_j ;           // hash M(:,j)
+                        int64_t cjnz = 0 ;
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        { 
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            GB_SKIP_IF_A_k_DISJOINT_WITH_M_j ;
+                            #define GB_IKJ_VECTORIZE
+                            #define GB_IKJ_IVDEP
+                            #define GB_IKJ                                     \
+                            {                                                  \
+                                for (GB_HASH (i))       /* find i in hash */   \
+                                {                                              \
+                                    int64_t f = Hf [hash] ;                    \
+                                    if (f < mark) break ; /* M(i,j)=0; ignore*/\
+                                    if (Hi [hash] == i)   /* if true, i found*/\
+                                    {                                          \
+                                        if (f == mark)  /* if true, i is new */\
+                                        {                                      \
+                                            Hf [hash] = mark1 ; /* mark seen */\
+                                            cjnz++ ;    /* C(i,j) is new */    \
+                                        }                                      \
+                                        break ;                                \
+                                    }                                          \
+                                }                                              \
+                            }
+                            GB_SCAN_M_j_OR_A_k ;
+                            #undef GB_IKJ_VECTORIZE
+                            #undef GB_IKJ_IVDEP
+                            #undef GB_IKJ
+                        }
+                        Cp [kk] = cjnz ;    // count the entries in C(:,j)
+                    }
+
+                }
+                else
+                {
+
+                    //----------------------------------------------------------
+                    // phase1: coarse hash task, C<!M>=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+                    // Let h = Hi [hash] and f = Hf [hash].
+
+                    // f < mark: unoccupied, M(i,j)=0, and C(i,j) not yet seen.
+                    // h == i, f == mark   : M(i,j)=1. C(i,j) ignored.
+                    // h == i, f == mark+1 : M(i,j)=0, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        GB_GET_B_j ;            // get B(:,j)
+                        if (bjnz == 0)
+                        { 
+                            Cp [kk] = 0 ;
+                            continue ;
+                        }
+                        GB_GET_M_j ;            // get M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        GB_HASH_M_j ;           // hash M(:,j)
+                        int64_t cjnz = 0 ;
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        {
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            // scan A(:,k)
+                            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                            {
+                                int64_t i = Ai [pA] ;   // get A(i,k)
+                                for (GB_HASH (i))       // find i in hash
+                                {
+                                    if (Hf [hash] < mark)   // if true, i is new
+                                    { 
+                                        Hf [hash] = mark1 ; // mark C(i,j) seen
+                                        Hi [hash] = i ;
+                                        cjnz++ ;        // C(i,j) is a new entry
+                                        break ;
+                                    }
+                                    if (Hi [hash] == i) break ;
+                                }
+                            }
+                        }
+                        Cp [kk] = cjnz ;    // count the entries in C(:,j)
+                    }
+                }
+            }
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // check result for phase1 for fine tasks
+    //--------------------------------------------------------------------------
+
+    #ifdef GB_DEBUG
+    if (M != NULL)
+    {
+        for (taskid = 0 ; taskid < nfine ; taskid++)
+        {
+            int64_t kk = TaskList [taskid].vector ;
+            ASSERT (kk >= 0 && kk < B->nvec) ;
+            int64_t hash_size = TaskList [taskid].hsize ;
+            bool use_Gustavson = (hash_size == cvlen) ;
+            int master = TaskList [taskid].master ;
+            if (master != taskid) continue ;
+            GB_GET_M_j ;        // get M(:,j)
+            int64_t mjcount2 = 0 ;
+            int64_t mjcount = 0 ;
+            for (int64_t pM = pM_start ; pM < pM_end ; pM++)
+            {
+                GB_GET_M_ij ;           // get M(i,j)
+                if (mij) mjcount++ ;
+            }
+            if (use_Gustavson)
+            {
+                // phase1: fine Gustavson task, C<M>=A*B or C<!M>=A*B
+                uint8_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
+                {
+                    GB_GET_M_ij ;                    // get M(i,j)
+                    ASSERT (Hf [Mi [pM]] == mij) ;
+                }
+                for (int64_t i = 0 ; i < cvlen ; i++)
+                {
+                    ASSERT (Hf [i] == 0 || Hf [i] == 1) ;
+                    if (Hf [i] == 1) mjcount2++ ;
+                }
+                ASSERT (mjcount == mjcount2) ;
+            }
+            else
+            {
+                // phase1: fine hash task, C<M>=A*B or C<!M>=A*B
+                // h == 0,   f == 0: unoccupied and unlocked
+                // h == i+1, f == 1: occupied with M(i,j)=1
+                int64_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+                int64_t hash_bits = (hash_size-1) ;
+                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
+                {
+                    GB_GET_M_ij ;                   // get M(i,j)
+                    if (!mij) continue ;            // skip if M(i,j)=0
+                    int64_t i = Mi [pM] ;
+                    int64_t i_mine = ((i+1) << 2) + 1 ;  // ((i+1),1)
+                    int64_t probe = 0 ;
+                    for (GB_HASH (i))
+                    {
+                        int64_t hf = Hf [hash] ;
+                        if (hf == i_mine) 
+                        {
+                            mjcount2++ ;
+                            break ;
+                        }
+                        ASSERT (hf != 0) ;
+                        probe++ ;
+                        ASSERT (probe < cvlen) ;
+                    }
+                }
+                ASSERT (mjcount == mjcount2) ;
+                mjcount2 = 0 ;
+                for (int64_t hash = 0 ; hash < hash_size ; hash++)
+                {
+                    int64_t hf = Hf [hash] ;
+                    int64_t h = (hf >> 2) ;     // empty (0), or a 1-based index
+                    int64_t f = (hf & 3) ;      // 0 if empty or 1 if occupied
+                    if (f == 1) ASSERT (h >= 1 && h <= cvlen) ;
+                    ASSERT (hf == 0 || f == 1) ;
+                    if (f == 1) mjcount2++ ;
+                }
+                ASSERT (mjcount == mjcount2) ;
+            }
+        }
+    }
+    #endif
+}
+
diff --git a/Source/GB_AxB_saxpy_parallel.c b/Source/GB_AxB_saxpy_parallel.c
deleted file mode 100644
index 8589b01934..0000000000
--- a/Source/GB_AxB_saxpy_parallel.c
+++ /dev/null
@@ -1,534 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_saxpy_parallel: C<M>=A*B, C=A*B
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Parallel matrix-matrix multiply, A*B with optional mask M, using the saxpy
-// method.  This method is used by GrB_mxm, GrB_vxm, and GrB_mxv.  For both of
-// the latter two methods, B on input will be an nrows-by-1 column vxector.
-
-// The strategy is to "slice" (or partition) B, as B = [B0 B1 ... B(t-1)] if
-// there are t threads.  Then each thread k computes C(k) = A*B(k), and then
-// the result is concatenated, as C = [C0 C1 ... C(t-1)].
-
-// Each thread k computes an independent output matrix C(k), doing both its
-// analysis and numeric phases.
-
-// This strategy works well for OpenMP, but it could also be written in a
-// purely inspector+executor style, like the GB_AxB_dot* methods.  Those
-// methods do the analysis in parallel, and first determine the size of the
-// output matrix C.  Then a parallel cumulative sum is computed, and the entire
-// output matrix is allocated.  Then each task of the the numeric phase
-// computes its part of the result C, without the need for any memory
-// allocation by individual threads.
-
-// This function, and the matrices C, M, A, and B are all CSR/CSC agnostic.
-// For this discussion, suppose they are CSC, with vlen = # of rows, and vdim =
-// # of columns.
-
-// A*B is being computed, and the vector dimension of A must be identical to
-// the vector length of B (as if both A and B are CSC matrices, and the number
-// of columns of A is the same as the number of rows of B).
-
-// The output matrix C = *Chandle has not been allocated, so C is NULL on
-// input.  The mask M is optional.
-
-// The semiring defines C=A*B.  flipxy modifies how the semiring multiply
-// operator is applied.  If false, then fmult(aik,bkj) is computed.  If true,
-// then the operands are swapped, and fmult(bkj,aij) is done instead.
-
-// AxB_method selects the method to use:
-
-//      GxB_DEFAULT:        the method is selected automatically
-
-//      GxB_AxB_GUSTAVSON:  Gustavson's method for A*B
-
-//      GxB_AxB_HEAP:       heap method for A*B
-
-//      GxB_AxB_HASH:       hash method for A*B (FUTURE)
-
-// The dot product method does not use this function.
-
-// AxB_method_used reports the method actually chosen.  This is for
-// informational purposes only, so if a parallel C=A*B splits the work into
-// multiple submatrix multiplications, and uses different methods on each
-// submatrix, then AxB_method_used is the method chosen by thread zero.
-
-// FUTURE:: hash-based method, and multi-phase Gustavson and Heap methods,
-// which do not do any memory allocations in parallel, but instead use an
-// inspector+executur style (like GB_AxB_dot*).  This should work better on the
-// GPU.
-
-#include "GB_mxm.h"
-#include "GB_Sauna.h"
-
-#define GB_FREE_WORK                                                        \
-{                                                                           \
-    GB_FREE_MEMORY (Slice, nthreads+1, sizeof (int64_t)) ;                  \
-    GB_FREE_MEMORY (Bflops, bnvec+1, sizeof (int64_t)) ;                    \
-    GB_FREE_MEMORY (Bflops_per_entry, bnz+1, sizeof (int64_t)) ;            \
-    GB_FREE_MEMORY (AxB_methods_used, nthreads, sizeof (GrB_Desc_Value)) ;  \
-    GB_FREE_MEMORY (bjnz_max, nthreads, sizeof (int64_t)) ;                 \
-    GB_FREE_MEMORY (Sauna_ids, nthreads, sizeof (int)) ;                    \
-    if (Cslice != NULL)                                                     \
-    {                                                                       \
-        for (int tid = 0 ; tid < nthreads ; tid++)                          \
-        {                                                                   \
-            GB_MATRIX_FREE (& (Cslice [tid])) ;                             \
-        }                                                                   \
-    }                                                                       \
-    if (Bslice != NULL)                                                     \
-    {                                                                       \
-        for (int tid = 0 ; tid < nthreads ; tid++)                          \
-        {                                                                   \
-            GB_MATRIX_FREE (& (Bslice [tid])) ;                             \
-        }                                                                   \
-    }                                                                       \
-    GB_FREE_MEMORY (Cslice, nthreads, sizeof (GrB_Matrix)) ;                \
-    GB_FREE_MEMORY (Bslice, nthreads, sizeof (GrB_Matrix)) ;                \
-}
-
-#define GB_FREE_ALL                                                         \
-{                                                                           \
-    GB_FREE_WORK ;                                                          \
-    GB_MATRIX_FREE (Chandle) ;                                              \
-}
-
-GrB_Info GB_AxB_saxpy_parallel      // parallel matrix-matrix multiply
-(
-    GrB_Matrix *Chandle,            // output matrix, NULL on input
-    GrB_Matrix M,                   // optional mask matrix
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
-    GrB_Desc_Value *AxB_method_used,// method selected by thread zero
-    bool *mask_applied,             // if true, mask was applied
-    GB_Context Context
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (Chandle != NULL) ;          // C = (*Chandle) is NULL
-    ASSERT (*Chandle == NULL) ;
-    ASSERT_MATRIX_OK_OR_NULL (M, "M for parallel A*B", GB0) ;
-    ASSERT_MATRIX_OK (A, "A for parallel A*B", GB0) ;
-    ASSERT_MATRIX_OK (B, "B for parallel A*B", GB0) ;
-    ASSERT (!GB_PENDING (M)) ; ASSERT (!GB_ZOMBIES (M)) ;
-    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
-    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
-    ASSERT_SEMIRING_OK (semiring, "semiring for parallel A*B", GB0) ;
-    ASSERT (AxB_method_used != NULL) ;
-
-    GrB_Info info ;
-
-    //--------------------------------------------------------------------------
-    // get A and B
-    //--------------------------------------------------------------------------
-
-    if (B->nvec_nonempty < 0)
-    { 
-        B->nvec_nonempty = GB_nvec_nonempty (B, NULL) ;
-    }
-
-    if (A->nvec_nonempty < 0)
-    { 
-        A->nvec_nonempty = GB_nvec_nonempty (A, NULL) ;
-    }
-
-    int64_t anz   = GB_NNZ (A) ;
-
-    int64_t bnvec = B->nvec ;
-    int64_t bnz   = GB_NNZ (B) ;
-
-    //--------------------------------------------------------------------------
-    // determine the number of threads to use
-    //--------------------------------------------------------------------------
-
-    // nthreads may be reduced after the flopcount is computed.
-
-    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
-    int nthreads = GB_nthreads (anz + bnz, chunk, nthreads_max) ;
-
-    //--------------------------------------------------------------------------
-    // initialize workspace
-    //--------------------------------------------------------------------------
-
-    int64_t *GB_RESTRICT Slice = NULL ;
-    int64_t *GB_RESTRICT Bflops = NULL ;
-    int64_t *GB_RESTRICT Bflops_per_entry = NULL ;
-
-    // workspaces each of size nthreads:
-    GrB_Desc_Value *GB_RESTRICT AxB_methods_used = NULL ;
-    int64_t *GB_RESTRICT bjnz_max = NULL ;
-    int *Sauna_ids = NULL ;
-    GrB_Matrix *GB_RESTRICT Cslice = NULL ;
-    GrB_Matrix *GB_RESTRICT Bslice = NULL ;
-
-    //==========================================================================
-    // sequential C<M>=A*B
-    //==========================================================================
-
-    if (nthreads == 1)
-    {
-        // select the method
-        int64_t bjnz1max ;
-        GB_AxB_select (A, B, semiring, AxB_method, AxB_method_used, &bjnz1max) ;
-
-        // acquire a Sauna if Gustavson's method is being used
-        int Sauna_id = -2 ;
-        if (*AxB_method_used == GxB_AxB_GUSTAVSON)
-        { 
-            GB_OK (GB_Sauna_acquire (1, &Sauna_id, AxB_method_used, Context)) ;
-        }
-
-        // C<M>=A*B
-        GrB_Info info1 = GB_AxB_saxpy_sequential (Chandle, M, Mask_comp, A, B,
-            semiring, flipxy, *AxB_method_used, bjnz1max, true, mask_applied,
-            Sauna_id) ;
-
-        // release the Sauna for Gustavson's method
-        if (*AxB_method_used == GxB_AxB_GUSTAVSON)
-        { 
-            // info is reset, so info1 is used above
-            GB_OK (GB_Sauna_release (1, &Sauna_id)) ;
-        }
-        return ((info1 == GrB_OUT_OF_MEMORY) ? GB_OUT_OF_MEMORY : info1) ;
-    }
-
-    //==========================================================================
-    // parallel C<M>=A*B
-    //==========================================================================
-
-    // The # of threads may be reduced, if the problem small, even to
-    // nthreads=1.  But so far, for now, nthreads > 1.
-
-    ASSERT (nthreads > 1) ;
-
-    //--------------------------------------------------------------------------
-    // count the flops and determine # of threads to use
-    //--------------------------------------------------------------------------
-
-    int64_t total_flops ;
-    bool fine_slice = (nthreads > bnvec) ;
-    bool flopresult ;
-
-    if (!fine_slice)
-    {
-
-        //----------------------------------------------------------------------
-        // slice B by flops
-        //----------------------------------------------------------------------
-
-        // Slice B so that each slice has a balanced amount of flops, to
-        // compute its slice of C.  Each thread gets enough columns of B so
-        // that it has roughly total_flops / nthreads work to do.  Individual
-        // columns are not sliced, so the final step to compute C is a
-        // concatenation, not as summation.  This should give a very good load
-        // balance where there are enough columns of B, but at the cost of a
-        // more expensive symbolic analysis, taking O(bnz) time.  The analysis
-        // is itself fully parallel, however.  This method cannot parallelize
-        // A*B when B is a single column (GrB_mxv or GrB_vxm).
-
-        // thread tid will do columns Slice [tid] to Slice [tid+1]-1
-
-        // note that Bflops is initialized to zero
-        GB_CALLOC_MEMORY (Bflops, bnvec+1, sizeof (int64_t)) ;
-        if (Bflops == NULL)
-        { 
-            // out of memory
-            GB_FREE_ALL ;
-            return (GB_OUT_OF_MEMORY) ;
-        }
-
-        // Bflops [k] = # of flops to compute A*B(:,j) where j is the kth
-        // vector in B
-        GB_OK (GB_AxB_flopcount (&flopresult, Bflops, NULL,
-            (Mask_comp) ? NULL : M, A, B, 0, Context)) ;
-
-        // reduce # of threads, based on flop count and the chunk size
-        total_flops = Bflops [bnvec] ;
-
-    }
-    else
-    {
-
-        //----------------------------------------------------------------------
-        // fine slice of B by flops (split columns of B)
-        //----------------------------------------------------------------------
-
-        // Slice B so that each slice has nearly exactly balanced amount of
-        // flops to compute its slice of C.  Each thread gets exactly the
-        // number of entries so that it does total_flops/nthreads work (rounded
-        // to the nearest number of entries in B).
-
-        // note that Bflops_per_entry is initialized to zero
-        GB_CALLOC_MEMORY (Bflops_per_entry, bnz+1, sizeof (int64_t)) ;
-        if (Bflops_per_entry == NULL)
-        { 
-            // out of memory
-            GB_FREE_ALL ;
-            return (GB_OUT_OF_MEMORY) ;
-        }
-
-        // Bflops_per_entry [p] = # of flops to compute A(:,k)*B(k,j)
-        // where B(k,j) is in Bi [p] and Bx [p].
-        GB_OK (GB_AxB_flopcount (&flopresult, NULL, Bflops_per_entry,
-            (Mask_comp) ? NULL : M, A, B, 0, Context)) ;
-
-        // reduce # of threads, based on flop count and the chunk size
-        total_flops = Bflops_per_entry [bnz] ;
-    }
-
-    //--------------------------------------------------------------------------
-    // find the size of each slice
-    //--------------------------------------------------------------------------
-
-    nthreads = GB_nthreads (total_flops, chunk, nthreads_max) ;
-    bool ok_pslice ;
-
-    if (!fine_slice)
-    { 
-        // slice B by the flops needed for each vector
-        ok_pslice = GB_pslice (&Slice, Bflops, bnvec, nthreads) ;
-    }
-    else
-    { 
-        // slice B by the flops needed for each entry
-        ok_pslice = GB_pslice (&Slice, Bflops_per_entry, bnz, nthreads) ;
-    }
-
-    // free workspace
-    GB_FREE_MEMORY (Bflops, bnvec+1, sizeof (int64_t)) ;
-    GB_FREE_MEMORY (Bflops_per_entry, bnz+1, sizeof (int64_t)) ;
-
-    if (!ok_pslice)
-    {
-        // out of memory
-        GB_FREE_ALL ;
-        return (GB_OUT_OF_MEMORY) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // discard the mask if it's too costly to use
-    //--------------------------------------------------------------------------
-
-    if (M != NULL && total_flops < GB_NNZ (M))
-    { 
-        // The mask is too dense; discard it.  mask_applied will be false.
-        M = NULL ;
-    }
-
-    //--------------------------------------------------------------------------
-    // allocate workspace
-    //--------------------------------------------------------------------------
-
-    GB_CALLOC_MEMORY (AxB_methods_used, nthreads, sizeof (GrB_Desc_Value)) ;
-    GB_CALLOC_MEMORY (bjnz_max, nthreads, sizeof (int64_t)) ;
-    GB_CALLOC_MEMORY (Sauna_ids, nthreads, sizeof (int)) ;
-    GB_CALLOC_MEMORY (Cslice, nthreads, sizeof (GrB_Matrix)) ;
-    GB_CALLOC_MEMORY (Bslice, nthreads, sizeof (GrB_Matrix)) ;
-
-    if (AxB_methods_used == NULL || bjnz_max == NULL || Sauna_ids == NULL
-        || Cslice == NULL || Bslice == NULL)
-    { 
-        // out of memory
-        GB_FREE_ALL ;
-        return (GB_OUT_OF_MEMORY) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // construct each slice of B
-    //--------------------------------------------------------------------------
-
-    // If the problem is small enough so that nthreads has been reduced to 1,
-    // B is not sliced.
-
-    if (nthreads > 1)
-    {
-        if (fine_slice)
-        { 
-            GB_OK (GB_fine_slice (B, nthreads, Slice, Bslice, Context)) ;
-        }
-        else
-        { 
-            GB_OK (GB_slice (B, nthreads, Slice, Bslice, Context)) ;
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // select the method for each slice
-    //--------------------------------------------------------------------------
-
-    bool any_Gustavson = false ;
-
-    int tid ;
-    #pragma omp parallel for num_threads(nthreads) schedule(static,1) \
-        reduction(||:any_Gustavson)
-    for (tid = 0 ; tid < nthreads ; tid++)
-    { 
-        GrB_Desc_Value thread_method_to_use ;
-        GB_AxB_select (A, (nthreads == 1) ? B : Bslice [tid], semiring,
-            AxB_method, &thread_method_to_use, &(bjnz_max [tid])) ;
-        AxB_methods_used [tid] = thread_method_to_use ;
-        // collect all thread-specific info
-        any_Gustavson = any_Gustavson ||
-            (thread_method_to_use == GxB_AxB_GUSTAVSON) ;
-    }
-
-    (*AxB_method_used) = AxB_methods_used [0] ;
-
-    //--------------------------------------------------------------------------
-    // acquire the Saunas for each thread that needs it
-    //--------------------------------------------------------------------------
-
-    if (any_Gustavson)
-    { 
-        // at least one thread needs a Sauna
-        GB_OK (GB_Sauna_acquire (nthreads, Sauna_ids, AxB_methods_used,
-            Context)) ;
-    }
-    else
-    {
-        // no thread needs a Sauna
-        for (int tid = 0 ; tid < nthreads ; tid++)
-        { 
-            Sauna_ids [tid] = -2 ;
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // compute each slice of C = A*B with optional mask M
-    //--------------------------------------------------------------------------
-
-    // This is the only parallel region in which each thread allocates memory.
-    // The memory space is not known until the thread determines the size of
-    // its own output, in its analysis phase.  Note the "reduction(&&:ok)"
-    // clause.  This is the only place where a clause like that apppears in
-    // SuiteSparse:GraphBLAS.  This could be removed if C=A*B were to be
-    // computed with an inspector+exector style of algorithm.
-
-    // B has been "sliced"; in MATLAB notation, B = [B0 B1 B2 ... B(t-1] if
-    // there are t threads.  Then each k thread computes its own Ck = A*Bk,
-    // and the results are concatenated below, as C = [C0 C1 ... C(t-1)].
-    // If a 'fine slice' was used for B, then C = C0+C1+...+C(t-1) must be
-    // computed.
-
-    // for all threads in parallel, with no synchronization except for these
-    // boolean reductions:
-    bool ok = true ;        // false if any thread's malloc or realloc fails
-    bool panic = false ;    // true if any critical section fails
-    bool allmask = true ;   // true if all threads apply the mask
-
-    #pragma omp parallel for num_threads(nthreads) schedule(static,1) \
-        reduction(&&:allmask) reduction(||:panic) \
-        reduction(&&:ok)
-    for (tid = 0 ; tid < nthreads ; tid++)
-    { 
-        // each thread allocates its output, using malloc and realloc
-        bool thread_mask_applied = false ;
-        GrB_Info thread_info = GB_AxB_saxpy_sequential (&(Cslice [tid]), M,
-            Mask_comp, A, (nthreads == 1) ? B : Bslice [tid], semiring,
-            flipxy, AxB_methods_used [tid], bjnz_max [tid],
-            false, &thread_mask_applied, Sauna_ids [tid]) ;
-        // collect all thread-specific info
-        ok      = ok      && (thread_info == GrB_SUCCESS) ;
-        allmask = allmask && (thread_mask_applied) ;
-        panic   = panic   || (thread_info == GrB_PANIC) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // check error conditions
-    //--------------------------------------------------------------------------
-
-    // panic if a critical section fails
-    if (panic) return (GrB_PANIC) ;
-
-    // check the return info from all the threads
-    if (!ok)
-    { 
-        // out of memory
-        if (any_Gustavson)
-        { 
-            // at least one thread used a Sauna; free and release all Sauna
-            // workspaces
-            for (int tid = 0 ; tid < nthreads ; tid++)
-            {
-                int Sauna_id = Sauna_ids [tid] ;
-                if (Sauna_id >= 0)
-                { 
-                    GB_Sauna_free (Sauna_id) ;
-                }
-            }
-            GB_OK (GB_Sauna_release (nthreads, Sauna_ids)) ;
-        }
-        GB_FREE_ALL ;
-        return (GB_OUT_OF_MEMORY) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // release the Saunas
-    //--------------------------------------------------------------------------
-
-    if (any_Gustavson)
-    { 
-        // at least one thread used a Sauna
-        GB_OK (GB_Sauna_release (nthreads, Sauna_ids)) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // check if all threads applied the mask
-    //--------------------------------------------------------------------------
-
-    // if all threads applied the mask to their slices, then GB_accum_mask does
-    // not need to apply it to the concatenated C in GB_AxB_meta.  If just some
-    // of them did, then GB_accum_mask needs to apply the mask again.
-    (*mask_applied) = allmask ;
-
-    //--------------------------------------------------------------------------
-    // concatenate or sum the slices of C
-    //--------------------------------------------------------------------------
-
-    // Each slice Cslice [tid] has the same dimensions and type as C.  C is
-    // stored by column.
-
-    if (nthreads == 1)
-    { 
-        // one thread, so only one slice: just copy Cslice[0] to C
-        (*Chandle) = Cslice [0] ;
-        Cslice [0] = NULL ;
-    }
-    else if (fine_slice)
-    { 
-        // C = sum (Cslice [0..nthreads-1]).  Adjacent slices of C can share
-        // columns, which must be summed.  Columns in the middle of each slice
-        // are concatenated horizontally.
-        GB_OK (GB_hcat_fine_slice (Chandle, nthreads, Cslice, semiring->add,
-            Sauna_ids, Context)) ;
-    }
-    else
-    { 
-        // C = [Cslice(0) Cslice(1) ... Cslice(nthreads-1)] concatenatied
-        // horizontally.  Each slice contains entries that appear in a unique
-        // and contiguous subset of the columns of C.
-        GB_OK (GB_hcat_slice (Chandle, nthreads, Cslice, Context)) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // free workspace and return result
-    //--------------------------------------------------------------------------
-
-    GB_FREE_WORK ;
-    ASSERT_MATRIX_OK (*Chandle, "C for parallel A*B", GB0) ;
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_AxB_saxpy_sequential.c b/Source/GB_AxB_saxpy_sequential.c
deleted file mode 100644
index bdc2184040..0000000000
--- a/Source/GB_AxB_saxpy_sequential.c
+++ /dev/null
@@ -1,107 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_saxpy_sequential: C<M>=A*B using a single thread
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Sequential matrix-matrix multiply using a saxpy-based method.  For a
-// description of the arguments, see GB_AxB_parallel.  Only a single thread
-// does this work.
-
-// Does not log an error; returns GrB_SUCCESS, GrB_OUT_OF_MEMORY, or GrB_PANIC.
-
-#include "GB_mxm.h"
-
-#define GB_FREE_ALL ;
-
-GrB_Info GB_AxB_saxpy_sequential    // single-threaded C<M>=A*B
-(
-    GrB_Matrix *Chandle,            // output matrix, NULL on input
-    GrB_Matrix M,                   // optional mask matrix
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    const GrB_Desc_Value AxB_method,// already chosen
-    const int64_t bjnz_max,         // for heap method only
-    const bool check_for_dense_mask,// if true, check floplimit for mask 
-    bool *mask_applied,             // if true, mask was applied
-    const int Sauna_id              // Sauna to use, for Gustavson method only
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_DEBUG
-    // only one thread does this entire function
-    GB_Context Context = NULL ;
-    #endif
-    ASSERT (Chandle != NULL) ;          // C = (*Chandle) is NULL
-    ASSERT (*Chandle == NULL) ;
-    ASSERT_MATRIX_OK_OR_NULL (M, "M for sequential A*B", GB0) ;
-    ASSERT_MATRIX_OK (A, "A for sequential A*B", GB0) ;
-    ASSERT_MATRIX_OK (B, "B for sequential A*B", GB0) ;
-    ASSERT (!GB_PENDING (M)) ; ASSERT (!GB_ZOMBIES (M)) ;
-    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
-    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
-    ASSERT_SEMIRING_OK (semiring, "semiring for sequential A*B", GB0) ;
-    ASSERT (mask_applied != NULL) ;
-    ASSERT (AxB_method != GxB_AxB_DOT) ;
-
-    //----------------------------------------------------------------------
-    // C<M> = A*B via a saxpy-based method
-    //----------------------------------------------------------------------
-
-    // Decide whether or not to use the mask.
-
-    if (M != NULL)
-    {
-        if (Mask_comp)
-        { 
-            // the saxpy methods cannot handle a complemented mask at all.
-            // Discard the mask; mask_applied will be false.
-            M = NULL ;
-        }
-        else if (check_for_dense_mask)
-        {
-            // The saxpy methods can handle any mask that's not complemented,
-            // but they will examine each entry in the mask.  This is costly if
-            // (total_flops < nnz(M)).  This condition is not checked if it has
-            // already been considered in the caller.
-            int64_t floplimit = GB_NNZ (M) ;
-            bool flopresult ;
-            GrB_Info info ;
-            GB_OK (GB_AxB_flopcount (&flopresult, NULL, NULL, M, A, B, 
-                floplimit, NULL)) ;
-            if (flopresult)
-            { 
-                // total_flops < nnz(M), so the mask is too dense to use.
-                // Discard the mask; mask_applied will be false.
-                M = NULL ;
-            }
-        }
-    }
-
-    // do the matrix multiply
-    if (AxB_method == GxB_AxB_HEAP)
-    { 
-        // C<M> = A*B via heap method
-        ASSERT (A->vdim == B->vlen) ;
-        return (GB_AxB_heap (Chandle, M, Mask_comp, A, B, semiring,
-            flipxy, mask_applied, bjnz_max)) ;
-    }
-    else // AxB_method == GxB_AxB_GUSTAVSON
-    { 
-        // C<M> = A*B via Gustavson method
-        ASSERT (A->vdim == B->vlen) ;
-        return (GB_AxB_Gustavson (Chandle, M, Mask_comp, A, B, semiring,
-            flipxy, mask_applied, Sauna_id)) ;
-    }
-}
-
diff --git a/Source/GB_AxB_select.c b/Source/GB_AxB_select.c
deleted file mode 100644
index 332248f2a8..0000000000
--- a/Source/GB_AxB_select.c
+++ /dev/null
@@ -1,160 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_select: select method for C<M>=A*B or C=A*B
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Select a saxpy method for each thread: Gustavon's or heap-based method.
-// This method is called by GB_AxB_saxpy_parallel.
-
-#include "GB_mxm.h"
-#include "GB_iterator.h"
-
-void GB_AxB_select                  // select method for A*B
-(
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
-    // output
-    GrB_Desc_Value *AxB_method_used,        // method to use
-    int64_t *bjnz_max                       // # entries in densest col of B
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_DEBUG
-    // only one thread does this entire function
-    GB_Context Context = NULL ;
-    #endif
-    ASSERT_MATRIX_OK (A, "A for AxB select", GB0) ;
-    ASSERT_MATRIX_OK (B, "B for AxB select", GB0) ;
-    ASSERT_SEMIRING_OK (semiring, "semiring for AxB select", GB0) ;
-    ASSERT (AxB_method_used != NULL) ;
-    (*AxB_method_used) = GxB_DEFAULT ;
-
-    //----------------------------------------------------------------------
-    // select the type of saxpy method for A*B
-    //----------------------------------------------------------------------
-
-    // GB_AxB_heap and GB_AxB_Gustavson compute the same thing (C=A*B or
-    // C<M>=A*B), and both use the saxpy method.  They differ in the
-    // workspace they use.  GB_AxB_heap uses a heap of size O(b), while
-    // GB_AxB_Gustavson uses a Sauna (gather/scatter workspace) of size
-    // O(m) where m = C->vlen = A->vlen.
-
-    // Let b = max (nnz (B (:,j))), for all j; the maximum number of
-    // entries in any column of B.
-
-    // GB_AxB_heap uses workspace of 5 * (b+1) * sizeof (int64_t).
-
-    // find the densest column of B, also recount B->nvec_nonempty, since
-    // it may not yet be known.
-    int64_t b = 0 ;
-    int64_t nvec_nonempty = 0 ;
-    GBI_for_each_vector (B)
-    { 
-        GBI_jth_iteration (j, pB_start, pB_end) ;
-        int64_t bjnz = pB_end - pB_start ;
-        b = GB_IMAX (b, bjnz) ;
-        if (bjnz > 0) nvec_nonempty++ ;
-    }
-
-    (*bjnz_max) = b ;
-    B->nvec_nonempty = nvec_nonempty ;
-
-    double heap_memory = GBYTES (b+1, 5 * sizeof (int64_t)) ;
-
-    // GB_AxB_Gustavson uses a Sauna of size m * csize where C is m-by-n.
-
-    int64_t m = A->vlen ;       // A is m-by-k
-    int64_t k = B->vlen ;       // B is k-by-n
-    size_t csize = semiring->add->op->ztype->size ;
-    double gs_memory = GBYTES (m, csize + sizeof (int64_t)) ;
-
-    bool use_heap ;
-    int64_t bnz = GB_NNZ (B) ;
-    int64_t anz = GB_NNZ (A) ;
-
-    if (AxB_method == GxB_DEFAULT)
-    {
-        if (b <= 2)
-        { 
-            // use the Heap method if all columns of B have 2 entries or
-            // less.  This is the case if B is a diagonal scaling matrix, a
-            // permutation matrix, or upper/lower bidiagonal.  The heap
-            // will have size 2, which is very small and will be very fast.
-            use_heap = true ;
-        }
-        else if (bnz <= 3*k || bnz <= m || anz <= GB_IMIN (k,m))
-        { 
-
-            // If B is very sparse, with an average of 3 entries per
-            // column, then it is a good candidate for the heap method.
-            // The heap method will use O(b) memory, which is at most
-            // O(nnz(B)).  The size of A, B, and C could be dwarfed by the
-            // O(m) gather/scatter memory, which makes Gustavson's
-            // prohibitively expensive.  If A is extremely sparse (the 2nd
-            // condition above) then the heap method is also competitive.
-            // In each of these cases, use the heap method if it requires
-            // much less memory.
-
-            // The 40*b memory for the heap also comes at the cost of run
-            // time; accessing a heap of size O(b) adds an extra factor of
-            // O(log(b)) to the run time.  Thus the heap method is
-            // penalized by a factor of 4*log2(b).
-
-            // The heap always uses 40*b bytes for the heap.  If csize=8 (a
-            // 64-bit type), then gather/scatter uses 9*m bytes.  So the
-            // following rule becomes :
-
-            // use_heap = (4*log(b)*40b < 9*m) or roughly (16*b*log(b)<m).
-
-            int log2b = 0 ;
-            while (b > 0)
-            {
-                b = b / 2 ;
-                log2b++ ;
-            }
-
-            use_heap = (4 * log2b * heap_memory < gs_memory) ;
-        }
-        else
-        { 
-            // Otherwise, do not use the heap method; use Gustavson's
-            // method instead.  Since nnz(B) > m and Gustavson's method
-            // requires O(m) workspace, the size of the workspace will be
-            // less than the size of the input matrices.  In this case
-            // Gustavson's method tends to be faster than the heap method.
-            use_heap = false ;
-        }
-    }
-    else
-    { 
-        // allow the user to select gather/scatter vs heap
-        use_heap = (AxB_method == GxB_AxB_HEAP) ;
-    }
-
-    //----------------------------------------------------------------------
-    // choose the saxpy method for C<M>=A*B or C=A*B
-    //----------------------------------------------------------------------
-
-    if (use_heap)
-    { 
-        // use saxpy method with a heap; hypersparse matrices will tend to
-        // use this option.
-        (*AxB_method_used) = GxB_AxB_HEAP ;
-    }
-    else
-    { 
-        // use saxpy method with a gather/scatter workspace.
-        (*AxB_method_used) = GxB_AxB_GUSTAVSON ;
-    }
-}
-
diff --git a/Source/GB_AxB_semiring_builtin.c b/Source/GB_AxB_semiring_builtin.c
index 994e0effa6..8a2aeda487 100644
--- a/Source/GB_AxB_semiring_builtin.c
+++ b/Source/GB_AxB_semiring_builtin.c
@@ -2,7 +2,7 @@
 // GB_AxB_semiring_builtin:  determine if semiring is built-in
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -55,7 +55,7 @@ bool GB_AxB_semiring_builtin        // true if semiring is builtin
     //--------------------------------------------------------------------------
 
     (*add_opcode) = add->opcode ;
-    if (*add_opcode >= GB_USER_C_opcode)
+    if (*add_opcode >= GB_USER_opcode)
     { 
         // semiring has a user-defined add operator for its monoid
         return (false) ;
@@ -76,8 +76,29 @@ bool GB_AxB_semiring_builtin        // true if semiring is builtin
     // check the multiply operator
     //--------------------------------------------------------------------------
 
-    return (GB_binop_builtin (A, A_is_pattern, B, B_is_pattern, mult, flipxy,
-        mult_opcode, xycode, zcode)) ;
+    if (!GB_binop_builtin (A->type, A_is_pattern, B->type, B_is_pattern,
+        mult, flipxy, mult_opcode, xycode, zcode))
+    { 
+        return (false) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // rename to ANY_PAIR
+    //--------------------------------------------------------------------------
+
+    if ((*mult_opcode) == GB_PAIR_opcode)
+    { 
+        if (((*add_opcode) == GB_EQ_opcode) ||
+            ((*add_opcode) == GB_LAND_opcode) ||
+            ((*add_opcode) == GB_LOR_opcode) ||
+            ((*add_opcode) == GB_MAX_opcode) ||
+            ((*add_opcode) == GB_MIN_opcode) ||
+            ((*add_opcode) == GB_TIMES_opcode))
+        // mult is already PAIR: rename the monoid (add) to ANY, for ANY_PAIR
+        (*add_opcode) = GB_ANY_opcode ;
+    }
+
+    return (true) ;
 }
 
 #endif
diff --git a/Source/GB_BinaryOp_check.c b/Source/GB_BinaryOp_check.c
index 04bc0c9090..e17fd0084a 100644
--- a/Source/GB_BinaryOp_check.c
+++ b/Source/GB_BinaryOp_check.c
@@ -2,14 +2,11 @@
 // GB_BinaryOp_check: check and print a binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// for additional diagnostics, use:
-// #define GB_DEVELOPER 1
-
 #include "GB_printf.h"
 
 GrB_Info GB_BinaryOp_check  // check a GraphBLAS binary operator
@@ -44,13 +41,9 @@ GrB_Info GB_BinaryOp_check  // check a GraphBLAS binary operator
 
     if (pr > 0)
     {
-        if (op->opcode == GB_USER_C_opcode)
-        { 
-            GBPR ("(compile-time user-defined) ") ;
-        }
-        else if (op->opcode == GB_USER_R_opcode)
+        if (op->opcode >= GB_USER_opcode)
         { 
-            GBPR ("(run-time user-defined) ") ;
+            GBPR ("(user-defined) ") ;
         }
         else
         { 
@@ -68,7 +61,7 @@ GrB_Info GB_BinaryOp_check  // check a GraphBLAS binary operator
             GB_NAME, op->name))) ;
     }
 
-    if (op->opcode < GB_FIRST_opcode || op->opcode > GB_USER_R_opcode)
+    if (op->opcode < GB_FIRST_opcode || op->opcode > GB_USER_opcode)
     { 
         GBPR0 ("    BinaryOp has an invalid opcode\n") ;
         return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
diff --git a/Source/GB_BinaryOp_compatible.c b/Source/GB_BinaryOp_compatible.c
index 196560a971..a73255f4f7 100644
--- a/Source/GB_BinaryOp_compatible.c
+++ b/Source/GB_BinaryOp_compatible.c
@@ -2,7 +2,7 @@
 // GB_BinaryOp_compatible: check binary operator for type compatibility
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -37,7 +37,7 @@ GrB_Info GB_BinaryOp_compatible     // check for domain mismatch
     // first input A is cast into the type of op->xtype
     //--------------------------------------------------------------------------
 
-    if (op->opcode == GB_SECOND_opcode)
+    if (op->opcode == GB_SECOND_opcode || op->opcode == GB_PAIR_opcode)
     { 
         // first input is unused, so A is always compatible
         ;
@@ -55,7 +55,7 @@ GrB_Info GB_BinaryOp_compatible     // check for domain mismatch
     // second input B is cast into the type of op->ytype
     //--------------------------------------------------------------------------
 
-    if (op->opcode == GB_FIRST_opcode)
+    if (op->opcode == GB_FIRST_opcode || op->opcode == GB_PAIR_opcode)
     { 
         // second input is unused, so B is always compatible
         ;
diff --git a/Source/GB_BinaryOp_new.c b/Source/GB_BinaryOp_new.c
index c4e0f2576c..94638a823c 100644
--- a/Source/GB_BinaryOp_new.c
+++ b/Source/GB_BinaryOp_new.c
@@ -2,7 +2,7 @@
 // GB_BinaryOp_new: create a new binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -58,7 +58,7 @@ GrB_Info GB_BinaryOp_new
     op->ztype = ztype ;
     op->function = function ;
     strncpy (op->name, name, GB_LEN-1) ;
-    op->opcode = GB_USER_R_opcode ;     // run-time user-defined operator
+    op->opcode = GB_USER_opcode ;     // user-defined operator
     ASSERT_BINARYOP_OK (op, "new user-defined binary op", GB0) ;
     return (GrB_SUCCESS) ;
 }
diff --git a/Source/GB_Descriptor_check.c b/Source/GB_Descriptor_check.c
index b129740b2f..59b0e56bc0 100644
--- a/Source/GB_Descriptor_check.c
+++ b/Source/GB_Descriptor_check.c
@@ -2,14 +2,11 @@
 // GB_Descriptor_check: check and print a Descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// for additional diagnostics, use:
-// #define GB_DEVELOPER 1
-
 #include "GB_printf.h"
 
 //------------------------------------------------------------------------------
@@ -18,10 +15,10 @@
 
 static GrB_Info GB_dc
 (
-    bool spec,
+    int kind,                           // 0: out/in0/in1, 1: mask, 2: axb
     const char *field,
     const GrB_Desc_Value v,
-    const GrB_Desc_Value nondefault,
+    const GrB_Desc_Value nondefault,    // for kind == 0
     int pr,
     FILE *f,
     GB_Context Context
@@ -32,16 +29,20 @@ static GrB_Info GB_dc
     GrB_Info info = GrB_SUCCESS ;
 
     GBPR0 ("    d.%s = ", field) ;
-    switch (v)
+    switch ((int) v)
     {
-        case GxB_DEFAULT       : GBPR0 ("default   ") ; break ;
-        case GrB_SCMP          : GBPR0 ("complement") ; break ;
-        case GrB_TRAN          : GBPR0 ("transpose ") ; break ;
-        case GrB_REPLACE       : GBPR0 ("replace   ") ; break ;
-        case GxB_AxB_GUSTAVSON : GBPR0 ("Gustavson ") ; break ;
-        case GxB_AxB_HEAP      : GBPR0 ("heap      ") ; break ;
-        case GxB_AxB_DOT       : GBPR0 ("dot       ") ; break ;
-        default                : GBPR0 ("unknown   ") ;
+        case GxB_DEFAULT            : GBPR0 ("default   ") ; break ;
+        case GrB_COMP               : GBPR0 ("complement") ; break ;
+        case GrB_STRUCTURE          : GBPR0 ("structure ") ; break ;
+        case GrB_COMP+GrB_STRUCTURE : GBPR0 ("structural complement") ; break ;
+        case GrB_TRAN               : GBPR0 ("transpose ") ; break ;
+        case GrB_REPLACE            : GBPR0 ("replace   ") ; break ;
+        case GxB_AxB_SAXPY          : GBPR0 ("saxpy     ") ; break ;
+        case GxB_AxB_GUSTAVSON      : GBPR0 ("Gustavson ") ; break ;
+        case GxB_AxB_HEAP           : GBPR0 ("heap      ") ; break ;
+        case GxB_AxB_HASH           : GBPR0 ("hash      ") ; break ;
+        case GxB_AxB_DOT            : GBPR0 ("dot       ") ; break ;
+        default                     : GBPR0 ("unknown   ") ;
             info = GrB_INVALID_OBJECT ;
             ok = false ;
             break ;
@@ -49,7 +50,7 @@ static GrB_Info GB_dc
 
     if (ok)
     {
-        if (spec)
+        if (kind == 0)
         {
             // descriptor field can be set to the default,
             // or one non-default value
@@ -58,11 +59,21 @@ static GrB_Info GB_dc
                 ok = false ;
             }
         }
-        else
+        else if (kind == 1)
+        {
+            // mask
+            if (! (v == GxB_DEFAULT || v == GrB_COMP || v == GrB_STRUCTURE ||
+                   v == (GrB_COMP + GrB_STRUCTURE)))
+            {
+                ok = false ;
+            }
+        }
+        else // kind == 2
         {
             // GxB_AxB_METHOD:
             if (! (v == GxB_DEFAULT || v == GxB_AxB_GUSTAVSON
-                || v == GxB_AxB_HEAP || v == GxB_AxB_DOT))
+                || v == GxB_AxB_HEAP || v == GxB_AxB_DOT
+                || v == GxB_AxB_HASH || v == GxB_AxB_SAXPY))
             { 
                 ok = false ;
             }
@@ -117,11 +128,11 @@ GrB_Info GB_Descriptor_check    // check a GraphBLAS descriptor
     GBPR0 ("\n") ;
 
     GrB_Info info [5] ;
-    info [0] = GB_dc (true,  "out     ", D->out,  GrB_REPLACE, pr,f,Context) ;
-    info [1] = GB_dc (true,  "mask    ", D->mask, GrB_SCMP,    pr,f,Context) ;
-    info [2] = GB_dc (true,  "in0     ", D->in0,  GrB_TRAN,    pr,f,Context) ;
-    info [3] = GB_dc (true,  "in1     ", D->in1,  GrB_TRAN,    pr,f,Context) ;
-    info [4] = GB_dc (false, "axb     ", D->axb,  GxB_DEFAULT, pr,f,Context) ;
+    info [0] = GB_dc (0, "out     ", D->out,  GrB_REPLACE, pr,f,Context) ;
+    info [1] = GB_dc (1, "mask    ", D->mask, 0,           pr,f,Context) ;
+    info [2] = GB_dc (0, "in0     ", D->in0,  GrB_TRAN,    pr,f,Context) ;
+    info [3] = GB_dc (0, "in1     ", D->in1,  GrB_TRAN,    pr,f,Context) ;
+    info [4] = GB_dc (2, "axb     ", D->axb,  0,           pr,f,Context) ;
 
     for (int i = 0 ; i < 5 ; i++)
     {
diff --git a/Source/GB_Descriptor_get.c b/Source/GB_Descriptor_get.c
index 46e7c4577e..84f71a18ab 100644
--- a/Source/GB_Descriptor_get.c
+++ b/Source/GB_Descriptor_get.c
@@ -2,7 +2,7 @@
 // GB_Descriptor_get: get the status of a descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -20,19 +20,24 @@
 //      descriptor does not affect how C is used to compute the results.  If
 //      GxB_DEFAULT, then C is not cleared before doing C<M>=results.
 
-//  desc->mask                  GxB_DEFAULT or GrB_SCMP
+//  desc->mask                  GxB_DEFAULT, GrB_COMP, GrB_STRUCTURE, or
+//                              GrB_COMP + GrB_STRUCTURE
 
 //      An optional 'write mask' defines how the results are to be written back
 //      into C.  The boolean mask matrix M has the same size as C (M is
-//      typecasted to boolean if it has another type).  If the M input to
-//      the GraphBLAS method is NULL, then implicitly M(i,j)=1 for all i and
-//      j.  Let Z be the results to be written into C (the same dimension as
-//      C).  If desc->mask is GxB_DEFAULT, and M(i,j)=1, then C(i,j) is
-//      over-written with Z(i,j).  Otherwise, if M(i,j)=0 C(i,j) is left
-//      unmodified (it remains an implicit zero if it is so, or its value is
-//      unchanged if it has one).  If desc->mask is GrB_SCMP, then the use of
-//      M is negated: M(i,j)=0 means that C(i,j) is overwritten with
-//      Z(i,j), and M(i,j)=1 means that C(i,j) is left unchanged.
+//      typecasted to boolean if it has another type).  If the M input to the
+//      GraphBLAS method is NULL, then implicitly M(i,j)=1 for all i and j.
+//      Let Z be the results to be written into C (the same dimension as C).
+//      If desc->mask is GxB_DEFAULT, and M(i,j)=1, then C(i,j) is over-written
+//      with Z(i,j).  Otherwise, if M(i,j)=0 C(i,j) is left unmodified (it
+//      remains an implicit zero if it is so, or its value is unchanged if it
+//      has one).  If desc->mask is GrB_COMP, then the use of M is negated:
+//      M(i,j)=0 means that C(i,j) is overwritten with Z(i,j), and M(i,j)=1
+//      means that C(i,j) is left unchanged.  If the value is GrB_STRUCTURE,
+//      only the pattern is used; any entry present in the pattern has the
+//      value M(i,j)=1, and entries not in the pattern have the value M(i,j)=0.
+//      The GrB_COMP and GrB_STRUCTURE options can be combined, as GrB_COMP +
+//      GrB_STRUCTURE.
 
 //      Writing results Z into C via the mask M is written as C<M>=Z in
 //      GraphBLAS notation.
@@ -62,6 +67,8 @@
 
 //      GxB_AxB_HEAP            heap-based saxpy method
 
+//      GxB_AxB_HASH            hash-based saxpy method
+
 //      GxB_AxB_DOT             dot product
 
 //  desc->nthreads_max          max # number of threads to use (auto if <= 0)
@@ -75,6 +82,7 @@ GrB_Info GB_Descriptor_get      // get the contents of a descriptor
     const GrB_Descriptor desc,  // descriptor to query, may be NULL
     bool *C_replace,            // if true replace C before C<M>=Z
     bool *Mask_comp,            // if true use logical negation of M
+    bool *Mask_struct,          // if true use the structure of M
     bool *In0_transpose,        // if true transpose first input
     bool *In1_transpose,        // if true transpose second input
     GrB_Desc_Value *AxB_method, // method for C=A*B
@@ -108,10 +116,10 @@ GrB_Info GB_Descriptor_get      // get the contents of a descriptor
     { 
         // get the contents
         C_desc    = desc->out ;   // DEFAULT or REPLACE
-        Mask_desc = desc->mask ;  // DEFAULT or SCMP
+        Mask_desc = desc->mask ;  // DEFAULT, COMP, STRUCTURE, or COMP+STRUCTURE
         In0_desc  = desc->in0 ;   // DEFAULT or TRAN
         In1_desc  = desc->in1 ;   // DEFAULT or TRAN
-        AxB_desc  = desc->axb ;   // DEFAULT, GUSTAVSON, HEAP, or DOT
+        AxB_desc  = desc->axb ;   // DEFAULT, GUSTAVSON, HEAP, HASH, or DOT
 
         // default is zero.  if descriptor->nthreads_max <= 0, GraphBLAS selects
         // automatically: any value between 1 and the global nthreads_max.  If
@@ -123,11 +131,13 @@ GrB_Info GB_Descriptor_get      // get the contents of a descriptor
 
     // check for valid values of each descriptor field
     if (!(C_desc    == GxB_DEFAULT || C_desc    == GrB_REPLACE) ||
-        !(Mask_desc == GxB_DEFAULT || Mask_desc == GrB_SCMP) ||
+        !(Mask_desc == GxB_DEFAULT   || Mask_desc == GrB_COMP ||
+          Mask_desc == GrB_STRUCTURE || Mask_desc == GrB_COMP+GrB_STRUCTURE) ||
         !(In0_desc  == GxB_DEFAULT || In0_desc  == GrB_TRAN) ||
         !(In1_desc  == GxB_DEFAULT || In1_desc  == GrB_TRAN) ||
         !(AxB_desc  == GxB_DEFAULT || AxB_desc  == GxB_AxB_GUSTAVSON ||
-          AxB_desc  == GxB_AxB_DOT || AxB_desc  == GxB_AxB_HEAP))
+          AxB_desc  == GxB_AxB_DOT || AxB_desc  == GxB_AxB_HEAP ||
+          AxB_desc  == GxB_AxB_HASH || AxB_desc  == GxB_AxB_SAXPY))
     { 
         return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG, "Descriptor invalid"))) ;
     }
@@ -138,7 +148,13 @@ GrB_Info GB_Descriptor_get      // get the contents of a descriptor
     }
     if (Mask_comp != NULL)
     { 
-        *Mask_comp = (Mask_desc == GrB_SCMP) ;
+        *Mask_comp = (Mask_desc == GrB_COMP)
+                  || (Mask_desc == GrB_COMP + GrB_STRUCTURE) ;
+    }
+    if (Mask_struct != NULL)
+    { 
+        *Mask_struct = (Mask_desc == GrB_STRUCTURE)
+                    || (Mask_desc == GrB_STRUCTURE + GrB_COMP) ;
     }
     if (In0_transpose != NULL)
     { 
diff --git a/Source/GB_Global.c b/Source/GB_Global.c
index de9c5031aa..d319e72550 100644
--- a/Source/GB_Global.c
+++ b/Source/GB_Global.c
@@ -2,7 +2,7 @@
 // GB_Global: global values in GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,9 +11,8 @@
 // contents of the GB_Global struct are only accessible to functions in this
 // file.  Global storage is used to record a list of matrices with pending
 // operations (for GrB_wait), to keep track of the GraphBLAS mode (blocking or
-// non-blocking), to hold persistent Sauna workspace, for pointers to
-// malloc/calloc/realloc/free functions, global matrix options, and other
-// settings.
+// non-blocking), for pointers to malloc/calloc/realloc/free functions, global
+// matrix options, and other settings.
 
 #include "GB.h"
 #include "GB_printf.h"
@@ -61,13 +60,6 @@ typedef struct
     int nthreads_max ;          // max number of threads to use
     double chunk ;              // chunk size for determining # threads to use
 
-    //--------------------------------------------------------------------------
-    // Sauna: thread workspace for Gustavson's method
-    //--------------------------------------------------------------------------
-
-    GB_Sauna Saunas   [GxB_NTHREADS_MAX] ;
-    bool Sauna_in_use [GxB_NTHREADS_MAX] ;
-
     //--------------------------------------------------------------------------
     // hypersparsity and CSR/CSC format control
     //--------------------------------------------------------------------------
@@ -92,7 +84,6 @@ typedef struct
     void * (* calloc_function  ) (size_t, size_t) ;
     void * (* realloc_function ) (void *, size_t) ;
     void   (* free_function    ) (void *)         ;
-    void   (* persist_function ) (void *)         ;
     bool malloc_is_thread_safe ;   // default is true
 
     //--------------------------------------------------------------------------
@@ -128,9 +119,12 @@ typedef struct
     int64_t inuse ;                 // memory space current in use
     int64_t maxused ;               // high water memory usage
 
+    //--------------------------------------------------------------------------
+    // for testing and development
     //--------------------------------------------------------------------------
 
-    int64_t hack ;                  // for testing and development
+    int64_t hack ;                  // ad hoc setting (for draft versions only)
+    bool burble ;                   // controls GBBURBLE output
 
     //--------------------------------------------------------------------------
     // for MATLAB interface only
@@ -164,10 +158,6 @@ GB_Global_struct GB_Global =
     .hyper_ratio = GB_HYPER_DEFAULT,
     .is_csc = (GB_FORMAT_DEFAULT != GxB_BY_ROW),    // default is GxB_BY_ROW
 
-    // Sauna workspace for Gustavson's method (one per thread)
-    .Saunas [0] = NULL,
-    .Sauna_in_use [0] = false,
-
     // abort function for debugging only
     .abort_function   = abort,
 
@@ -176,7 +166,6 @@ GB_Global_struct GB_Global =
     .calloc_function  = calloc,
     .realloc_function = realloc,
     .free_function    = free,
-    .persist_function = NULL,
     .malloc_is_thread_safe = true,
 
     // malloc tracking, for testing, statistics, and debugging only
@@ -189,6 +178,7 @@ GB_Global_struct GB_Global =
 
     // for testing and development
     .hack = 0,
+    .burble = false,
 
     // for MATLAB interface only
     .print_one_based = false,       // if true, print 1-based indices
@@ -250,9 +240,7 @@ bool GB_Global_GrB_init_called_get (void)
 GB_PUBLIC   // accessed by the MATLAB interface only
 void GB_Global_nthreads_max_set (int nthreads_max)
 { 
-    nthreads_max = GB_IMIN (nthreads_max, GxB_NTHREADS_MAX) ;
-    nthreads_max = GB_IMAX (nthreads_max, 1) ;
-    GB_Global.nthreads_max = nthreads_max ;
+    GB_Global.nthreads_max = GB_IMAX (nthreads_max, 1) ;
 }
 
 GB_PUBLIC   // accessed by the MATLAB interface only
@@ -278,7 +266,7 @@ GB_PUBLIC   // accessed by the MATLAB interface only
 void GB_Global_chunk_set (double chunk)
 { 
     if (chunk <= GxB_DEFAULT) chunk = GB_CHUNK_DEFAULT ;
-    GB_Global.chunk = chunk ;
+    GB_Global.chunk = fmax (chunk, 1) ;
 }
 
 GB_PUBLIC   // accessed by the MATLAB interface only
@@ -321,34 +309,6 @@ bool GB_Global_is_csc_get (void)
     return (GB_Global.is_csc) ;
 }
 
-//------------------------------------------------------------------------------
-// Saunas [id]
-//------------------------------------------------------------------------------
-
-void GB_Global_Saunas_set (int id, GB_Sauna Sauna)
-{ 
-    GB_Global.Saunas [id] = Sauna ;
-}
-
-GB_Sauna GB_Global_Saunas_get (int id)
-{ 
-    return (GB_Global.Saunas [id]) ;
-}
-
-//------------------------------------------------------------------------------
-// Saunas_in_use [id]
-//------------------------------------------------------------------------------
-
-void GB_Global_Sauna_in_use_set (int id, bool in_use)
-{ 
-    GB_Global.Sauna_in_use [id] = in_use ;
-}
-
-bool GB_Global_Sauna_in_use_get (int id)
-{ 
-    return (GB_Global.Sauna_in_use [id]) ;
-}
-
 //------------------------------------------------------------------------------
 // abort_function
 //------------------------------------------------------------------------------
@@ -481,46 +441,6 @@ void GB_Global_free_function (void *p)
     }
 }
 
-//------------------------------------------------------------------------------
-// persist_function
-//------------------------------------------------------------------------------
-
-// This is only needed by the MATLAB interface, so that mexMakeMemoryPersistent
-// can be called to keep the Saunas allocated between calls to the
-// mexFunctions.  By default, the global persist_function is NULL, so it is not
-// used except when set to mexMakeMemoryPersistent in the mexFunction
-// interface.  The function pointer should be set immediately after calling
-// GxB_init.
-
-void GB_Global_persist_function_set (void (* persist_function) (void *))
-{
-    GB_Global.persist_function = persist_function ;
-}
-
-void GB_Global_persist_function (void *p)
-{ 
-    if (GB_Global.persist_function == NULL)
-    { 
-        return ;
-    }
-    #if defined (USER_POSIX_THREADS) || defined (USER_ANSI_THREADS)
-    bool ok = true ;
-    #endif
-    if (GB_Global.malloc_is_thread_safe)
-    {
-        GB_Global.persist_function (p) ;
-    }
-    else
-    {
-        #undef  GB_CRITICAL_SECTION
-        #define GB_CRITICAL_SECTION                             \
-        {                                                       \
-            GB_Global.persist_function (p) ;                    \
-        }
-        #include "GB_critical_section.c"
-    }
-}
-
 //------------------------------------------------------------------------------
 // malloc_is_thread_safe
 //------------------------------------------------------------------------------
@@ -646,6 +566,20 @@ int64_t GB_Global_hack_get (void)
     return (GB_Global.hack) ;
 }
 
+//------------------------------------------------------------------------------
+// burble: for controlling the burble output
+//------------------------------------------------------------------------------
+
+void GB_Global_burble_set (bool burble)     // set GB_Global.burble
+{ 
+    GB_Global.burble = burble ;
+}
+
+bool GB_Global_burble_get (void)            // return GB_Global.burble
+{ 
+    return (GB_Global.burble) ;
+}
+
 //------------------------------------------------------------------------------
 // for MATLAB interface only
 //------------------------------------------------------------------------------
diff --git a/Source/GB_Global.h b/Source/GB_Global.h
index dc51b0eefa..c944f53d08 100644
--- a/Source/GB_Global.h
+++ b/Source/GB_Global.h
@@ -2,7 +2,7 @@
 // GB_Global.h: definitions for global variables
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -43,12 +43,6 @@ double   GB_Global_hyper_ratio_get (void) ;
 void     GB_Global_is_csc_set (bool is_csc) ;
 bool     GB_Global_is_csc_get (void) ;
 
-void     GB_Global_Saunas_set (int id, GB_Sauna Sauna) ;
-GB_Sauna GB_Global_Saunas_get (int id) ;
-
-bool     GB_Global_Sauna_in_use_get (int id) ;
-void     GB_Global_Sauna_in_use_set (int id, bool in_use) ;
-
 void     GB_Global_abort_function_set (void (* abort_function) (void)) ;
 void     GB_Global_abort_function (void) ;
 
@@ -73,10 +67,6 @@ void  *  GB_Global_realloc_function (void *p, size_t size) ;
 void     GB_Global_free_function_set (void (* free_function) (void *)) ;
 void     GB_Global_free_function (void *p) ;
 
-GB_PUBLIC   // accessed by the MATLAB interface only
-void     GB_Global_persist_function_set (void (* persist_function) (void *)) ;
-void     GB_Global_persist_function (void *p) ;
-
 void     GB_Global_malloc_is_thread_safe_set
          (
             bool malloc_is_thread_safe
@@ -106,6 +96,9 @@ int64_t  GB_Global_maxused_get (void) ;
 void     GB_Global_hack_set (int64_t hack) ;
 int64_t  GB_Global_hack_get (void) ;
 
+void     GB_Global_burble_set (bool burble) ;
+bool     GB_Global_burble_get (void) ;
+
 GB_PUBLIC   // accessed by the MATLAB interface only
 void     GB_Global_print_one_based_set (bool onebased) ;
 GB_PUBLIC   // accessed by the MATLAB interface only
diff --git a/Source/GB_I_inverse.c b/Source/GB_I_inverse.c
index a9a7fe515b..52bb94ce1a 100644
--- a/Source/GB_I_inverse.c
+++ b/Source/GB_I_inverse.c
@@ -2,7 +2,7 @@
 // GB_I_inverse: invert an index list
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -51,7 +51,7 @@ GrB_Info GB_I_inverse           // invert the I list for C=A(I,:)
     GB_CALLOC_MEMORY (Mark,  avlen, sizeof (int64_t)) ;
     GB_MALLOC_MEMORY (Inext, nI,    sizeof (int64_t)) ;
     if (Inext == NULL || Mark == NULL)
-    {
+    { 
         // out of memory
         GB_FREE_MEMORY (Mark,  avlen, sizeof (int64_t)) ;
         GB_FREE_MEMORY (Inext, nI,    sizeof (int64_t)) ;
@@ -115,7 +115,6 @@ GrB_Info GB_I_inverse           // invert the I list for C=A(I,:)
     // return result
     //--------------------------------------------------------------------------
 
-    // if (ndupl > 0) printf ("duplicates: "GBd"\n", ndupl) ;
     (*p_Mark ) = Mark ;
     (*p_Inext) = Inext ;
     (*p_ndupl) = ndupl ;
diff --git a/Source/GB_Index_multiply.c b/Source/GB_Index_multiply.c
index 803bb67a19..cae7ed9886 100644
--- a/Source/GB_Index_multiply.c
+++ b/Source/GB_Index_multiply.c
@@ -2,7 +2,7 @@
 // GB_Index_multiply:  multiply two integers and guard against overflow
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -37,7 +37,7 @@ bool GB_Index_multiply      // true if ok, false if overflow
     double da = ceil (log2 ((double) a)) ;
     double db = ceil (log2 ((double) b)) ;
     if (da + db > 60)
-    {
+    { 
         // a * b may overflow
         return (false) ;
     }
diff --git a/Source/GB_Mask_compatible.c b/Source/GB_Mask_compatible.c
index 5d256fbc3a..e0689d159e 100644
--- a/Source/GB_Mask_compatible.c
+++ b/Source/GB_Mask_compatible.c
@@ -2,7 +2,7 @@
 // GB_Mask_compatible: check input and operators for type compatibility
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_Matrix_check.c b/Source/GB_Matrix_check.c
index 91f5346bfa..9b8f8e79b9 100644
--- a/Source/GB_Matrix_check.c
+++ b/Source/GB_Matrix_check.c
@@ -2,7 +2,7 @@
 // GB_Matrix_check: print a GraphBLAS matrix and check if it is valid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_Monoid_check.c b/Source/GB_Monoid_check.c
index 1d88865c66..eb337f31fb 100644
--- a/Source/GB_Monoid_check.c
+++ b/Source/GB_Monoid_check.c
@@ -2,14 +2,11 @@
 // GB_Monoid_check: check and print a monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// for additional diagnostics, use:
-// #define GB_DEVELOPER 1
-
 #include "GB_printf.h"
 
 GrB_Info GB_Monoid_check        // check a GraphBLAS monoid
@@ -48,12 +45,8 @@ GrB_Info GB_Monoid_check        // check a GraphBLAS monoid
             GBPR0 ("(built-in)") ;
             break ;
 
-        case GB_USER_COMPILED :
-            GBPR0 ("(user-defined at compile-time)") ;
-            break ;
-
         case GB_USER_RUNTIME :
-            GBPR0 ("(user-defined at run-time)") ;
+            GBPR0 ("(user-defined)") ;
             break ;
 
         default :
diff --git a/Source/GB_Monoid_new.c b/Source/GB_Monoid_new.c
index 5084916c4e..8ec3590c01 100644
--- a/Source/GB_Monoid_new.c
+++ b/Source/GB_Monoid_new.c
@@ -2,7 +2,7 @@
 // GB_Monoid_new: create a GrB_Monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -39,7 +39,6 @@ GrB_Info GB_Monoid_new          // create a monoid
 
     ASSERT_BINARYOP_OK (op, "op for monoid", GB0) ;
     ASSERT (idcode <= GB_UDT_code) ;
-    ASSERT (idcode != GB_UCT_code) ;
 
     //--------------------------------------------------------------------------
     // rename built-in binary operators
@@ -120,15 +119,11 @@ GrB_Info GB_Monoid_new          // create a monoid
     // and this can be rigourously checked.  For all user-defined types,
     // identity is a mere void * pointer, and its actual type cannot be
     // compared with the input op->ztype parameter.  Only the type code,
-    // GB_UDT_code or GB_UCT_code, can be checked to see if it matches.  In
+    // GB_UDT_code, can be checked to see if it matches.  In
     // that case, all that is known is that identity is a void * pointer that
     // points to something, hopefully a scalar of the proper user-defined type.
 
-    // UCT code is treated as UDT, since GB_Monoid_new is never called with
-    // an idcode of UCT.
     GB_Type_code zcode = op->ztype->code ;
-    if (zcode == GB_UCT_code) zcode = GB_UDT_code ;
-
     if (idcode != zcode)
     { 
         return (GB_ERROR (GrB_DOMAIN_MISMATCH, (GB_LOG,
diff --git a/Source/GB_Pending.h b/Source/GB_Pending.h
index 59ed9593d3..0ba6012975 100644
--- a/Source/GB_Pending.h
+++ b/Source/GB_Pending.h
@@ -2,7 +2,7 @@
 // GB_Pending.h: data structure and operations for pending tuples
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -183,5 +183,23 @@ static inline bool GB_Pending_add   // add a tuple to the list
     ilast = iC ;                                                            \
     jlast = jC ;
 
+//------------------------------------------------------------------------------
+// GB_shall_block: see if the matrix should be finished
+//------------------------------------------------------------------------------
+
+static inline bool GB_shall_block   // return true if GB_wait (A) should be done
+(
+    GrB_Matrix A                    // matrix to examine
+)
+{
+    // wait if blocking, or if # pending tuples >= vlen*vdim (product in double)
+    if (!GB_PENDING_OR_ZOMBIES (A)) return (false) ;   // no wait needed
+    double npending = GB_Pending_n (A) ;               // # of pending tuples
+    double anzmax = ((double) A->vlen) * ((double) A->vdim) ;
+    bool many_pending = (npending >= anzmax) ;
+    bool blocking = (GB_Global_mode_get ( ) == GrB_BLOCKING) ;
+    return (many_pending || blocking) ;
+}
+
 #endif
 
diff --git a/Source/GB_Pending_alloc.c b/Source/GB_Pending_alloc.c
index c2fa3dab83..700a4c1d54 100644
--- a/Source/GB_Pending_alloc.c
+++ b/Source/GB_Pending_alloc.c
@@ -2,7 +2,7 @@
 // GB_Pending_alloc: allocate a list of pending tuples
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_Pending_free.c b/Source/GB_Pending_free.c
index 9c5be75314..e557d208d1 100644
--- a/Source/GB_Pending_free.c
+++ b/Source/GB_Pending_free.c
@@ -2,7 +2,7 @@
 // GB_Pending_free: free a list of pending tuples
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_Pending_n.c b/Source/GB_Pending_n.c
index eaf7181777..c881498f80 100644
--- a/Source/GB_Pending_n.c
+++ b/Source/GB_Pending_n.c
@@ -2,7 +2,7 @@
 // GB_Pending_n: return the # of pending tuples in a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_Pending_realloc.c b/Source/GB_Pending_realloc.c
index 18982e7154..6ba5e55499 100644
--- a/Source/GB_Pending_realloc.c
+++ b/Source/GB_Pending_realloc.c
@@ -2,7 +2,7 @@
 // GB_Pending_realloc: reallocate a list of pending tuples
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_Sauna.h b/Source/GB_Sauna.h
deleted file mode 100644
index 4d3b7b1dc1..0000000000
--- a/Source/GB_Sauna.h
+++ /dev/null
@@ -1,108 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_Sauna.h: definitions for the Sauna, the sparse accumulator
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-#ifndef GB_SAUNA_H
-#define GB_SAUNA_H
-#include "GB.h"
-
-//------------------------------------------------------------------------------
-// Sauna data structure
-//------------------------------------------------------------------------------
-
-struct GB_Sauna_struct      // sparse accumulator
-{
-    int64_t Sauna_hiwater ; // Sauna_Mark [0..Sauna_n-1] < hiwater holds when
-                            // the Sauna_Mark is clear.
-    int64_t Sauna_n ;       // size of Sauna_Mark and Sauna_Work
-    int64_t *Sauna_Mark ;   // array of size Sauna_n
-    void    *Sauna_Work ;   // array of size Sauna_n, each entry Sauna_size
-    size_t  Sauna_size ;    // size of each entry in Sauna_Work
-} ;
-
-//------------------------------------------------------------------------------
-// Sauna functions
-//------------------------------------------------------------------------------
-
-GrB_Info GB_Sauna_acquire
-(
-    int nthreads,           // number of internal threads that need a Sauna
-    int *Sauna_ids,         // size nthreads, the Sauna id's acquired
-    GrB_Desc_Value *AxB_methods_used,       // size nthreads
-    GB_Context Context
-) ;
-
-GrB_Info GB_Sauna_alloc             // create a Sauna
-(
-    int Sauna_id,                   // id of Sauna to create
-    int64_t Sauna_n,                // size of the Sauna
-    size_t Sauna_size               // size of each entry in the Sauna
-) ;
-
-void GB_Sauna_free                  // free a Sauna
-(
-    int Sauna_id                    // id of Sauna to free
-) ;
-
-GrB_Info GB_Sauna_release
-(
-    int nthreads,           // number of internal threads that have a Sauna
-    int *Sauna_ids          // size nthreads, the Sauna id's to release
-) ;
-
-//------------------------------------------------------------------------------
-// ASSERT_SAUNA_IS_RESET
-//------------------------------------------------------------------------------
-
-// assert that all entries in Sauna_Mark are < Sauna_hiwater
-
-#ifdef GB_DEBUG
-    #define ASSERT_SAUNA_IS_RESET                                           \
-    {                                                                       \
-        for (int64_t i = 0 ; i < Sauna->Sauna_n ; i++)                      \
-        {                                                                   \
-            ASSERT (Sauna->Sauna_Mark [i] < Sauna->Sauna_hiwater) ;         \
-        }                                                                   \
-    }
-#else
-    #define ASSERT_SAUNA_IS_RESET
-#endif
-
-//------------------------------------------------------------------------------
-// GB_Sauna_reset: increment the Sauna_hiwater and clear Sauna_Mark if needed
-//------------------------------------------------------------------------------
-
-static inline int64_t GB_Sauna_reset
-(
-    GB_Sauna Sauna,     // Sauna to reset
-    int64_t reset,      // does Sauna_hiwater += reset
-    int64_t range       // clear Mark if Sauna_hiwater+reset+range overflows
-)
-{ 
-
-    ASSERT (Sauna != NULL) ;
-    Sauna->Sauna_hiwater += reset ;     // increment the Sauna_hiwater
-
-    if (Sauna->Sauna_hiwater + range <= 0 || reset == 0)
-    { 
-        // integer overflow has occurred; clear all of the Sauna_Mark array
-        for (int64_t i = 0 ; i < Sauna->Sauna_n ; i++)
-        { 
-            Sauna->Sauna_Mark [i] = 0 ;
-        }
-        Sauna->Sauna_hiwater = 1 ;
-    }
-
-    // assertion for debugging only:
-    ASSERT_SAUNA_IS_RESET ;         // assert that Sauna_Mark [...] < hiwater
-
-    return (Sauna->Sauna_hiwater) ;
-}
-
-#endif
-
diff --git a/Source/GB_Sauna_acquire.c b/Source/GB_Sauna_acquire.c
deleted file mode 100644
index e2dadfb433..0000000000
--- a/Source/GB_Sauna_acquire.c
+++ /dev/null
@@ -1,123 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_Sauna_acquire: acquire a set of Saunas
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// If the user only calls GraphBLAS from a single user thread, then all
-// internal threads will always find their native Sauna: tid == Sauna_id [tid].
-// The native Sauna is best since a thread should use workspace that it
-// allocated itself, for best performance in a NUMA memory system.
-
-#include "GB_Sauna.h"
-
-// The time spent in the critical section is typically O(nthreads) or, unless
-// the user calls GraphBLAS simultaneously from multiple user threads.  In that
-// case, (if try_again is true) the time could be O(GxB_NTHREADS_MAX), which is
-// 2048 by default.  If it becomes a performance bottleneck, doubly-linked list
-// of available Sauna ids could be kept.  The downside of a doubly-linked list
-// is that threads would tend not to acquire their native Saunas.
-
-GrB_Info GB_Sauna_acquire
-(
-    int nthreads,           // number of internal threads that need a Sauna
-    int *Sauna_ids,         // size nthreads, the Sauna id's acquired
-    GrB_Desc_Value *AxB_methods_used,       // size nthreads
-    GB_Context Context
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (Sauna_ids != NULL) ;
-    ASSERT (nthreads >= 1) ;
-
-    //--------------------------------------------------------------------------
-    // acquire nthread ids of unused Saunas
-    //--------------------------------------------------------------------------
-
-    bool ok = true ;
-    bool try_again = false ;
-
-    // define the work to do inside the critical section
-    #define GB_CRITICAL_SECTION                                             \
-    {                                                                       \
-        /* try to acquire the native Saunas for each thread */              \
-        for (int tid = 0 ; tid < nthreads ; tid++)                          \
-        {                                                                   \
-            if (AxB_methods_used != NULL &&                                 \
-               (AxB_methods_used [tid] != GxB_AxB_GUSTAVSON))               \
-            {                                                               \
-                /* no need for a Sauna for this thread */                   \
-                Sauna_ids [tid] = -2 ;                                      \
-            }                                                               \
-            else if (GB_Global_Sauna_in_use_get (tid))                      \
-            {                                                               \
-                /* Saunas [tid] is already in use */                        \
-                try_again = true ;                                          \
-                Sauna_ids [tid] = -1 ;                                      \
-            }                                                               \
-            else                                                            \
-            {                                                               \
-                /* acquire the native Sauna */                              \
-                GB_Global_Sauna_in_use_set (tid, true) ;                    \
-                Sauna_ids [tid] = tid ;                                     \
-            }                                                               \
-        }                                                                   \
-        if (try_again)                                                      \
-        {                                                                   \
-            /* look for non-native Saunas for the unsatisfied threads */    \
-            int s = 0 ;                                                     \
-            for (int tid = 0 ; tid < nthreads ; tid++)                      \
-            {                                                               \
-                if (Sauna_ids [tid] == -1)                                  \
-                {                                                           \
-                    /* thread tid does not yet have a Sauna */              \
-                    for ( ; s < GxB_NTHREADS_MAX ; s++)                     \
-                    {                                                       \
-                        if (!GB_Global_Sauna_in_use_get (s))                \
-                        {                                                   \
-                            /* acquire the native Sauna */                  \
-                            GB_Global_Sauna_in_use_set (s, true) ;          \
-                            Sauna_ids [tid] = s ;                           \
-                            break ;                                         \
-                        }                                                   \
-                    }                                                       \
-                }                                                           \
-            }                                                               \
-        }                                                                   \
-    }
-
-    //--------------------------------------------------------------------------
-    // do the critical section, depending on user threading model
-    //--------------------------------------------------------------------------
-
-    #include "GB_critical_section.c"
-
-    if (!ok) return (GrB_PANIC) ;       // critical section failed!
-
-    //--------------------------------------------------------------------------
-    // check if all threads got a Sauna that need one
-    //--------------------------------------------------------------------------
-
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    {
-        if (Sauna_ids [tid] == -1)
-        { 
-            // thread tid needs a Sauna but did not get one.  There are too
-            // many concurrent threads.  release all Sauna ids just acquired
-            GrB_Info info = GB_Sauna_release (nthreads, Sauna_ids) ;
-            if (info != GrB_SUCCESS) return (info) ;
-            return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
-                "Too many concurrent threads"))) ;
-        }
-    }
-
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_Sauna_alloc.c b/Source/GB_Sauna_alloc.c
deleted file mode 100644
index 8f34d9cf0d..0000000000
--- a/Source/GB_Sauna_alloc.c
+++ /dev/null
@@ -1,79 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_Sauna_alloc: create a new Sauna
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Does not use error reporting; returns GrB_SUCCESS or GrB_OUT_OF_MEMORY.
-
-#include "GB_Sauna.h"
-
-GrB_Info GB_Sauna_alloc             // create a Sauna
-(
-    int Sauna_id,                   // id of Sauna to create
-    int64_t Sauna_n,                // size of the Sauna
-    size_t Sauna_size               // size of each entry in the Sauna
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (Sauna_id >= 0 && Sauna_id < GxB_NTHREADS_MAX) ;
-
-    //--------------------------------------------------------------------------
-    // allocate the Sauna header
-    //--------------------------------------------------------------------------
-
-    GB_Sauna Sauna ;
-    GB_CALLOC_MEMORY (Sauna, 1, sizeof (struct GB_Sauna_struct)) ;
-    if (Sauna == NULL)
-    { 
-        // out of memory
-        return (GrB_OUT_OF_MEMORY) ;
-    }
-
-    // save it in the global table
-    GB_Global_Saunas_set (Sauna_id, Sauna) ;
-
-    //--------------------------------------------------------------------------
-    // allocate the contents of the Sauna
-    //--------------------------------------------------------------------------
-
-    Sauna_n    = GB_IMAX (Sauna_n, 1) ;     // must have at least one entry
-    Sauna_size = GB_IMAX (Sauna_size, 1) ;  // each entry must have size >= 1
-    Sauna->Sauna_hiwater = 1 ;              // Sauna_Mark [0..n-1] < hiwater
-    Sauna->Sauna_n = Sauna_n ;
-    Sauna->Sauna_size = Sauna_size ;
-
-    // note that Sauna_Work does not need to be initialized
-    GB_CALLOC_MEMORY (Sauna->Sauna_Mark, Sauna_n+1, sizeof (int64_t)) ;
-    GB_MALLOC_MEMORY (Sauna->Sauna_Work, Sauna_n+1, Sauna_size) ;
-
-    if (Sauna->Sauna_Mark == NULL || Sauna->Sauna_Work == NULL)
-    { 
-        // out of memory
-        GB_Sauna_free (Sauna_id) ;
-        return (GrB_OUT_OF_MEMORY) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // make the Sauna persistent
-    //--------------------------------------------------------------------------
-
-    // This is required for the MATLAB interface
-    GB_Global_persist_function (Sauna) ;
-    GB_Global_persist_function (Sauna->Sauna_Mark) ;
-    GB_Global_persist_function (Sauna->Sauna_Work) ;
-
-    //--------------------------------------------------------------------------
-    // return result
-    //--------------------------------------------------------------------------
-
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_Sauna_free.c b/Source/GB_Sauna_free.c
deleted file mode 100644
index ae24288ba7..0000000000
--- a/Source/GB_Sauna_free.c
+++ /dev/null
@@ -1,30 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_Sauna_free: free a Sauna
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-#include "GB_Sauna.h"
-
-void GB_Sauna_free                  // free a Sauna
-(
-    int Sauna_id                    // id of Sauna to free
-)
-{
-
-    GB_Sauna Sauna = GB_Global_Saunas_get (Sauna_id) ;
-    if (Sauna != NULL)
-    { 
-        // free all content of the Sauna
-        size_t Sauna_n = Sauna->Sauna_n ;
-        GB_FREE_MEMORY (Sauna->Sauna_Mark, Sauna_n+1, sizeof (int64_t)) ;
-        GB_FREE_MEMORY (Sauna->Sauna_Work, Sauna_n+1, Sauna->Sauna_size) ;
-        // free the header of the Sauna itself
-        GB_FREE_MEMORY (Sauna, 1, sizeof (struct GB_Sauna_struct)) ;
-        GB_Global_Saunas_set (Sauna_id, NULL) ;
-    }
-}
-
diff --git a/Source/GB_Sauna_release.c b/Source/GB_Sauna_release.c
deleted file mode 100644
index 0fb8ad882c..0000000000
--- a/Source/GB_Sauna_release.c
+++ /dev/null
@@ -1,68 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_Sauna_release: release a set of Saunas
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// A set of threads has finished computing C=A*B, and can now release their
-// thread-specific Sauna workspaces.  This does not need to be done in a
-// critical section, but doing so keeps the Sauna acquire/release in a single
-// batch, for each group of threads.  Another user thread can start C=A*B just
-// as these Saunas are being released, which would lead to an intermingled set
-// of Saunas.  It would work just fine, but might not be optimal.  The critical
-// section is very short.
-
-#include "GB_Sauna.h"
-
-GrB_Info GB_Sauna_release
-(
-    int nthreads,           // number of internal threads that have a Sauna
-    int *Sauna_ids          // size nthreads, the Sauna id's to release
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (Sauna_ids != NULL) ;
-    ASSERT (nthreads >= 1) ;
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    {
-        // printf ("tid %d releasing %d\n", tid, Sauna_ids [tid]) ; 
-        ASSERT (Sauna_ids [tid] >= -2 && Sauna_ids [tid] < GxB_NTHREADS_MAX) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // release the Saunas
-    //--------------------------------------------------------------------------
-
-    bool ok = true ;
-
-    // define the work to do inside the critical section
-    #define GB_CRITICAL_SECTION                                             \
-    {                                                                       \
-        for (int tid = 0 ; tid < nthreads ; tid++)                          \
-        {                                                                   \
-            int Sauna_id = Sauna_ids [tid] ;                                \
-            if (Sauna_id >= 0)                                              \
-            {                                                               \
-                /* release the Sauna previously acquired for thread tid */  \
-                GB_Global_Sauna_in_use_set (Sauna_id, false) ;              \
-            }                                                               \
-        }                                                                   \
-    }
-
-    //--------------------------------------------------------------------------
-    // do the critical section, depending on user threading model
-    //--------------------------------------------------------------------------
-
-    #include "GB_critical_section.c"
-
-    // GrB_PANIC if the critical section fails
-    return (ok ? GrB_SUCCESS : GrB_PANIC) ;
-}
-
diff --git a/Source/GB_Scalar_check.c b/Source/GB_Scalar_check.c
index c28f634548..f0dbc24704 100644
--- a/Source/GB_Scalar_check.c
+++ b/Source/GB_Scalar_check.c
@@ -2,7 +2,7 @@
 // GB_Scalar_check: print a GraphBLAS GxB_Scalar and check if it is valid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_SelectOp_check.c b/Source/GB_SelectOp_check.c
index 06ef1ba8f0..b79271e074 100644
--- a/Source/GB_SelectOp_check.c
+++ b/Source/GB_SelectOp_check.c
@@ -2,14 +2,11 @@
 // GB_SelectOp_check: check and print a select operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// for additional diagnostics, use:
-// #define GB_DEVELOPER 1
-
 #include "GB_printf.h"
 
 GrB_Info GB_SelectOp_check  // check a GraphBLAS select operator
@@ -44,13 +41,9 @@ GrB_Info GB_SelectOp_check  // check a GraphBLAS select operator
 
     if (pr > 0)
     {
-        if (op->opcode == GB_USER_SELECT_C_opcode)
-        { 
-            GBPR ("(compile-time user-defined) ") ;
-        }
-        else if (op->opcode == GB_USER_SELECT_R_opcode)
+        if (op->opcode >= GB_USER_SELECT_opcode)
         { 
-            GBPR ("(run-time user-defined) ") ;
+            GBPR ("(user-defined) ") ;
         }
         else
         { 
@@ -60,7 +53,7 @@ GrB_Info GB_SelectOp_check  // check a GraphBLAS select operator
 
     GBPR0 ("C=%s(A,k)\n", op->name) ;
 
-    if (op->function == NULL && op->opcode >= GB_USER_SELECT_C_opcode)
+    if (op->function == NULL && op->opcode >= GB_USER_SELECT_opcode)
     { 
         GBPR0 ("    function pointer is NULL\n") ;
         return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
@@ -68,7 +61,7 @@ GrB_Info GB_SelectOp_check  // check a GraphBLAS select operator
             GB_NAME, op->name))) ;
     }
 
-    if (op->opcode < GB_TRIL_opcode || op->opcode > GB_USER_SELECT_R_opcode)
+    if (op->opcode < GB_TRIL_opcode || op->opcode > GB_USER_SELECT_opcode)
     { 
         GBPR0 ("    invalid opcode\n") ;
         return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
diff --git a/Source/GB_SelectOp_new.c b/Source/GB_SelectOp_new.c
index 3b016980ec..f1c2ed156e 100644
--- a/Source/GB_SelectOp_new.c
+++ b/Source/GB_SelectOp_new.c
@@ -2,7 +2,7 @@
 // GB_SelectOp_new: create a new select operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -54,7 +54,7 @@ GrB_Info GB_SelectOp_new        // create a new user-defined select operator
     op->ttype = ttype ;
     op->function = function ;
     strncpy (op->name, name, GB_LEN-1) ;
-    op->opcode = GB_USER_SELECT_R_opcode ;
+    op->opcode = GB_USER_SELECT_opcode ;
     ASSERT_SELECTOP_OK (op, "new user-defined select op", GB0) ;
     return (GrB_SUCCESS) ;
 }
diff --git a/Source/GB_Semiring_check.c b/Source/GB_Semiring_check.c
index 96fa8fec67..4dd63f270a 100644
--- a/Source/GB_Semiring_check.c
+++ b/Source/GB_Semiring_check.c
@@ -2,14 +2,11 @@
 // GB_Semiring_check: check and print a semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// for additional diagnostics, use:
-// #define GB_DEVELOPER 1
-
 #include "GB_printf.h"
 
 GrB_Info GB_Semiring_check          // check a GraphBLAS semiring
@@ -48,12 +45,8 @@ GrB_Info GB_Semiring_check          // check a GraphBLAS semiring
             GBPR0 ("(built-in)") ;
             break ;
 
-        case GB_USER_COMPILED:
-            GBPR0 ("(user-defined at compile-time)") ;
-            break ;
-
         case GB_USER_RUNTIME:
-            GBPR0 ("(user-defined at run-time)") ;
+            GBPR0 ("(user-defined)") ;
             break ;
 
         default:
diff --git a/Source/GB_Type_check.c b/Source/GB_Type_check.c
index 9fe2a8d4f6..0f01407a40 100644
--- a/Source/GB_Type_check.c
+++ b/Source/GB_Type_check.c
@@ -2,7 +2,7 @@
 // GB_Type_check: print a built-in type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,9 +12,6 @@
 // matrix A:", for example.  The internal name is the C typedef with which the
 // GraphBLAS GrB_Type was created.
 
-// for additional diagnostics, use:
-// #define GB_DEVELOPER 1
-
 #include "GB_printf.h"
 
 GrB_Info GB_Type_check      // check a GraphBLAS Type
@@ -61,12 +58,7 @@ GrB_Info GB_Type_check      // check a GraphBLAS Type
         case GB_UINT64_code : GBPR0 ("uint64_t" ) ; break ;
         case GB_FP32_code   : GBPR0 ("float"    ) ; break ;
         case GB_FP64_code   : GBPR0 ("double"   ) ; break ;
-        case GB_UCT_code    :
-            GBPR0 ("compile-time user-defined: [%s]", type->name) ;
-            break ;
-        case GB_UDT_code    :
-            GBPR0 ("run-time user-defined: [%s]", type->name) ;
-            break ;
+        case GB_UDT_code    : GBPR0 ("user-defined: [%s]", type->name) ; break ;
         default             : GBPR0 ("unknown type\n") ;
             return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
                 "Type code %d is unknown: %s [%s]",
diff --git a/Source/GB_Type_compatible.c b/Source/GB_Type_compatible.c
index 2a25eb1037..0311e3e41f 100644
--- a/Source/GB_Type_compatible.c
+++ b/Source/GB_Type_compatible.c
@@ -2,7 +2,7 @@
 // GB_Type_compatible: return true if domains are compatible
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -19,8 +19,7 @@ bool GB_Type_compatible             // check if two types can be typecast
 )
 {
 
-    if (atype->code == GB_UCT_code || btype->code == GB_UCT_code ||
-        atype->code == GB_UDT_code || btype->code == GB_UDT_code)
+    if (atype->code == GB_UDT_code || btype->code == GB_UDT_code)
     { 
         // two user types must be identical to be compatible
         return (atype == btype) ;
diff --git a/Source/GB_Type_new.c b/Source/GB_Type_new.c
index e178213769..03ce40045a 100644
--- a/Source/GB_Type_new.c
+++ b/Source/GB_Type_new.c
@@ -2,7 +2,7 @@
 // GB_Type_new: create a new user-defined type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -30,11 +30,11 @@ GrB_Info GB_Type_new
 
     #if ( ! GB_HAS_VLA )
 
-        if (sizeof_ctype > GB_VLA_MAXSIZE_FOR_ANY_GRB_TYPE)
+        if (sizeof_ctype > GB_VLA_MAXSIZE)
         {
             return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG, "user-defined types"
                 " are limited to %d bytes (ANSI C99 or later is required)",
-                GB_VLA_MAXSIZE_FOR_ANY_GRB_TYPE))) ;
+                GB_VLA_MAXSIZE))) ;
         }
 
     #endif
@@ -55,7 +55,7 @@ GrB_Info GB_Type_new
     GrB_Type t = *type ;
     t->magic = GB_MAGIC ;
     t->size = GB_IMAX (sizeof_ctype, 1) ;
-    t->code = GB_UDT_code ;     // run-time user-defined type
+    t->code = GB_UDT_code ;     // user-defined type
 
     //--------------------------------------------------------------------------
     // get the name
diff --git a/Source/GB_UnaryOp_check.c b/Source/GB_UnaryOp_check.c
index e7029fa40a..4692a0d133 100644
--- a/Source/GB_UnaryOp_check.c
+++ b/Source/GB_UnaryOp_check.c
@@ -2,14 +2,11 @@
 // GB_UnaryOp_check: check and print a unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// for additional diagnostics, use:
-// #define GB_DEVELOPER 1
-
 #include "GB_printf.h"
 
 GrB_Info GB_UnaryOp_check   // check a GraphBLAS unary operator
@@ -43,14 +40,10 @@ GrB_Info GB_UnaryOp_check   // check a GraphBLAS unary operator
     GB_CHECK_MAGIC (op, "UnaryOp") ;
 
     if (pr > 0)
-    { 
-        if (op->opcode == GB_USER_C_opcode)
-        { 
-            GBPR ("(compile-time user-defined) ") ;
-        }
-        else if (op->opcode == GB_USER_R_opcode)
+    {
+        if (op->opcode >= GB_USER_opcode)
         { 
-            GBPR ("(run-time user-defined) ") ;
+            GBPR ("(user-defined) ") ;
         }
         else
         { 
@@ -74,8 +67,7 @@ GrB_Info GB_UnaryOp_check   // check a GraphBLAS unary operator
           op->opcode == GB_ABS_opcode ||
           op->opcode == GB_MINV_opcode ||
           op->opcode == GB_LNOT_opcode ||
-          op->opcode == GB_USER_C_opcode ||       // unary or binary
-          op->opcode == GB_USER_R_opcode))        // unary or binary
+          op->opcode == GB_USER_opcode))        // unary or binary
     { 
         GBPR0 ("    invalid opcode\n") ;
         return (GB_ERROR (GrB_INVALID_OBJECT, (GB_LOG,
diff --git a/Source/GB_UnaryOp_new.c b/Source/GB_UnaryOp_new.c
index c4e6e6ec9d..2b148cbf83 100644
--- a/Source/GB_UnaryOp_new.c
+++ b/Source/GB_UnaryOp_new.c
@@ -2,7 +2,7 @@
 // GB_UnaryOp_new: create a new unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -55,7 +55,7 @@ GrB_Info GB_UnaryOp_new             // create a new user-defined unary operator
     op->ztype = ztype ;
     op->function = function ;
     strncpy (op->name, name, GB_LEN-1) ;
-    op->opcode = GB_USER_R_opcode ;     // run-time user-defined operator
+    op->opcode = GB_USER_opcode ;     // user-defined operator
     ASSERT_UNARYOP_OK (op, "new user-defined unary op", GB0) ;
     return (GrB_SUCCESS) ;
 }
diff --git a/Source/GB_Vector_check.c b/Source/GB_Vector_check.c
index 1fb85cf9ae..fd7c8eefe7 100644
--- a/Source/GB_Vector_check.c
+++ b/Source/GB_Vector_check.c
@@ -2,7 +2,7 @@
 // GB_Vector_check: print a GraphBLAS GrB_Vector and check if it is valid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_accum_mask.c b/Source/GB_accum_mask.c
index 4c8f7894b6..403a6e0d82 100644
--- a/Source/GB_accum_mask.c
+++ b/Source/GB_accum_mask.c
@@ -2,7 +2,7 @@
 // GB_accum_mask: accumulate results via the mask and accum operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -44,14 +44,6 @@
 // The descriptor affects how C and M are handled.  If the descriptor is
 // NULL, defaults are used.
 
-// desc [GB_MASK] = GxB_DEFAULT means to use M as-is
-
-// desc [GB_MASK] = GrB_SCMP means to use the logical negation of M
-
-// desc [GB_OUTP] = GxB_DEFAULT means to use C as-is.
-
-// desc [GB_OUTP] = GrB_REPLACE means to clear C before writing Z into C.
-
 #include "GB_subassign.h"
 #include "GB_add.h"
 #include "GB_mask.h"
@@ -138,6 +130,7 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
     GrB_Matrix *Thandle,        // results of computation, freed when done
     const bool C_replace,       // if true, clear C first
     const bool Mask_comp,       // if true, complement the mask
+    const bool Mask_struct,     // if true, use only the structure of M
     GB_Context Context
 )
 {
@@ -178,7 +171,7 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
     //--------------------------------------------------------------------------
 
     if (GB_PENDING_OR_ZOMBIES (T))
-    {
+    { 
         // if this fails, *Thandle must be freed
         GB_OK (GB_wait (T, Context)) ;
     }
@@ -187,11 +180,19 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
     // ensure M and T have the same CSR/CSC format as C
     //--------------------------------------------------------------------------
 
+    #if GB_BURBLE
+    bool T_transposed = false ;
+    bool M_transposed = false ;
+    #endif
+
     if (C->is_csc != T->is_csc)
     { 
         // transpose: no typecast, no op, in place of T, jumbled, but T
         // cannot have any zombies or pending tuples.
         GB_OK (GB_transpose (Thandle, NULL, C->is_csc, NULL, NULL, Context)) ;
+        #if GB_BURBLE
+        T_transposed = true ;
+        #endif
         T = (*Thandle) ;
     }
 
@@ -214,6 +215,9 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
             GB_OK (GB_transpose (&MT, GrB_BOOL, C->is_csc, M, NULL, Context)) ;
             // use the transpose mask
             M = MT ;
+            #if GB_BURBLE
+            M_transposed = true ;
+            #endif
         }
         else
         { 
@@ -254,15 +258,32 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
     // GB_transplant for Z=T and GB_transplant_conform in GB_mask for C=Z).
     // So in this case, GB_subassigner takes more work.
 
-    if ((M != NULL || accum != NULL) && (tnz + cnpending <= cnz)
-        && !GB_aliased (C, M) && !GB_aliased (C, T))
+    bool use_subassigner =
+        ((M != NULL || accum != NULL) && (tnz + cnpending <= cnz)
+            && !GB_aliased (C, M) && !GB_aliased (C, T)) ;
+
+    bool use_transplant = (!use_subassigner)
+        && (accum == NULL || (cnz + cnpending) == 0) ;
+
+    if (!use_transplant)
+    { 
+        GBBURBLE ("(C%s%s=Z via %s%s%s) ",
+            ((M == NULL) ? "" : ((Mask_comp) ? "<!M>" : "<M>")),
+            ((accum == NULL) ? "" : "+"),
+            ((use_subassigner) ? "assign" :
+                ((use_transplant) ? "transplant" : "add")),
+            (M_transposed ? "(M transposed)" : ""),
+            (T_transposed ? "(result transposed)" : "")) ;
+    }
+
+    if (use_subassigner)
     { 
 
         //----------------------------------------------------------------------
         // C(:,:)<M> = accum (C(:,:),T) via GB_subassigner
         //----------------------------------------------------------------------
 
-        GB_OK (GB_subassigner (C, C_replace, M, Mask_comp, accum,
+        GB_OK (GB_subassigner (C, C_replace, M, Mask_comp, Mask_struct, accum,
             T, GrB_ALL, 0, GrB_ALL, 0, false, NULL, GB_ignore_code, Context)) ;
 
     }
@@ -280,7 +301,7 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
         // see GB_spec_accum.m for a description of this step.  If C is empty,
         // then the accumulator can be ignored.
 
-        if (accum == NULL || (cnz + cnpending) == 0)
+        if (use_transplant)
         { 
 
             //------------------------------------------------------------------
@@ -318,7 +339,8 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
                 M1 = M ;
             }
 
-            GB_OK (GB_add (&Z, C->type, C->is_csc, M1, C, T, accum, Context)) ;
+            GB_OK (GB_add (&Z, C->type, C->is_csc, M1, Mask_struct, C, T,
+                accum, Context)) ;
             GB_MATRIX_FREE (Thandle) ;
         }
 
@@ -339,7 +361,7 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
 
         // apply the mask, storing the results back into C, and free Z.
         ASSERT_MATRIX_OK (C, "C<M>=Z input", GB0) ;
-        GB_OK (GB_mask (C, M, &Z, C_replace, Mask_comp, Context)) ;
+        GB_OK (GB_mask (C, M, &Z, C_replace, Mask_comp, Mask_struct, Context)) ;
         ASSERT (Z == NULL) ;
         ASSERT (!C->p_shallow && !C->h_shallow) ;
         ASSERT (!C->i_shallow && !C->x_shallow) ;
diff --git a/Source/GB_accum_mask.h b/Source/GB_accum_mask.h
index 98a34c4221..1b705f8a7b 100644
--- a/Source/GB_accum_mask.h
+++ b/Source/GB_accum_mask.h
@@ -2,7 +2,7 @@
 // GB_accum_mask.h: definitions for GB_accum_mask
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,7 @@ GrB_Info GB_accum_mask          // C<M> = accum (C,T)
     GrB_Matrix *Thandle,        // results of computation, freed when done
     const bool C_replace,       // if true, clear C first
     const bool Mask_comp,       // if true, complement the mask
+    const bool Mask_struct,     // if true, use only the structure of M
     GB_Context Context
 ) ;
 
diff --git a/Source/GB_add.c b/Source/GB_add.c
index a794d7137b..d1642f61b6 100644
--- a/Source/GB_add.c
+++ b/Source/GB_add.c
@@ -2,7 +2,7 @@
 // GB_add: C = A+B or C<M>=A+B, but not C<!M>=A+B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -41,6 +41,7 @@ GrB_Info GB_add             // C=A+B or C<M>=A+B
     const GrB_Type ctype,   // type of output matrix C
     const bool C_is_csc,    // format of output matrix C
     const GrB_Matrix M,     // optional mask for C, unused if NULL
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix A,     // input A matrix
     const GrB_Matrix B,     // input B matrix
     const GrB_BinaryOp op,  // op to perform C = op (A,B)
@@ -52,6 +53,8 @@ GrB_Info GB_add             // C=A+B or C<M>=A+B
     // check inputs
     //--------------------------------------------------------------------------
 
+    GBBURBLE ((M == NULL) ? "add " : "masked_add ") ;
+
     ASSERT (Chandle != NULL) ;
     ASSERT_MATRIX_OK (A, "A for add", GB0) ;
     ASSERT_MATRIX_OK (B, "B for add", GB0) ;
@@ -132,7 +135,7 @@ GrB_Info GB_add             // C=A+B or C<M>=A+B
         // from phase0:
         Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
         // original input:
-        M, A, B, Context) ;
+        M, Mask_struct, A, B, Context) ;
 
     if (info != GrB_SUCCESS)
     { 
@@ -162,7 +165,7 @@ GrB_Info GB_add             // C=A+B or C<M>=A+B
         // from phase0:
         Cnvec, Ch, C_to_M, C_to_A, C_to_B, Ch_is_Mh,
         // original input:
-        M, A, B, Context) ;
+        M, Mask_struct, A, B, Context) ;
 
     // free workspace
     GB_FREE_MEMORY (TaskList, max_ntasks+1, sizeof (GB_task_struct)) ;
diff --git a/Source/GB_add.h b/Source/GB_add.h
index 20a4d1c7b8..d7047197d2 100644
--- a/Source/GB_add.h
+++ b/Source/GB_add.h
@@ -2,7 +2,7 @@
 // GB_add.h: definitiions for GB_add and related functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,6 +17,7 @@ GrB_Info GB_add             // C=A+B or C<M>=A+B
     const GrB_Type ctype,   // type of output matrix C
     const bool C_is_csc,    // format of output matrix C
     const GrB_Matrix M,     // optional mask for C, unused if NULL
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix A,     // input A matrix
     const GrB_Matrix B,     // input B matrix
     const GrB_BinaryOp op,  // op to perform C = op (A,B)
@@ -55,6 +56,7 @@ GrB_Info GB_add_phase1                  // count nnz in each C(:,j)
     const bool Ch_is_Mh,                // if true, then Ch == M->h
     // original input:
     const GrB_Matrix M,                 // optional mask, may be NULL
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
@@ -82,6 +84,7 @@ GrB_Info GB_add_phase2      // C=A+B or C<M>=A+B
     const bool Ch_is_Mh,        // if true, then Ch == M->h
     // original input:
     const GrB_Matrix M,         // optional mask, may be NULL
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
diff --git a/Source/GB_add_phase0.c b/Source/GB_add_phase0.c
index 5c99e2cbc3..429afd3630 100644
--- a/Source/GB_add_phase0.c
+++ b/Source/GB_add_phase0.c
@@ -2,7 +2,7 @@
 // GB_add_phase0: find vectors of C to compute for C=A+B or C<M>=A+B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_add_phase1.c b/Source/GB_add_phase1.c
index 644f931d80..d5cf48b257 100644
--- a/Source/GB_add_phase1.c
+++ b/Source/GB_add_phase1.c
@@ -2,7 +2,7 @@
 // GB_add_phase1: find # of entries in C=A+B or C<M>=A+B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -41,6 +41,7 @@ GrB_Info GB_add_phase1                  // count nnz in each C(:,j)
     const bool Ch_is_Mh,                // if true, then Ch == M->h
     // original input:
     const GrB_Matrix M,                 // optional mask, may be NULL
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
diff --git a/Source/GB_add_phase2.c b/Source/GB_add_phase2.c
index 7164d22c2c..44d2a8c67e 100644
--- a/Source/GB_add_phase2.c
+++ b/Source/GB_add_phase2.c
@@ -2,7 +2,7 @@
 // GB_add_phase2: C=A+B or C<M>=A+B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -52,6 +52,7 @@ GrB_Info GB_add_phase2      // C=A+B or C<M>=A+B
     const bool Ch_is_Mh,        // if true, then Ch == M->h
     // original input:
     const GrB_Matrix M,         // optional mask, may be NULL
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
@@ -146,7 +147,7 @@ GrB_Info GB_add_phase2      // C=A+B or C<M>=A+B
 
     #define GB_BINOP_WORKER(mult,xyname)                            \
     {                                                               \
-        info = GB_AaddB(mult,xyname) (C, M, A, B, Ch_is_Mh,         \
+        info = GB_AaddB(mult,xyname) (C, M, Mask_struct, A, B, Ch_is_Mh, \
             C_to_M, C_to_A, C_to_B, TaskList, ntasks, nthreads) ;   \
         done = (info != GrB_NO_VALUE) ;                             \
     }                                                               \
@@ -159,7 +160,7 @@ GrB_Info GB_add_phase2      // C=A+B or C<M>=A+B
     GB_Opcode opcode ;
     GB_Type_code xycode, zcode ;
 
-    if (GB_binop_builtin (A, false, B, false, op,
+    if (GB_binop_builtin (A->type, false, B->type, false, op,
         false, &opcode, &xycode, &zcode) && ccode == zcode)
     { 
         #include "GB_binop_factory.c"
@@ -174,6 +175,7 @@ GrB_Info GB_add_phase2      // C=A+B or C<M>=A+B
 
     if (!done)
     {
+        GB_BURBLE_MATRIX (C, "generic ") ;
 
         GxB_binary_function fadd ;
         size_t csize, asize, bsize, xsize, ysize, zsize ;
@@ -248,36 +250,22 @@ GrB_Info GB_add_phase2      // C=A+B or C<M>=A+B
 
         #define GB_PHASE_2_OF_2
 
-        #include "GB_add_template.c"
+        // loops cannot be vectorized
+        #define GB_PRAGMA_VECTORIZE
 
+        #include "GB_add_template.c"
     }
 
     //--------------------------------------------------------------------------
     // remove empty vectors from C, if hypersparse
     //--------------------------------------------------------------------------
 
-    if (C_is_hyper && C->nvec_nonempty < Cnvec)
-    {
-        // create new Cp_new and Ch_new arrays, with no empty vectors
-        int64_t *GB_RESTRICT Cp_new = NULL ;
-        int64_t *GB_RESTRICT Ch_new = NULL ;
-        int64_t nvec_new ;
-        info = GB_hyper_prune (&Cp_new, &Ch_new, &nvec_new, C->p, C->h, Cnvec,
-            Context) ;
-        if (info != GrB_SUCCESS)
-        { 
-            // out of memory
-            GB_MATRIX_FREE (&C) ;
-            return (info) ;
-        }
-        // transplant the new hyperlist into C
-        GB_FREE_MEMORY (C->p, Cnvec+1, sizeof (int64_t)) ;
-        GB_FREE_MEMORY (C->h, Cnvec,   sizeof (int64_t)) ;
-        C->p = Cp_new ;
-        C->h = Ch_new ;
-        C->nvec = nvec_new ;
-        C->plen = nvec_new ;
-        ASSERT (C->nvec == C->nvec_nonempty) ;
+    info = GB_hypermatrix_prune (C, Context) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        GB_MATRIX_FREE (&C) ;
+        return (info) ;
     }
 
     //--------------------------------------------------------------------------
diff --git a/Source/GB_aliased.c b/Source/GB_aliased.c
index 8cf57f01da..12fa2f006d 100644
--- a/Source/GB_aliased.c
+++ b/Source/GB_aliased.c
@@ -2,7 +2,7 @@
 // GB_aliased: determine if two matrices are aliased
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_apply.c b/Source/GB_apply.c
index 3615218eb4..0d9f803ebe 100644
--- a/Source/GB_apply.c
+++ b/Source/GB_apply.c
@@ -2,7 +2,7 @@
 // GB_apply: apply a unary operator; optionally transpose a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,7 @@ GrB_Info GB_apply                   // C<M> = accum (C, op(A)) or op(A')
     const bool C_replace,           // C descriptor
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // M descriptor
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_UnaryOp op,           // operator to apply to the entries
     const GrB_Matrix A,             // first input:  matrix A
@@ -101,13 +102,24 @@ GrB_Info GB_apply                   // C<M> = accum (C, op(A)) or op(A')
     { 
         // T = op (A'), typecasting to op->ztype
         // transpose: typecast, apply an op, not in place
+        GBBURBLE ("(transpose-op) ") ;
         info = GB_transpose (&T, T_type, C_is_csc, A, op, Context) ;
     }
+    else if (M == NULL && accum == NULL && (C == A) && C->type == op->ztype)
+    {
+        GBBURBLE ("(inplace-op) ") ;
+        // C = op (C), operating on the values in place.  The operator output
+        // needs no typecasting (C->type == op->ztype).  Always succeeds.
+        // FUTURE::: also handle C += op(C), with accum
+        GB_apply_op (C->x, op, C->x, C->type, GB_NNZ (C), Context) ;
+        return (GrB_SUCCESS) ;
+    }
     else
     { 
         // T = op (A), pattern is a shallow copy of A, type is op->ztype.  If
         // op is the built-in IDENTITY and A->type is op->xtype == op->ztype,
         // then a pure shallow copy is made.
+        GBBURBLE ("(shallow-op) ") ;
         info = GB_shallow_op (&T, C_is_csc, op, A, Context) ;
     }
 
@@ -124,6 +136,6 @@ GrB_Info GB_apply                   // C<M> = accum (C, op(A)) or op(A')
     //--------------------------------------------------------------------------
 
     return (GB_accum_mask (C, M, NULL, accum, &T, C_replace, Mask_comp,
-        Context)) ;
+        Mask_struct, Context)) ;
 }
 
diff --git a/Source/GB_apply.h b/Source/GB_apply.h
index 1d901e552a..dd4b5c847b 100644
--- a/Source/GB_apply.h
+++ b/Source/GB_apply.h
@@ -2,7 +2,7 @@
 // GB_apply.h: definitions for GB_apply
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,6 +17,7 @@ GrB_Info GB_apply                   // C<M> = accum (C, op(A)) or op(A')
     const bool C_replace,           // C descriptor
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // M descriptor
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_UnaryOp op,           // operator to apply to the entries
     const GrB_Matrix A,             // first input:  matrix A
diff --git a/Source/GB_apply_op.c b/Source/GB_apply_op.c
index 9c0fb1963b..e187cb999c 100644
--- a/Source/GB_apply_op.c
+++ b/Source/GB_apply_op.c
@@ -2,13 +2,14 @@
 // GB_apply_op: typecast and apply a unary operator to an array
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 // Cx = op ((xtype) Ax)
 
+// Cx and Ax may be aliased.
 // Compare with GB_transpose_op.c
 
 #include "GB_apply.h"
@@ -19,11 +20,11 @@
 
 void GB_apply_op            // apply a unary operator, Cx = op ((xtype) Ax)
 (
-    GB_void *GB_RESTRICT Cx,           // output array, of type op->ztype
-    const GrB_UnaryOp op,           // operator to apply
-    const GB_void *GB_RESTRICT Ax,     // input array, of type Atype
-    const GrB_Type Atype,           // type of Ax
-    const int64_t anz,              // size of Ax and Cx
+    GB_void *Cx,                // output array, of type op->ztype
+    const GrB_UnaryOp op,       // operator to apply
+    const GB_void *Ax,          // input array, of type Atype
+    const GrB_Type Atype,       // type of Ax
+    const int64_t anz,          // size of Ax and Cx
     GB_Context Context
 )
 {
@@ -60,7 +61,7 @@ void GB_apply_op            // apply a unary operator, Cx = op ((xtype) Ax)
     #define GB_WORKER(op,zname,ztype,aname,atype)                           \
     {                                                                       \
         GrB_Info info = GB_unop (op,zname,aname) ((ztype *) Cx,             \
-            (const atype *) Ax, anz, nthreads) ;                            \
+            (atype *) Ax, anz, nthreads) ;                                  \
         if (info == GrB_SUCCESS) return ;                                   \
     }                                                                       \
     break ;
@@ -77,6 +78,8 @@ void GB_apply_op            // apply a unary operator, Cx = op ((xtype) Ax)
     // generic worker: typecast and apply an operator
     //--------------------------------------------------------------------------
 
+    GB_BURBLE_N (anz, "generic ") ;
+
     size_t asize = Atype->size ;
     size_t zsize = op->ztype->size ;
     size_t xsize = op->xtype->size ;
diff --git a/Source/GB_assign.c b/Source/GB_assign.c
index 01aa00af8f..4695113fe8 100644
--- a/Source/GB_assign.c
+++ b/Source/GB_assign.c
@@ -2,7 +2,7 @@
 // GB_assign: submatrix assignment: C<M>(Rows,Cols) = accum (C(Rows,Cols),A)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -44,9 +44,10 @@
 GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
 (
     GrB_Matrix C,                   // input/output matrix for results
-    const bool C_replace,           // descriptor for C
+    bool C_replace,                 // descriptor for C
     const GrB_Matrix M_in,          // optional mask for C
     const bool Mask_comp,           // true if mask is complemented
+    const bool Mask_struct,         // if true, use only the structure of M
     bool M_transpose,               // true if the mask should be transposed
     const GrB_BinaryOp accum,       // optional accum for accum(C,T)
     const GrB_Matrix A_in,          // input matrix
@@ -122,6 +123,8 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
     GB_ijlength (Rows, nRows_in, GB_NROWS (C), &nRows, &RowsKind, RowColon) ;
     GB_ijlength (Cols, nCols_in, GB_NCOLS (C), &nCols, &ColsKind, ColColon) ;
 
+    bool whole_C_matrix = (RowsKind == GB_ALL && ColsKind == GB_ALL) ;
+
     bool C_is_csc = C->is_csc ;
 
     //--------------------------------------------------------------------------
@@ -257,12 +260,14 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
                 if ((row_assign && !C_is_csc) || (col_assign && C_is_csc))
                 { 
                     // delete all entries in vector j
+                    GBBURBLE ("C(:,j)=zombie ") ;
                     int64_t j = (col_assign) ? Cols [0] : Rows [0] ;
                     GB_assign_zombie1 (C, j, Context) ;
                 }
                 else
                 { 
                     // delete all entries in each vector with index i
+                    GBBURBLE ("C(i,:)=zombie ") ;
                     int64_t i = (row_assign) ? Rows [0] : Cols [0] ;
                     GB_assign_zombie2 (C, i, Context) ;
                 }
@@ -288,36 +293,36 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
     }
 
     //--------------------------------------------------------------------------
-    // allocate workspace for final C_replace phase
+    // determine if the final C_replace phase is needed
     //--------------------------------------------------------------------------
 
-    // whole_matrix is true if C(:,:)=A is being computed (the submatrix is
+    // whole_submatrix is true if C(:,:)=A is being computed (the submatrix is
     // all of C), or all that the operation can modify for row/col assign.
 
-    bool whole_matrix ;
+    bool whole_submatrix ;
     if (row_assign)
     { 
         // row assignment to the entire row
-        whole_matrix = (ColsKind == GB_ALL) ;
+        whole_submatrix = (ColsKind == GB_ALL) ;
     }
     else if (col_assign)
     { 
         // col assignment to the entire column
-        whole_matrix = (RowsKind == GB_ALL) ;
+        whole_submatrix = (RowsKind == GB_ALL) ;
     }
     else
     { 
         // matrix assignment to the entire matrix
-        whole_matrix = (RowsKind == GB_ALL && ColsKind == GB_ALL) ;
+        whole_submatrix = whole_C_matrix ;
     }
 
     // Mask_is_same is true if SubMask == M (:,:)
-    bool Mask_is_same = (M == NULL || whole_matrix) ;
+    bool Mask_is_same = (M == NULL || whole_submatrix) ;
 
     // C_replace_phase is true if a final pass over all of C is required
     // to delete entries outside the C(I,J) submatrix.
     bool C_replace_phase = (C_replace && !Mask_is_same) ;
-    ASSERT (!Mask_is_same == (M != NULL && !whole_matrix)) ;
+    ASSERT (!Mask_is_same == (M != NULL && !whole_submatrix)) ;
 
     //--------------------------------------------------------------------------
     // apply pending updates to A and M
@@ -444,6 +449,7 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
     { 
         // AT = A', with no typecasting
         // transpose: no typecast, no op, not in place
+        GBBURBLE ("(A transpose) ") ;
         GB_OK (GB_transpose (&AT, NULL, C_is_csc, A, NULL, Context)) ;
         A = AT ;
     }
@@ -546,6 +552,7 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
             // MT = M' to conform M to the same CSR/CSC format as C.
             // typecast to boolean, if a full matrix transpose is done.
             // transpose: typecast, no op, not in place
+            GBBURBLE ("(M transpose) ") ;
             GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M, NULL, Context)) ;
             M = MT ;
         }
@@ -572,17 +579,51 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
     }
 
     if (C_aliased)
-    { 
-        // Z2 = duplicate of C, which must be freed when done
+    {
+        // If C is aliased with A or M, it no longer has any pending work,
+        // since A and M have been finished above.  This also ensures GB_dup
+        // does not need to finish any pending work in C.
+        GBBURBLE ("(C aliased) ") ;
         ASSERT (!GB_ZOMBIES (C)) ;
         ASSERT (!GB_PENDING (C)) ;
-        GB_OK (GB_dup (&Z2, C, true, NULL, Context)) ;
+        if (whole_C_matrix && C_replace && accum == NULL)
+        { 
+            // C(:,:)<any mask, replace> = A or x, with C aliased to M or A.  C
+            // is about to be cleared in GB_subassigner anyway, but a duplicate
+            // is needed because C is aliased with M or A.  Instead of
+            // duplicating it, create an empty matrix Z2.  This also prevents
+            // the C_replace_phase from being needed.
+            GB_NEW (&Z2, C->type, C->vlen, C->vdim, GB_Ap_calloc, C->is_csc,
+                GB_SAME_HYPER_AS (C->is_hyper), C->hyper_ratio, 1, Context) ;
+            GB_OK (info)  ;
+            GBBURBLE ("(C alias cleared; C_replace early) ") ;
+            C_replace = false ;
+            C_replace_phase = false ;
+        }
+        else
+        { 
+            // Z2 = duplicate of C, which must be freed when done
+            GB_OK (GB_dup (&Z2, C, true, NULL, Context)) ;
+        }
         Z = Z2 ;
     }
     else
-    { 
+    {
         // GB_subassigner can safely operate on C in place and so can the
         // C_replace_phase below.
+        // FUTURE:  if C is dense and will remain so,
+        // it would be faster to delay the clearing of C.
+        if (whole_C_matrix && C_replace && accum == NULL)
+        { 
+            // C(:,:)<any mask, replace> = A or x, with C not aliased to M or
+            // A.  C is about to be cleared in GB_subassigner anyway, so clear
+            // it now.  This also prevents the C_replace_phase from being
+            // needed.
+            GB_OK (GB_clear (C, Context)) ;
+            GBBURBLE ("(C(:,:)<any mask>: C_replace early) ") ;
+            C_replace = false ;
+            C_replace_phase = false ;
+        }
         Z = C ;
     }
 
@@ -592,7 +633,7 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
 
     GB_OK (GB_subassigner (
         Z,          C_replace,      // Z matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         accum,                      // for accum (C(I,J),A)
         A,                          // A matrix, NULL for scalar expansion
         I, ni,                      // indices
@@ -627,9 +668,9 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
         // M_in(I,J)=1 is true, so C_replace has no effect outside the Z(I,J)
         // submatrix.
 
-        // Also, if whole_matrix is true, then there is nothing outside the
-        // Z(I,J) submatrix to modify, so this phase is skipped if whole_matrix
-        // is true.
+        // Also, if whole_submatrix is true, then there is nothing outside the
+        // Z(I,J) submatrix to modify, so this phase is skipped if
+        // whole_submatrix is true.
 
         // This code requires Z and M_in not to be aliased to each other.
 
@@ -662,6 +703,7 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
             // MT = M' to conform M to the same CSR/CSC format as C.
             // typecast to boolean, if a full matrix transpose is done.
             // transpose: typecast, no op, not in place
+            GBBURBLE ("(M transpose) ") ;
             GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M, NULL, Context)) ;
             M = MT ;
         }
@@ -716,8 +758,9 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
             int64_t j = J [0] ;
             ASSERT (j == GB_ijlist (J, 0, Jkind, Jcolon)) ;
 
-            GB_assign_zombie3 (Z, M, Mask_comp, j, I, nI, Ikind, Icolon,
-                Context) ;
+            GBBURBLE ("assign zombies outside C(I,j) ") ;
+            GB_assign_zombie3 (Z, M, Mask_comp, Mask_struct,
+                j, I, nI, Ikind, Icolon, Context) ;
         }
         else if ((row_assign && C->is_csc) || (col_assign && !C->is_csc))
         { 
@@ -734,8 +777,9 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
             int64_t i = I [0] ;
             ASSERT (i == GB_ijlist (I, 0, Ikind, Icolon)) ;
 
-            GB_assign_zombie4 (Z, M, Mask_comp, i, J, nJ, Jkind, Jcolon,
-                Context) ;
+            GBBURBLE ("assign zombies outside C(i,J) ") ;
+            GB_assign_zombie4 (Z, M, Mask_comp, Mask_struct,
+                i, J, nJ, Jkind, Jcolon, Context) ;
         }
         else
         { 
@@ -747,7 +791,8 @@ GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
             // M has the same size as Z
             ASSERT (M->vlen == Z->vlen && M->vdim == Z->vdim) ;
 
-            GB_OK (GB_assign_zombie5 (Z, M, Mask_comp,
+            GBBURBLE ("assign zombies outside C(I,J) ") ;
+            GB_OK (GB_assign_zombie5 (Z, M, Mask_comp, Mask_struct,
                 I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon, Context)) ;
         }
 
diff --git a/Source/GB_assign.h b/Source/GB_assign.h
index a97979a8bc..bf26246752 100644
--- a/Source/GB_assign.h
+++ b/Source/GB_assign.h
@@ -2,7 +2,7 @@
 // GB_assign.h: definitions for GB_assign and related functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,9 +14,10 @@
 GrB_Info GB_assign                  // C<M>(Rows,Cols) += A or A'
 (
     GrB_Matrix C,                   // input/output matrix for results
-    const bool C_replace,           // descriptor for C
+    bool C_replace,                 // descriptor for C
     const GrB_Matrix M_in,          // optional mask for C
     const bool Mask_comp,           // true if mask is complemented
+    const bool Mask_struct,         // if true, use only the structure of M
     bool M_transpose,               // true if the mask should be transposed
     const GrB_BinaryOp accum,       // optional accum for accum(C,T)
     const GrB_Matrix A_in,          // input matrix
@@ -67,6 +68,7 @@ void GB_assign_zombie3
     GrB_Matrix Z,
     const GrB_Matrix M,
     const bool Mask_comp,
+    const bool Mask_struct,         // if true, use only the structure of M
     const int64_t j,
     const GrB_Index *I,
     const int64_t nI,
@@ -80,6 +82,7 @@ void GB_assign_zombie4
     GrB_Matrix Z,
     const GrB_Matrix M,
     const bool Mask_comp,
+    const bool Mask_struct,         // if true, use only the structure of M
     const int64_t i,
     const GrB_Index *J,
     const int64_t nJ,
@@ -93,6 +96,7 @@ GrB_Info GB_assign_zombie5
     GrB_Matrix Z,
     const GrB_Matrix M,
     const bool Mask_comp,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Index *I,
     const int64_t nI,
     const int Ikind,
diff --git a/Source/GB_assign_scalar.c b/Source/GB_assign_scalar.c
index c8f2d72482..265aac2f5c 100644
--- a/Source/GB_assign_scalar.c
+++ b/Source/GB_assign_scalar.c
@@ -2,7 +2,7 @@
 // GB_assign_scalar:    C<M>(Rows,Cols) = accum (C(Rows,Cols),x)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -45,7 +45,8 @@ GrB_Info GB_assign_scalar           // C<M>(Rows,Cols) += x
     ASSERT (scalar_code <= GB_UDT_code) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // C<M>(Rows,Cols) = accum (C(Rows,Cols), scalar)
@@ -53,7 +54,7 @@ GrB_Info GB_assign_scalar           // C<M>(Rows,Cols) += x
 
     return (GB_assign (
         C,          C_replace,      // C matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         false,                      // do not transpose the mask
         accum,                      // for accum (C(Rows,Cols),scalar)
         NULL,       false,          // no explicit matrix A
diff --git a/Source/GB_assign_zombie1.c b/Source/GB_assign_zombie1.c
index b882a298e3..21064b9f93 100644
--- a/Source/GB_assign_zombie1.c
+++ b/Source/GB_assign_zombie1.c
@@ -2,7 +2,7 @@
 // GB_assign_zombie1: delete all entries in C(:,j) for GB_assign
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_assign_zombie2.c b/Source/GB_assign_zombie2.c
index 16fc35d1b2..5d91ba1452 100644
--- a/Source/GB_assign_zombie2.c
+++ b/Source/GB_assign_zombie2.c
@@ -2,7 +2,7 @@
 // GB_assign_zombie2: delete all entries in C(i,:) for GB_assign
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -60,7 +60,8 @@ void GB_assign_zombie2
             int64_t pC_end = Cp [k+1] ;
             int64_t pright = pC_end - 1 ;
             bool found, is_zombie ;
-            GB_BINARY_ZOMBIE (i, Ci, pC, pright, found, zorig, is_zombie) ;
+            GB_BINARY_SEARCH_ZOMBIE (i, Ci, pC, pright, found, zorig,
+                is_zombie) ;
 
             //------------------------------------------------------------------
             // if found and not a zombie, mark it as a zombie
diff --git a/Source/GB_assign_zombie3.c b/Source/GB_assign_zombie3.c
index 8917f6d9d7..ee5ff22448 100644
--- a/Source/GB_assign_zombie3.c
+++ b/Source/GB_assign_zombie3.c
@@ -2,7 +2,7 @@
 // GB_assign_zombie3: delete entries in C(:,j) for C_replace_phase
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,7 @@ void GB_assign_zombie3
     GrB_Matrix Z,                   // the matrix C, or a copy
     const GrB_Matrix M,
     const bool Mask_comp,
+    const bool Mask_struct,
     const int64_t j,                // vector index with entries to delete
     const GrB_Index *I,
     const int64_t nI,
@@ -48,10 +49,8 @@ void GB_assign_zombie3
 
     const int64_t *GB_RESTRICT Mp = M->p ;
     const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
     const size_t msize = M->type->size ;
-    const GB_cast_function cast_M =
-        GB_cast_factory (GB_BOOL_code, M->type->code) ;
     int64_t pM_start = Mp [0] ;
     int64_t pM_end = Mp [1] ;
 
@@ -107,7 +106,7 @@ void GB_assign_zombie3
                     if (found)
                     { 
                         // found it
-                        cast_M (&mij, Mx +(pM*msize), 0) ;
+                        mij = GB_mcast (Mx, pM, msize) ;
                     }
                     if (Mask_comp)
                     { 
diff --git a/Source/GB_assign_zombie4.c b/Source/GB_assign_zombie4.c
index d19b546fbc..f7fd6bde44 100644
--- a/Source/GB_assign_zombie4.c
+++ b/Source/GB_assign_zombie4.c
@@ -2,7 +2,7 @@
 // GB_assign_zombie4: delete entries in C(i,:) for C_replace_phase
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -20,6 +20,7 @@ void GB_assign_zombie4
     GrB_Matrix Z,                   // the matrix C, or a copy
     const GrB_Matrix M,
     const bool Mask_comp,
+    const bool Mask_struct,
     const int64_t i,                // index of entries to delete
     const GrB_Index *J,
     const int64_t nJ,
@@ -46,10 +47,8 @@ void GB_assign_zombie4
 
     const int64_t *GB_RESTRICT Mh = M->h ;
     const int64_t *GB_RESTRICT Mp = M->p ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
     const size_t msize = M->type->size ;
-    const GB_cast_function cast_M =
-        GB_cast_factory (GB_BOOL_code, M->type->code) ;
     const int64_t Mnvec = M->nvec ;
     const bool M_is_hyper = M->is_hyper ;
 
@@ -95,7 +94,8 @@ void GB_assign_zombie4
                 int64_t pZ_end = Zp [k+1] ;
                 int64_t pright = pZ_end - 1 ;
                 bool found, is_zombie ;
-                GB_BINARY_ZOMBIE (i, Zi, pZ, pright, found, zorig, is_zombie) ;
+                GB_BINARY_SEARCH_ZOMBIE (i, Zi, pZ, pright, found, zorig,
+                    is_zombie) ;
 
                 //--------------------------------------------------------------
                 // delete Z(i,j) if found, not a zombie, and M(0,j) allows it
@@ -119,7 +119,7 @@ void GB_assign_zombie4
                     if (pM < pM_end)
                     { 
                         // found it
-                        cast_M (&mij, Mx +(pM*msize), 0) ;
+                        mij = GB_mcast (Mx, pM, msize) ;
                     }
                     if (Mask_comp)
                     { 
diff --git a/Source/GB_assign_zombie5.c b/Source/GB_assign_zombie5.c
index 7f21bc4bba..859faa7986 100644
--- a/Source/GB_assign_zombie5.c
+++ b/Source/GB_assign_zombie5.c
@@ -2,7 +2,7 @@
 // GB_assign_zombie5: delete entries in C for C_replace_phase
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -24,6 +24,7 @@ GrB_Info GB_assign_zombie5
     GrB_Matrix Z,                   // the matrix C, or a copy
     const GrB_Matrix M,
     const bool Mask_comp,
+    const bool Mask_struct,
     const GrB_Index *I,
     const int64_t nI,
     const int Ikind,
@@ -54,10 +55,8 @@ GrB_Info GB_assign_zombie5
     const int64_t *GB_RESTRICT Mh = M->h ;
     const int64_t *GB_RESTRICT Mp = M->p ;
     const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
     const size_t msize = M->type->size ;
-    const GB_cast_function cast_M =
-        GB_cast_factory (GB_BOOL_code, M->type->code) ;
     const int64_t Mnvec = M->nvec ;
     const bool M_is_hyper = M->is_hyper ;
 
@@ -68,6 +67,8 @@ GrB_Info GB_assign_zombie5
     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
     int nthreads = GB_nthreads (znz, chunk, nthreads_max) ;
     int ntasks = (nthreads == 1) ? 1 : (64 * nthreads) ;
+    ntasks = GB_IMIN (ntasks, znz) ;
+    ntasks = GB_IMAX (ntasks, 1) ;
 
     //--------------------------------------------------------------------------
     // slice the entries for each task
@@ -161,7 +162,7 @@ GrB_Info GB_assign_zombie5
                     if (found)
                     { 
                         // found it
-                        cast_M (&mij, Mx +(pM*msize), 0) ;
+                        mij = GB_mcast (Mx, pM, msize) ;
                     }
                     if (Mask_comp)
                     { 
diff --git a/Source/GB_atomics.h b/Source/GB_atomics.h
new file mode 100644
index 0000000000..75c311a53f
--- /dev/null
+++ b/Source/GB_atomics.h
@@ -0,0 +1,31 @@
+//------------------------------------------------------------------------------
+// GB_atomics.h: definitions for atomic pragmas
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#ifndef GB_ATOMICS_H
+#define GB_ATOMICS_H
+#include "GB.h"
+
+#if GB_MICROSOFT
+
+// FUTURE::: atomics with MS Visual Studio (read/write emit no pragma for now)
+#define GB_ATOMIC_READ
+#define GB_ATOMIC_WRITE
+#define GB_ATOMIC_UPDATE  GB_PRAGMA (omp atomic)
+#define GB_ATOMIC_CAPTURE GB_PRAGMA (omp atomic)
+
+#else
+
+#define GB_ATOMIC_READ    GB_PRAGMA (omp atomic read)
+#define GB_ATOMIC_WRITE   GB_PRAGMA (omp atomic write)
+#define GB_ATOMIC_UPDATE  GB_PRAGMA (omp atomic update)
+#define GB_ATOMIC_CAPTURE GB_PRAGMA (omp atomic capture)
+
+#endif
+#endif
+
diff --git a/Source/GB_binop_builtin.c b/Source/GB_binop_builtin.c
index 5fb4621ab6..08d99489e3 100644
--- a/Source/GB_binop_builtin.c
+++ b/Source/GB_binop_builtin.c
@@ -2,7 +2,7 @@
 // GB_binop_builtin:  determine if a binary operator is built-in
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -10,7 +10,7 @@
 // Determine if A*B uses a built-in semiring, and if so, determine the
 // opcodes and type codes of the semiring.
 
-// If the op is NULL, then it is the implicit GrB_SECOND_[A->type] operator.
+// If the op is NULL, then it is the implicit GrB_SECOND_[A_type] operator.
 // This is a built-in operator for built-in types.  This feature is only used
 // by GB_wait.
 
@@ -21,9 +21,9 @@
 bool GB_binop_builtin               // true if binary operator is builtin
 (
     // inputs:
-    const GrB_Matrix A,
+    const GrB_Type A_type,
     const bool A_is_pattern,        // true if only the pattern of A is used
-    const GrB_Matrix B,
+    const GrB_Type B_type,
     const bool B_is_pattern,        // true if only the pattern of B is used
     const GrB_BinaryOp op,          // binary operator; may be NULL
     const bool flipxy,              // true if z=op(y,x), flipping x and y
@@ -47,11 +47,11 @@ bool GB_binop_builtin               // true if binary operator is builtin
     GrB_Type op_xtype, op_ytype, op_ztype ;
     if (op == NULL)
     { 
-        ASSERT (A->type == B->type) ;
+        ASSERT (A_type == B_type) ;
         (*opcode) = GB_SECOND_opcode ;
-        op_xtype = A->type ;
-        op_ytype = A->type ;
-        op_ztype = A->type ;
+        op_xtype = A_type ;
+        op_ytype = A_type ;
+        op_ztype = A_type ;
     }
     else
     { 
@@ -66,8 +66,8 @@ bool GB_binop_builtin               // true if binary operator is builtin
     // doesn't hold, punt to the generic function.
     if (!A_is_pattern)
     {
-        if ((A->type != (flipxy ? op_ytype : op_xtype)) ||
-            (A->type->code >= GB_UCT_code))
+        if ((A_type != (flipxy ? op_ytype : op_xtype)) ||
+            (A_type->code >= GB_UDT_code))
         { 
             // A is a user-defined type, or its type does not match the input
             // to the operator
@@ -77,8 +77,8 @@ bool GB_binop_builtin               // true if binary operator is builtin
 
     if (!B_is_pattern)
     {
-        if ((B->type != (flipxy ? op_xtype : op_ytype)) ||
-            (B->type->code >= GB_UCT_code))
+        if ((B_type != (flipxy ? op_xtype : op_ytype)) ||
+            (B_type->code >= GB_UDT_code))
         { 
             // B is a user-defined type, or its type does not match the input
             // to the operator
@@ -88,14 +88,14 @@ bool GB_binop_builtin               // true if binary operator is builtin
 
     if (!A_is_pattern && !B_is_pattern)
     {
-        if (A->type != B->type)
+        if (A_type != B_type)
         { 
             // the types of A and B must match
             return (false) ;
         }
     }
 
-    if (*opcode >= GB_USER_C_opcode)
+    if (*opcode >= GB_USER_opcode)
     { 
         // the binary operator is user-defined
         return (false) ;
diff --git a/Source/GB_block.c b/Source/GB_block.c
index 2122a28a1f..06773cab56 100644
--- a/Source/GB_block.c
+++ b/Source/GB_block.c
@@ -2,12 +2,12 @@
 // GB_block: apply all pending computations if blocking mode enabled
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-#include "GB.h"
+#include "GB_Pending.h"
 
 GrB_Info GB_block   // apply all pending computations if blocking mode enabled
 (
@@ -26,10 +26,7 @@ GrB_Info GB_block   // apply all pending computations if blocking mode enabled
     // check for blocking mode
     //--------------------------------------------------------------------------
 
-    // no synchronization is needed since the blocking mode is read-only after
-    // GrB_Init sets it.
-
-    if (GB_Global_mode_get ( ) == GrB_BLOCKING)
+    if (GB_shall_block (A))
     { 
         // delete any lingering zombies and assemble any pending tuples
         GB_WAIT (A) ;
diff --git a/Source/GB_boolean_rename.c b/Source/GB_boolean_rename.c
index 58a4fa4665..7ee73aec9a 100644
--- a/Source/GB_boolean_rename.c
+++ b/Source/GB_boolean_rename.c
@@ -2,7 +2,7 @@
 // GB_boolean_rename: rename a boolean opcode
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_bracket.h b/Source/GB_bracket.h
index 4f6761f83c..527818b087 100644
--- a/Source/GB_bracket.h
+++ b/Source/GB_bracket.h
@@ -2,7 +2,7 @@
 // GB_bracket.h: definitions for GB_bracket
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -38,7 +38,7 @@ static inline void GB_bracket_left
         // search for imin in X [kleft:kright]
         int64_t pleft = (*kleft) ;
         int64_t pright = kright ;
-        GB_BINARY_TRIM_SEARCH (imin, X, pleft, pright) ;
+        GB_TRIM_BINARY_SEARCH (imin, X, pleft, pright) ;
         (*kleft) = pleft ;
     }
 }
@@ -67,7 +67,7 @@ static inline void GB_bracket_right
         // search for imax in X [kleft:kright]
         int64_t pleft = kleft ;
         int64_t pright = (*kright) ;
-        GB_BINARY_TRIM_SEARCH (imax, X, pleft, pright) ;
+        GB_TRIM_BINARY_SEARCH (imax, X, pleft, pright) ;
         (*kright) = pleft ;
     }
 }
diff --git a/Source/GB_build.c b/Source/GB_build.c
index 4853a7fbaf..c172caae4a 100644
--- a/Source/GB_build.c
+++ b/Source/GB_build.c
@@ -2,7 +2,7 @@
 // GB_build: build a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_build.h b/Source/GB_build.h
index 94f692f411..d083dff5cc 100644
--- a/Source/GB_build.h
+++ b/Source/GB_build.h
@@ -2,7 +2,7 @@
 // GB_build.h: definitions for GB_build
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_builder.c b/Source/GB_builder.c
index 8cd29a207e..377d581c4a 100644
--- a/Source/GB_builder.c
+++ b/Source/GB_builder.c
@@ -2,7 +2,7 @@
 // GB_builder: build a matrix from tuples
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -159,10 +159,11 @@ GrB_Info GB_builder                 // build a matrix from tuples
     //--------------------------------------------------------------------------
 
     GB_void *GB_RESTRICT S_work = (*S_work_handle) ;
+
     const GB_void *GB_RESTRICT S = (S_work == NULL) ? S_input : S_work ;
     size_t tsize = ttype->size ;
     size_t ssize = GB_code_size (scode, tsize) ;
-    ASSERT (S != NULL) ;
+    ASSERT (GB_IMPLIES (nvals > 0, S != NULL)) ;
 
     //==========================================================================
     // symbolic phase of the build =============================================
@@ -215,7 +216,7 @@ GrB_Info GB_builder                 // build a matrix from tuples
 
     if (tstart_slice == NULL || tnvec_slice == NULL || tnz_slice == NULL ||
         kbad == NULL || ilast_slice == NULL)
-    {
+    { 
         // out of memory
         GB_FREE_WORK ;
         return (GB_OUT_OF_MEMORY) ;
@@ -970,7 +971,7 @@ GrB_Info GB_builder                 // build a matrix from tuples
         if (T->i == NULL)
         { 
             // out of memory
-            GB_MATRIX_FREE (&T) ;
+            GB_MATRIX_FREE (Thandle) ;
             GB_FREE_WORK ;
             return (GB_OUT_OF_MEMORY) ;
         }
@@ -1097,12 +1098,8 @@ GrB_Info GB_builder                 // build a matrix from tuples
     size_t xsize = xtype->size ;
     size_t ysize = ytype->size ;
 
-    // so that tcode can match scode
-    GB_Type_code tcode2 = (tcode == GB_UCT_code) ? GB_UDT_code : tcode ;
-    GB_Type_code scode2 = (scode == GB_UCT_code) ? GB_UDT_code : scode ;
-
     // no typecasting if all 5 types are the same
-    bool nocasting = (tcode2 == scode2) &&
+    bool nocasting = (tcode == scode) &&
         (ttype == xtype) && (ttype == ytype) && (ttype == ztype) ;
 
     //--------------------------------------------------------------------------
@@ -1144,14 +1141,22 @@ GrB_Info GB_builder                 // build a matrix from tuples
         if (T->x == NULL)
         { 
             // out of memory
-            GB_MATRIX_FREE (&T) ;
+            GB_MATRIX_FREE (Thandle) ;
             GB_FREE_WORK ;
             return (GB_OUT_OF_MEMORY) ;
         }
 
         GB_void *GB_RESTRICT Tx = T->x ;
 
-        if (copy_S_into_T)
+        ASSERT (GB_IMPLIES (nvals > 0, S != NULL)) ;
+
+        if (nvals == 0)
+        { 
+
+            // nothing to do
+
+        }
+        else if (copy_S_into_T)
         { 
 
             //------------------------------------------------------------------
@@ -1225,6 +1230,7 @@ GrB_Info GB_builder                 // build a matrix from tuples
 
             if (!done)
             {
+                GB_BURBLE_N (nvals, "generic ") ;
 
                 //--------------------------------------------------------------
                 // no typecasting, but use the fdup function pointer and memcpy
@@ -1276,6 +1282,8 @@ GrB_Info GB_builder                 // build a matrix from tuples
             // assemble the values S into T, typecasting as needed
             //------------------------------------------------------------------
 
+            GB_BURBLE_N (nvals, "generic ") ;
+
             // S (either S_work or S_input) must be permuted and copied into
             // T->x, since the tuples had to be sorted, or duplicates appear.
             // Any duplicates are now assembled.  Not all of the 5 types are
diff --git a/Source/GB_calloc_memory.c b/Source/GB_calloc_memory.c
index 3e2d01e21c..8a5e7cbeac 100644
--- a/Source/GB_calloc_memory.c
+++ b/Source/GB_calloc_memory.c
@@ -2,7 +2,7 @@
 // GB_calloc_memory: wrapper for calloc_function
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_cast_array.c b/Source/GB_cast_array.c
index 35733b21d5..94e9c8eb40 100644
--- a/Source/GB_cast_array.c
+++ b/Source/GB_cast_array.c
@@ -2,7 +2,7 @@
 // GB_cast_array: typecast an array
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -18,9 +18,9 @@
 
 void GB_cast_array              // typecast an array
 (
-    GB_void *GB_RESTRICT Cx,       // output array
+    GB_void *Cx,                // output array
     const GB_Type_code code1,   // type code for Cx
-    const GB_void *GB_RESTRICT Ax, // input array
+    GB_void *Ax,                // input array
     const GB_Type_code code2,   // type code for Ax
     const int64_t anz,          // number of entries in Cx and Ax
     GB_Context Context
@@ -65,8 +65,8 @@ void GB_cast_array              // typecast an array
 
         #define GB_WORKER(ignore1,zname,ztype,xname,xtype)                  \
         {                                                                   \
-            GrB_Info info = GB_unop (zname,xname) ((ztype *GB_RESTRICT) Cx,    \
-                (const xtype *GB_RESTRICT) Ax, anz, nthreads) ;                \
+            GrB_Info info = GB_unop (zname,xname) ((ztype *) Cx,            \
+                (xtype *) Ax, anz, nthreads) ;                              \
             if (info == GrB_SUCCESS) return ;                               \
         }                                                                   \
         break ;
@@ -83,6 +83,10 @@ void GB_cast_array              // typecast an array
     // generic worker: typecasting for compact case only
     //--------------------------------------------------------------------------
 
+    // This is dead code unless GBCOMPACT is enabled.
+
+    GB_BURBLE_N (anz, "generic ") ;
+
     int64_t csize = GB_code_size (code1, 1) ;
     int64_t asize = GB_code_size (code2, 1) ;
     GB_cast_function cast_A_to_C = GB_cast_factory (code1, code2) ;
diff --git a/Source/GB_cast_factory.c b/Source/GB_cast_factory.c
index 4d89e2dba1..0d3a14a937 100644
--- a/Source/GB_cast_factory.c
+++ b/Source/GB_cast_factory.c
@@ -2,7 +2,7 @@
 // GB_cast_factory: return a pointer to a typecasting function
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,9 @@
 // output z, casting as needed.  That is, it computes z = (type of z) x.
 // s is the size for user-defined types, which can only be copied.
 
+// If the operator is FIRST, SECOND, or PAIR, this function is called for the
+// cast function on the unused argument, but the result is then unused. 
+
 #include "GB.h"
 
 GB_cast_function GB_cast_factory   // returns pointer to function to cast x to z
@@ -20,14 +23,6 @@ GB_cast_function GB_cast_factory   // returns pointer to function to cast x to z
 )
 { 
 
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (GB_code_compatible (code1, code2)) ;
-    ASSERT (code1 <= GB_UDT_code) ;
-    ASSERT (code2 <= GB_UDT_code) ;
-
     //--------------------------------------------------------------------------
     // define the worker for the switch factory
     //--------------------------------------------------------------------------
@@ -48,7 +43,6 @@ GB_cast_function GB_cast_factory   // returns pointer to function to cast x to z
     // user-defined types fall through the switch factory to here
     //--------------------------------------------------------------------------
 
-    // if code1 or code2 are GB_UDT_code or GB_UCT_code
     return (&GB_copy_user_user) ;
 }
 
diff --git a/Source/GB_cblas.h b/Source/GB_cblas.h
new file mode 100644
index 0000000000..5a3086c3b7
--- /dev/null
+++ b/Source/GB_cblas.h
@@ -0,0 +1,34 @@
+//------------------------------------------------------------------------------
+// GB_cblas.h: definitions to use the CBLAS
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#ifndef GB_CBLAS_H
+#define GB_CBLAS_H
+
+#if GB_HAS_CBLAS
+    // tested by value (#if), as in GB_cblas_daxpy.c and GB_cblas_saxpy.c
+    #ifdef MKL_ILP64
+
+    // use the Intel MKL ILP64 parallel BLAS
+    #include "mkl.h"
+    #define GB_CBLAS_INT MKL_INT
+    #define GB_CBLAS_INT_MAX INT64_MAX
+
+    #else
+
+    // FUTURE: other BLAS packages here
+    #include "cblas.h"
+    #define GB_CBLAS_INT int
+    #define GB_CBLAS_INT_MAX INT32_MAX
+    // etc ...
+
+    #endif
+
+#endif
+#endif
+
diff --git a/Source/GB_cblas_daxpy.c b/Source/GB_cblas_daxpy.c
new file mode 100644
index 0000000000..68f7edc4c8
--- /dev/null
+++ b/Source/GB_cblas_daxpy.c
@@ -0,0 +1,95 @@
+//------------------------------------------------------------------------------
+// GB_cblas_daxpy: Y += alpha*X where X and Y are dense double arrays
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Y += alpha*X where X and Y are dense arrays of stride 1, of type double.
+
+// X and Y can have any size, and will often be larger than 2^31.
+
+#include "GB_dense.h"
+#include "GB_cblas.h"
+
+void GB_cblas_daxpy         // Y += alpha*X
+(
+    const int64_t n,        // length of X and Y (note the int64_t type)
+    const double alpha,     // scale factor
+    const double *X,        // the array X, always stride 1
+    double *Y,              // the array Y, always stride 1
+    int nthreads            // maximum # of threads to use
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    ASSERT (Y != NULL) ;
+    ASSERT (X != NULL) ;
+    ASSERT (nthreads >= 1) ;
+
+    #if GB_HAS_CBLAS
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    // See GB_cblas_saxpy.c for a discussion.
+
+    #ifdef MKL_ILP64
+    int save_nthreads = mkl_set_num_threads_local (nthreads) ;
+    #endif
+
+    //--------------------------------------------------------------------------
+    // Y += alpha*X
+    //--------------------------------------------------------------------------
+
+    GB_CBLAS_INT stride1 = (GB_CBLAS_INT) 1 ;
+
+    if (sizeof (GB_CBLAS_INT) == sizeof (int64_t))
+    {
+        // call *axpy in a single chunk
+        cblas_daxpy     // y += alpha*x
+        (
+            n,          // length of x and y
+            alpha,      // scale factor (typically 1.0)
+            X,
+            stride1,    // x is stride 1
+            Y,
+            stride1     // y is stride 1
+        ) ;
+    }
+    else
+    {
+        // call *axpy in chunks of size GB_CBLAS_INT_MAX
+        for (int64_t p = 0 ; p < n ; p += GB_CBLAS_INT_MAX)
+        {
+            GB_CBLAS_INT chunk =
+                (GB_CBLAS_INT) GB_IMIN (n - p, GB_CBLAS_INT_MAX) ;
+            cblas_daxpy     // y += alpha*x
+            (
+                chunk,      // length of x and y (this chunk)
+                alpha,      // scale factor (typically 1.0)
+                X + p,      // this chunk of x
+                stride1,    // x is stride 1
+                Y + p,      // this chunk of y
+                stride1     // y is stride 1
+            ) ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // restore the # of threads for the BLAS
+    //--------------------------------------------------------------------------
+
+    #ifdef MKL_ILP64
+    mkl_set_num_threads_local (save_nthreads) ;
+    #endif
+
+    #endif
+}
+
diff --git a/Source/GB_cblas_saxpy.c b/Source/GB_cblas_saxpy.c
new file mode 100644
index 0000000000..1bca572453
--- /dev/null
+++ b/Source/GB_cblas_saxpy.c
@@ -0,0 +1,117 @@
+//------------------------------------------------------------------------------
+// GB_cblas_saxpy: Y += alpha*X where X and Y are dense float arrays
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Y += alpha*X where X and Y are dense arrays of stride 1, of type float.
+
+// X and Y can have any size, and will often be larger than 2^31.
+
+#include "GB_dense.h"
+#include "GB_cblas.h"
+
+void GB_cblas_saxpy         // Y += alpha*X
+(
+    const int64_t n,        // length of X and Y (note the int64_t type)
+    const float alpha,      // scale factor
+    const float *X,         // the array X, always stride 1
+    float *Y,               // the array Y, always stride 1
+    int nthreads            // maximum # of threads to use
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    // The GB_cblas_* gateway functions always exist in the GraphBLAS library,
+    // but if GB_HAS_CBLAS is false at compile time, they become stubs that do
+    // nothing at all, and they are never called.
+
+    ASSERT (Y != NULL) ;
+    ASSERT (X != NULL) ;
+    ASSERT (nthreads >= 1) ;
+
+    #if GB_HAS_CBLAS
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    // Use no more than nthreads to do the saxpy.  Fewer threads may be used at
+    // this function's discretion, but no more than nthreads can be used.  This
+    // is a strict requirement.
+
+    // Note that *other* GraphBLAS threads may be calling this function at the
+    // same time, so the CBLAS thread setting must be done in a thread-safe
+    // manner.  Multiple user threads could be calling GrB_* operations in
+    // parallel, and each GraphBLAS call may have its own unique thread
+    // maximum.  So the solution cannot assume that this function is only being
+    // called in parallel from a single GrB_* operation.
+
+    // Set the # of threads to use, in a thread-safe manner.  Do so in a
+    // portable manner, for any BLAS library.  #ifdef's may be used to handle
+    // this, depending on which BLAS library is being used, as determined by
+    // the CMake build system.
+
+    // Even if this function is called inside a parallel region, nthreads
+    // could be larger than one.  In that case, nested parallelism has been
+    // requested.
+
+    #ifdef MKL_ILP64
+    int save_nthreads = mkl_set_num_threads_local (nthreads) ;
+    #endif
+
+    //--------------------------------------------------------------------------
+    // Y += alpha*X
+    //--------------------------------------------------------------------------
+
+    GB_CBLAS_INT stride1 = (GB_CBLAS_INT) 1 ;
+
+    if (sizeof (GB_CBLAS_INT) == sizeof (int64_t))
+    {
+        // call *axpy in a single chunk
+        cblas_saxpy     // y += alpha*x
+        (
+            n,          // length of x and y
+            alpha,      // scale factor (typically 1.0)
+            X,
+            stride1,    // x is stride 1
+            Y,
+            stride1     // y is stride 1
+        ) ;
+    }
+    else
+    {
+        // call *axpy in chunks of size GB_CBLAS_INT_MAX
+        for (int64_t p = 0 ; p < n ; p += GB_CBLAS_INT_MAX)
+        {
+            GB_CBLAS_INT chunk =
+                (GB_CBLAS_INT) GB_IMIN (n - p, GB_CBLAS_INT_MAX) ;
+            cblas_saxpy     // y += alpha*x
+            (
+                chunk,      // length of x and y (this chunk)
+                alpha,      // scale factor (typically 1.0)
+                X + p,      // this chunk of x
+                stride1,    // x is stride 1
+                Y + p,      // this chunk of y
+                stride1     // y is stride 1
+            ) ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // restore the # of threads for the BLAS
+    //--------------------------------------------------------------------------
+
+    #ifdef MKL_ILP64
+    mkl_set_num_threads_local (save_nthreads) ;
+    #endif
+
+    #endif
+}
+
diff --git a/Source/GB_clear.c b/Source/GB_clear.c
index 9dd26980a0..5d6ab284f2 100644
--- a/Source/GB_clear.c
+++ b/Source/GB_clear.c
@@ -2,7 +2,7 @@
 // GB_clear: clears the content of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_code_check.c b/Source/GB_code_check.c
index fb6c3e458e..0a3b75b8fa 100644
--- a/Source/GB_code_check.c
+++ b/Source/GB_code_check.c
@@ -2,7 +2,7 @@
 // GB_code_check: print an entry using a type code
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -28,7 +28,7 @@ GrB_Info GB_code_check          // print an entry using a type code
 
     switch (code)
     {
-        #ifdef GB_DEVELOPER
+        #if GB_DEVELOPER
 
         case GB_BOOL_code:   i = *((bool     *) x) ; GBPR ("bool "    GBd, i) ; break ;
         case GB_INT8_code:   i = *((int8_t   *) x) ; GBPR ("int8 "    GBd, i) ; break ;
@@ -85,7 +85,6 @@ GrB_Info GB_code_check          // print an entry using a type code
             break ;
         #endif
 
-        case GB_UCT_code    :
         case GB_UDT_code    :
             { 
                 GBPR ("[user-defined value]") ;
diff --git a/Source/GB_code_compatible.c b/Source/GB_code_compatible.c
index c58cd31afb..7efbb8f344 100644
--- a/Source/GB_code_compatible.c
+++ b/Source/GB_code_compatible.c
@@ -2,7 +2,7 @@
 // GB_code_compatible: return true if domains are compatible
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,8 +21,8 @@ bool GB_code_compatible         // check if two types can be typecast
 )
 {
 
-    bool a_user = (acode == GB_UDT_code || acode == GB_UCT_code) ;
-    bool b_user = (bcode == GB_UDT_code || bcode == GB_UCT_code) ;
+    bool a_user = (acode == GB_UDT_code) ;
+    bool b_user = (bcode == GB_UDT_code) ;
 
     if (a_user || b_user)
     { 
diff --git a/Source/GB_code_size.c b/Source/GB_code_size.c
index 1e6da80b93..b919554c26 100644
--- a/Source/GB_code_size.c
+++ b/Source/GB_code_size.c
@@ -2,7 +2,7 @@
 // GB_code_size: given a type code, return sizeof (type)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,7 +31,6 @@ size_t GB_code_size             // return the size of a type, given its code
         case GB_UINT64_code : return (sizeof (uint64_t)) ;
         case GB_FP32_code   : return (sizeof (float))    ;
         case GB_FP64_code   : return (sizeof (double))   ;
-        case GB_UCT_code    :
         case GB_UDT_code    : return (usize) ;
         default             : return (0) ;
     }
diff --git a/Source/GB_code_string.c b/Source/GB_code_string.c
index 34eb78faab..11e6e4aee3 100644
--- a/Source/GB_code_string.c
+++ b/Source/GB_code_string.c
@@ -2,7 +2,7 @@
 // GB_code_string: convert a type code into a string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -30,7 +30,6 @@ char *GB_code_string            // return a static string for a type name
         case GB_UINT64_code : return ("uint64_t"    ) ;
         case GB_FP32_code   : return ("float"       ) ;
         case GB_FP64_code   : return ("double"      ) ;
-        case GB_UCT_code    :
         case GB_UDT_code    : return ("user-defined") ;
         default             : return ("unknown!"    ) ;
     }
diff --git a/Source/GB_code_type.c b/Source/GB_code_type.c
index 6927746131..284c4aa720 100644
--- a/Source/GB_code_type.c
+++ b/Source/GB_code_type.c
@@ -2,7 +2,7 @@
 // GB_code_type: convert a type code to a GrB_Type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -37,7 +37,6 @@ GrB_Type GB_code_type           // return the GrB_Type corresponding to the code
         case GB_UINT64_code : return (GrB_UINT64) ;
         case GB_FP32_code   : return (GrB_FP32)   ;
         case GB_FP64_code   : return (GrB_FP64)   ;
-        case GB_UCT_code    : 
         case GB_UDT_code    : 
         default             : return (type) ;
     }
diff --git a/Source/GB_compatible.c b/Source/GB_compatible.c
index 880dca72c1..fed25d1b67 100644
--- a/Source/GB_compatible.c
+++ b/Source/GB_compatible.c
@@ -2,7 +2,7 @@
 // GB_compatible: check input and operators for type compatibility
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_control.h b/Source/GB_control.h
index 954b79c2e5..f65a4a861d 100644
--- a/Source/GB_control.h
+++ b/Source/GB_control.h
@@ -2,7 +2,7 @@
 // GB_control.h:  disable hard-coded functions to reduce code size
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -127,6 +127,8 @@
 
 // #define GxB_NO_FIRST     1
 // #define GxB_NO_SECOND    1
+// #define GxB_NO_PAIR      1
+// #define GxB_NO_ANY       1
 // #define GxB_NO_MIN       1
 // #define GxB_NO_MAX       1
 // #define GxB_NO_PLUS      1
@@ -199,6 +201,28 @@
 // #define GxB_NO_SECOND_FP64   1
 // #define GxB_NO_SECOND_BOOL   1
 
+// #define GxB_NO_PAIR_INT8     1
+// #define GxB_NO_PAIR_INT16    1
+// #define GxB_NO_PAIR_INT32    1
+// #define GxB_NO_PAIR_INT64    1
+// #define GxB_NO_PAIR_UINT8    1
+// #define GxB_NO_PAIR_UINT16   1
+// #define GxB_NO_PAIR_UINT32   1
+// #define GxB_NO_PAIR_UINT64   1
+// #define GxB_NO_PAIR_FP32     1
+// #define GxB_NO_PAIR_FP64     1
+
+// #define GxB_NO_ANY_INT8      1
+// #define GxB_NO_ANY_INT16     1
+// #define GxB_NO_ANY_INT32     1
+// #define GxB_NO_ANY_INT64     1
+// #define GxB_NO_ANY_UINT8     1
+// #define GxB_NO_ANY_UINT16    1
+// #define GxB_NO_ANY_UINT32    1
+// #define GxB_NO_ANY_UINT64    1
+// #define GxB_NO_ANY_FP32      1
+// #define GxB_NO_ANY_FP64      1
+
 // #define GxB_NO_MIN_INT8      1
 // #define GxB_NO_MIN_INT16     1
 // #define GxB_NO_MIN_INT32     1
@@ -488,6 +512,10 @@
 
 // #define GxB_NO_EQ_FIRST_BOOL         1
 
+// #define GxB_NO_EQ_PAIR_BOOL          1
+
+// #define GxB_NO_EQ_ANY_BOOL           1
+
 // #define GxB_NO_EQ_GE_BOOL            1
 // #define GxB_NO_EQ_GE_FP32            1
 // #define GxB_NO_EQ_GE_FP64            1
@@ -569,6 +597,8 @@
 // #define GxB_NO_LAND_EQ_UINT8         1
 
 // #define GxB_NO_LAND_FIRST_BOOL       1
+// #define GxB_NO_LAND_PAIR_BOOL        1
+// #define GxB_NO_LAND_ANY_BOOL         1
 
 // #define GxB_NO_LAND_GE_BOOL          1
 // #define GxB_NO_LAND_GE_FP32          1
@@ -651,6 +681,8 @@
 // #define GxB_NO_LOR_EQ_UINT8          1
 
 // #define GxB_NO_LOR_FIRST_BOOL        1
+// #define GxB_NO_LOR_PAIR_BOOL         1
+// #define GxB_NO_LOR_ANY_BOOL          1
 
 // #define GxB_NO_LOR_GE_BOOL           1
 // #define GxB_NO_LOR_GE_FP32           1
@@ -732,6 +764,8 @@
 // #define GxB_NO_LXOR_EQ_UINT8         1
 
 // #define GxB_NO_LXOR_FIRST_BOOL       1
+// #define GxB_NO_LXOR_PAIR_BOOL        1
+// #define GxB_NO_LXOR_ANY_BOOL         1
 
 // #define GxB_NO_LXOR_GE_BOOL          1
 // #define GxB_NO_LXOR_GE_FP32          1
@@ -822,6 +856,28 @@
 // #define GxB_NO_MAX_FIRST_UINT64      1
 // #define GxB_NO_MAX_FIRST_UINT8       1
 
+// #define GxB_NO_MAX_PAIR_FP32         1
+// #define GxB_NO_MAX_PAIR_FP64         1
+// #define GxB_NO_MAX_PAIR_INT16        1
+// #define GxB_NO_MAX_PAIR_INT32        1
+// #define GxB_NO_MAX_PAIR_INT64        1
+// #define GxB_NO_MAX_PAIR_INT8         1
+// #define GxB_NO_MAX_PAIR_UINT16       1
+// #define GxB_NO_MAX_PAIR_UINT32       1
+// #define GxB_NO_MAX_PAIR_UINT64       1
+// #define GxB_NO_MAX_PAIR_UINT8        1
+
+// #define GxB_NO_MAX_ANY_FP32          1
+// #define GxB_NO_MAX_ANY_FP64          1
+// #define GxB_NO_MAX_ANY_INT16         1
+// #define GxB_NO_MAX_ANY_INT32         1
+// #define GxB_NO_MAX_ANY_INT64         1
+// #define GxB_NO_MAX_ANY_INT8          1
+// #define GxB_NO_MAX_ANY_UINT16        1
+// #define GxB_NO_MAX_ANY_UINT32        1
+// #define GxB_NO_MAX_ANY_UINT64        1
+// #define GxB_NO_MAX_ANY_UINT8         1
+
 // #define GxB_NO_MAX_ISEQ_FP32         1
 // #define GxB_NO_MAX_ISEQ_FP64         1
 // #define GxB_NO_MAX_ISEQ_INT16        1
@@ -1031,6 +1087,28 @@
 // #define GxB_NO_MIN_FIRST_UINT64      1
 // #define GxB_NO_MIN_FIRST_UINT8       1
 
+// #define GxB_NO_MIN_PAIR_FP32         1
+// #define GxB_NO_MIN_PAIR_FP64         1
+// #define GxB_NO_MIN_PAIR_INT16        1
+// #define GxB_NO_MIN_PAIR_INT32        1
+// #define GxB_NO_MIN_PAIR_INT64        1
+// #define GxB_NO_MIN_PAIR_INT8         1
+// #define GxB_NO_MIN_PAIR_UINT16       1
+// #define GxB_NO_MIN_PAIR_UINT32       1
+// #define GxB_NO_MIN_PAIR_UINT64       1
+// #define GxB_NO_MIN_PAIR_UINT8        1
+
+// #define GxB_NO_MIN_ANY_FP32          1
+// #define GxB_NO_MIN_ANY_FP64          1
+// #define GxB_NO_MIN_ANY_INT16         1
+// #define GxB_NO_MIN_ANY_INT32         1
+// #define GxB_NO_MIN_ANY_INT64         1
+// #define GxB_NO_MIN_ANY_INT8          1
+// #define GxB_NO_MIN_ANY_UINT16        1
+// #define GxB_NO_MIN_ANY_UINT32        1
+// #define GxB_NO_MIN_ANY_UINT64        1
+// #define GxB_NO_MIN_ANY_UINT8         1
+
 // #define GxB_NO_MIN_ISEQ_FP32         1
 // #define GxB_NO_MIN_ISEQ_FP64         1
 // #define GxB_NO_MIN_ISEQ_INT16        1
@@ -1240,6 +1318,28 @@
 // #define GxB_NO_PLUS_FIRST_UINT64     1
 // #define GxB_NO_PLUS_FIRST_UINT8      1
 
+// #define GxB_NO_PLUS_PAIR_FP32        1
+// #define GxB_NO_PLUS_PAIR_FP64        1
+// #define GxB_NO_PLUS_PAIR_INT16       1
+// #define GxB_NO_PLUS_PAIR_INT32       1
+// #define GxB_NO_PLUS_PAIR_INT64       1
+// #define GxB_NO_PLUS_PAIR_INT8        1
+// #define GxB_NO_PLUS_PAIR_UINT16      1
+// #define GxB_NO_PLUS_PAIR_UINT32      1
+// #define GxB_NO_PLUS_PAIR_UINT64      1
+// #define GxB_NO_PLUS_PAIR_UINT8       1
+
+// #define GxB_NO_PLUS_ANY_FP32         1
+// #define GxB_NO_PLUS_ANY_FP64         1
+// #define GxB_NO_PLUS_ANY_INT16        1
+// #define GxB_NO_PLUS_ANY_INT32        1
+// #define GxB_NO_PLUS_ANY_INT64        1
+// #define GxB_NO_PLUS_ANY_INT8         1
+// #define GxB_NO_PLUS_ANY_UINT16       1
+// #define GxB_NO_PLUS_ANY_UINT32       1
+// #define GxB_NO_PLUS_ANY_UINT64       1
+// #define GxB_NO_PLUS_ANY_UINT8        1
+
 // #define GxB_NO_PLUS_ISEQ_FP32        1
 // #define GxB_NO_PLUS_ISEQ_FP64        1
 // #define GxB_NO_PLUS_ISEQ_INT16       1
@@ -1449,6 +1549,28 @@
 // #define GxB_NO_TIMES_FIRST_UINT64    1
 // #define GxB_NO_TIMES_FIRST_UINT8     1
 
+// #define GxB_NO_TIMES_PAIR_FP32       1
+// #define GxB_NO_TIMES_PAIR_FP64       1
+// #define GxB_NO_TIMES_PAIR_INT16      1
+// #define GxB_NO_TIMES_PAIR_INT32      1
+// #define GxB_NO_TIMES_PAIR_INT64      1
+// #define GxB_NO_TIMES_PAIR_INT8       1
+// #define GxB_NO_TIMES_PAIR_UINT16     1
+// #define GxB_NO_TIMES_PAIR_UINT32     1
+// #define GxB_NO_TIMES_PAIR_UINT64     1
+// #define GxB_NO_TIMES_PAIR_UINT8      1
+
+// #define GxB_NO_TIMES_ANY_FP32        1
+// #define GxB_NO_TIMES_ANY_FP64        1
+// #define GxB_NO_TIMES_ANY_INT16       1
+// #define GxB_NO_TIMES_ANY_INT32       1
+// #define GxB_NO_TIMES_ANY_INT64       1
+// #define GxB_NO_TIMES_ANY_INT8        1
+// #define GxB_NO_TIMES_ANY_UINT16      1
+// #define GxB_NO_TIMES_ANY_UINT32      1
+// #define GxB_NO_TIMES_ANY_UINT64      1
+// #define GxB_NO_TIMES_ANY_UINT8       1
+
 // #define GxB_NO_TIMES_ISEQ_FP32       1
 // #define GxB_NO_TIMES_ISEQ_FP64       1
 // #define GxB_NO_TIMES_ISEQ_INT16      1
diff --git a/Source/GB_copy_user_user.c b/Source/GB_copy_user_user.c
index 4681baf864..22e0d2a548 100644
--- a/Source/GB_copy_user_user.c
+++ b/Source/GB_copy_user_user.c
@@ -2,7 +2,7 @@
 // GB_copy_user_user.c: copy user a type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_create.c b/Source/GB_create.c
index 067873cdbf..9cbbdd64f7 100644
--- a/Source/GB_create.c
+++ b/Source/GB_create.c
@@ -2,7 +2,7 @@
 // GB_create: create a matrix and allocate space
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_cumsum.c b/Source/GB_cumsum.c
index 864280dbe8..bb352e1f57 100644
--- a/Source/GB_cumsum.c
+++ b/Source/GB_cumsum.c
@@ -2,7 +2,7 @@
 // GB_cumsum: cumlative sum of an array
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -20,11 +20,11 @@
 
 #include "GB.h"
 
-void GB_cumsum                  // compute the cumulative sum of an array
+void GB_cumsum                      // cumulative sum of an array
 (
-    int64_t *GB_RESTRICT count,    // size n+1, input/output
+    int64_t *GB_RESTRICT count,     // size n+1, input/output
     const int64_t n,
-    int64_t *GB_RESTRICT kresult,  // return k, if needed by the caller
+    int64_t *GB_RESTRICT kresult,   // return k, if needed by the caller
     int nthreads
 )
 {
@@ -55,7 +55,7 @@ void GB_cumsum                  // compute the cumulative sum of an array
     //--------------------------------------------------------------------------
 
     if (kresult == NULL)
-    { 
+    {
 
         if (nthreads <= 2)
         {
@@ -81,7 +81,16 @@ void GB_cumsum                  // compute the cumulative sum of an array
             // cumsum with multiple threads
             //------------------------------------------------------------------
 
-            int64_t ws [GB_VLA_NTHREADS(nthreads)+1] ;
+            // allocate workspace
+            int64_t *ws = NULL ;
+            GB_MALLOC_MEMORY (ws, nthreads, sizeof (int64_t)) ;
+            if (ws == NULL)
+            { 
+                // out of memory; use a single thread instead
+                GB_cumsum (count, n, NULL, 1) ;
+                return ;
+            }
+
             #pragma omp parallel num_threads(nthreads)
             {
                 // each thread sums up its own part
@@ -115,11 +124,13 @@ void GB_cumsum                  // compute the cumulative sum of an array
                 }
             }
 
+            // free workspace
+            GB_FREE_MEMORY (ws, nthreads, sizeof (int64_t)) ;
         }
 
     }
     else
-    { 
+    {
 
         if (nthreads <= 2)
         {
@@ -148,8 +159,15 @@ void GB_cumsum                  // compute the cumulative sum of an array
             // cumsum with multiple threads, also compute k
             //------------------------------------------------------------------
 
-            int64_t ws [GB_VLA_NTHREADS(nthreads)+1] ;
-            int64_t wk [GB_VLA_NTHREADS(nthreads)+1] ;
+            int64_t *ws = NULL ;
+            GB_MALLOC_MEMORY (ws, 2*nthreads, sizeof (int64_t)) ;
+            if (ws == NULL)
+            { 
+                // out of memory; use a single thread instead
+                GB_cumsum (count, n, kresult, 1) ;
+                return ;
+            }
+            int64_t *wk = ws + nthreads ;
 
             #pragma omp parallel num_threads(nthreads)
             {
@@ -194,6 +212,9 @@ void GB_cumsum                  // compute the cumulative sum of an array
                 k += wk [tid] ;
             }
             (*kresult) = k ;
+
+            // free workspace
+            GB_FREE_MEMORY (ws, 2*nthreads, sizeof (int64_t)) ;
         }
     }
 }
diff --git a/Source/GB_dense.h b/Source/GB_dense.h
new file mode 100644
index 0000000000..2dc2c5485a
--- /dev/null
+++ b/Source/GB_dense.h
@@ -0,0 +1,158 @@
+//------------------------------------------------------------------------------
+// GB_dense.h: definitions for dense matrix operations
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#ifndef GB_DENSE_H
+#define GB_DENSE_H
+
+#include "GB_ek_slice.h"
+
+//------------------------------------------------------------------------------
+// GB_dense_ewise3_accum: C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+void GB_dense_ewise3_accum          // C += A+B, all matrices dense
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GrB_Matrix A,             // dense input matrix
+    const GrB_Matrix B,             // dense input matrix
+    const GrB_BinaryOp op,          // binary operator to apply
+    GB_Context Context              // for nthreads_max and chunk
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_ewise3_noaccum: C = A+B where A and B are dense; C anything
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_ewise3_noaccum    // C = A+B, where A and B are dense
+(
+    GrB_Matrix C,                   // input/output matrix
+    const bool C_is_dense,          // true if C is dense
+    const GrB_Matrix A,             // dense input matrix
+    const GrB_Matrix B,             // dense input matrix
+    const GrB_BinaryOp op,          // binary operator to apply
+    GB_Context Context              // for nthreads_max and chunk
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_subassign_23: C(:,:) += A where C is dense and A is sparse or dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_subassign_23      // C += A; C is dense, A is sparse or dense
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GrB_Matrix A,             // input matrix
+    const GrB_BinaryOp accum,       // operator to apply
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_subassign_22: C(:,:) += scalar where C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_subassign_22      // C += x where C is dense and x is a scalar
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GB_void *scalar,          // input scalar
+    const GrB_Type atype,           // type of the input scalar
+    const GrB_BinaryOp accum,       // operator to apply
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_subassign_21: C(:,:) = scalar where C becomes dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_subassign_21      // C(:,:) = x; C is a matrix and x a scalar
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GB_void *scalar,          // input scalar
+    const GrB_Type atype,           // type of the input scalar
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_subassign_05d: C(:,:)<M> = scalar ; C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_subassign_05d
+(
+    GrB_Matrix C,                   // input/output matrix (dense)
+    // input:
+    const GrB_Matrix M,             // mask matrix
+    const bool Mask_struct,         // presumably: use M structurally (confirm)
+    const GB_void *scalar,          // input scalar
+    const GrB_Type atype,           // type of the input scalar
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_subassign_06d: C(:,:)<A> = A ; C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_subassign_06d
+(
+    GrB_Matrix C,                   // input/output matrix (dense)
+    // input:
+    const GrB_Matrix A,             // input matrix, also acts as the mask
+    const bool Mask_struct,         // presumably: use A structurally (confirm)
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_subassign_24: C(:,:) = A ; C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_subassign_24   // C = A, copy A into an existing matrix C
+(
+    GrB_Matrix C,           // output matrix to modify
+    const GrB_Matrix A,     // input matrix to copy
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_dense_subassign_25: C<M> = A ; C is empty, A is dense, M is structural
+//------------------------------------------------------------------------------
+
+GrB_Info GB_dense_subassign_25
+(
+    GrB_Matrix C,                   // output matrix, empty on input
+    // input:
+    const GrB_Matrix M,             // mask matrix, used structurally
+    const GrB_Matrix A,             // dense input matrix
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_cblas_saxpy: Y += alpha*X where X and Y are dense float arrays
+//------------------------------------------------------------------------------
+
+void GB_cblas_saxpy         // Y += alpha*X
+(
+    const int64_t n,        // length of X and Y (note the int64_t type)
+    const float alpha,      // scale factor
+    const float *X,         // the array X, always stride 1
+    float *Y,               // the array Y, always stride 1
+    int nthreads            // maximum # of threads to use
+) ;
+
+//------------------------------------------------------------------------------
+// GB_cblas_daxpy: Y += alpha*X where X and Y are dense double arrays
+//------------------------------------------------------------------------------
+
+void GB_cblas_daxpy         // Y += alpha*X
+(
+    const int64_t n,        // length of X and Y (note the int64_t type)
+    const double alpha,     // scale factor
+    const double *X,        // the array X, always stride 1
+    double *Y,              // the array Y, always stride 1
+    int nthreads            // maximum # of threads to use
+) ;
+
+#endif
+
diff --git a/Source/GB_dense_ewise3_accum.c b/Source/GB_dense_ewise3_accum.c
new file mode 100644
index 0000000000..102f06c8fd
--- /dev/null
+++ b/Source/GB_dense_ewise3_accum.c
@@ -0,0 +1,89 @@
+//------------------------------------------------------------------------------
+// GB_dense_ewise3_accum: C += A+B where all 3 matrices are dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#ifndef GBCOMPACT
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+void GB_dense_ewise3_accum          // C += A+B, all matrices dense
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GrB_Matrix A,             // dense input, same type as C
+    const GrB_Matrix B,             // dense input, same type as C
+    const GrB_BinaryOp op,          // operator; x, y, z types all equal
+    GB_Context Context              // for nthreads_max and chunk
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    ASSERT_MATRIX_OK (C, "C for dense C+=A+B", GB0) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
+    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
+    ASSERT (GB_is_dense (C)) ;
+    ASSERT (GB_is_dense (A)) ;
+    ASSERT (GB_is_dense (B)) ;
+    ASSERT_BINARYOP_OK (op, "op for dense C+=A+B", GB0) ;
+    ASSERT (op->ztype == C->type) ;
+    ASSERT (op->ztype == A->type) ;
+    ASSERT (op->ztype == B->type) ;
+    ASSERT (op->ztype == op->xtype) ;
+    ASSERT (op->ztype == op->ytype) ;
+    ASSERT (op->opcode >= GB_MIN_opcode) ;
+    ASSERT (op->opcode <= GB_RDIV_opcode) ;
+
+    // FUTURE::: handle IS*, LOR, LAND, LXOR operators
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use; the work estimate is 3*nnz(C)
+    //--------------------------------------------------------------------------
+
+    int64_t cnz = GB_NNZ (C) ;
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (3 * cnz, chunk, nthreads_max) ;
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    #define GB_Cdense_ewise3_accum(op,xyname) \
+        GB_Cdense_ewise3_accum_ ## op ## xyname
+
+    #define GB_BINOP_WORKER(op,xyname)                                      \
+    {                                                                       \
+        GB_Cdense_ewise3_accum(op,xyname) (C, A, B, nthreads) ;             \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory; if GB_binop_builtin is false, C is unchanged
+    //--------------------------------------------------------------------------
+
+    GB_Opcode opcode ;
+    GB_Type_code xycode, zcode ;
+    if (GB_binop_builtin (A->type, false, B->type, false, op, false,
+        &opcode, &xycode, &zcode))
+    { 
+        #define GB_BINOP_SUBSET
+        #include "GB_binop_factory.c"
+    }
+
+    //--------------------------------------------------------------------------
+    // return result
+    //--------------------------------------------------------------------------
+
+    ASSERT_MATRIX_OK (C, "C+=A+B output", GB0) ;
+}
+
+#endif
+
diff --git a/Source/GB_dense_ewise3_noaccum.c b/Source/GB_dense_ewise3_noaccum.c
new file mode 100644
index 0000000000..9addda4ac0
--- /dev/null
+++ b/Source/GB_dense_ewise3_noaccum.c
@@ -0,0 +1,105 @@
+//------------------------------------------------------------------------------
+// GB_dense_ewise3_noaccum: C = A+B where A and B are dense, C is anything
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// FUTURE: extend to handle typecasting and generic operators.
+
+#ifndef GBCOMPACT
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+#define GB_FREE_ALL ;
+
+GrB_Info GB_dense_ewise3_noaccum    // C = A+B
+(
+    GrB_Matrix C,                   // input/output matrix
+    const bool C_is_dense,          // true if C is dense
+    const GrB_Matrix A,             // dense input (x operand of op)
+    const GrB_Matrix B,             // dense input (y operand of op)
+    const GrB_BinaryOp op,          // binary operator; ztype is C->type
+    GB_Context Context              // for nthreads_max and chunk
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT_MATRIX_OK (C, "C for dense C=A+B", GB0) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
+    ASSERT (!GB_PENDING (B)) ; ASSERT (!GB_ZOMBIES (B)) ;
+    ASSERT (GB_IMPLIES (!C_is_dense, (C != A && C != B))) ;
+    ASSERT (GB_is_dense (A)) ;
+    ASSERT (GB_is_dense (B)) ;
+    ASSERT_BINARYOP_OK (op, "op for dense C=A+B", GB0) ;
+    ASSERT (op->ztype == C->type) ;
+    ASSERT (op->xtype == A->type) ;
+    ASSERT (op->ytype == B->type) ;
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use; the work estimate is 2*nnz(A)
+    //--------------------------------------------------------------------------
+
+    int64_t anz = GB_NNZ (A) ;
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (2 * anz, chunk, nthreads_max) ;
+
+    //--------------------------------------------------------------------------
+    // if C not already dense, allocate it and create its pattern (same as A)
+    //--------------------------------------------------------------------------
+
+    // clear prior content and then create a copy of the pattern of A.  Keep
+    // the same type and CSR/CSC for C.  Allocate the values of C but do not
+    // initialize them.
+
+    if (!C_is_dense)
+    { 
+        bool C_is_csc = C->is_csc ;
+        GB_PHIX_FREE (C) ;
+        GB_OK (GB_dup2 (&C, A, false, C->type, Context)) ;
+        C->is_csc = C_is_csc ;
+    }
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    #define GB_Cdense_ewise3_noaccum(op,xyname) \
+        GB_Cdense_ewise3_noaccum_ ## op ## xyname
+
+    #define GB_BINOP_WORKER(op,xyname)                                      \
+    {                                                                       \
+        info = GB_Cdense_ewise3_noaccum(op,xyname) (C, A, B, nthreads) ;    \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory; no worker runs if GB_binop_builtin is false
+    //--------------------------------------------------------------------------
+
+    GB_Opcode opcode ;
+    GB_Type_code xycode, zcode ;
+    if (GB_binop_builtin (A->type, false, B->type, false, op, false,
+        &opcode, &xycode, &zcode))
+    { 
+        #include "GB_binop_factory.c"
+    }
+
+    //--------------------------------------------------------------------------
+    // return result; NOTE(review): the worker's info is never checked
+    //--------------------------------------------------------------------------
+
+    ASSERT_MATRIX_OK (C, "C=A+B output", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
+#endif
+
diff --git a/Source/GB_dense_subassign_05d.c b/Source/GB_dense_subassign_05d.c
new file mode 100644
index 0000000000..4319014349
--- /dev/null
+++ b/Source/GB_dense_subassign_05d.c
@@ -0,0 +1,153 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_05d: C(:,:)<M> = scalar where C is dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Method 05d: C(:,:)<M> = scalar ; no S, C is dense
+
+// M:           present
+// Mask_comp:   false
+// C_replace:   false
+// accum:       NULL
+// A:           scalar
+// S:           none
+
+#include "GB_subassign_methods.h"
+#include "GB_dense.h"
+#ifndef GBCOMPACT
+#include "GB_type__include.h"
+#endif
+
+#undef  GB_FREE_WORK
+#define GB_FREE_WORK \
+    GB_ek_slice_free (&pstart_slice, &kfirst_slice, &klast_slice, ntasks) ;
+
+#undef  GB_FREE_ALL
+#define GB_FREE_ALL GB_FREE_WORK
+
+GrB_Info GB_dense_subassign_05d     // C(:,:)<M> = scalar, where C is dense
+(
+    GrB_Matrix C,                   // input/output: dense matrix to modify
+    // input:
+    const GrB_Matrix M,             // mask; its entries are sliced into tasks
+    const bool Mask_struct,         // forwarded unchanged to the workers
+    const GB_void *scalar,          // scalar to assign
+    const GrB_Type atype,           // presumably type of scalar; TODO confirm
+    GB_Context Context              // used to get nthreads_max and chunk
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT (GB_is_dense (C)) ;
+    ASSERT (!GB_PENDING (C)) ;
+    ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT_MATRIX_OK (C, "C for subassign method_05d", GB0) ;
+    const GB_Type_code ccode = C->type->code ;
+    const size_t csize = C->type->size ;
+    GB_GET_SCALAR ;     // presumably declares cwork (used below); TODO confirm
+
+    //--------------------------------------------------------------------------
+    // Method 05d: C(:,:)<M> = scalar ; no S; C is dense
+    //--------------------------------------------------------------------------
+
+    // Time: Optimal:  the method must iterate over all entries in M,
+    // and the time is O(nnz(M)).
+
+    //--------------------------------------------------------------------------
+    // Parallel: slice M into equal-sized chunks
+    //--------------------------------------------------------------------------
+
+    int64_t mnz   = GB_NNZ (M) ;
+    int64_t mnvec = M->nvec ;
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (mnz + mnvec, chunk, nthreads_max) ;
+    int ntasks = (nthreads == 1) ? 1 : (8 * nthreads) ;
+    ntasks = GB_IMIN (ntasks, mnz) ;    // no more tasks than entries in M
+    ntasks = GB_IMAX (ntasks, 1) ;      // but always at least one task
+
+    //--------------------------------------------------------------------------
+    // slice the entries for each task
+    //--------------------------------------------------------------------------
+
+    // Task tid does entries pstart_slice [tid] to pstart_slice [tid+1]-1 and
+    // vectors kfirst_slice [tid] to klast_slice [tid].  The first and last
+    // vectors may be shared with prior slices and subsequent slices.
+
+    int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
+    if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, M, ntasks))
+    { 
+        // out of memory
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    bool done = false ;
+
+    #define GB_Cdense_05d(xyname) GB_Cdense_05d_ ## xyname
+
+    #define GB_1TYPE_WORKER(xyname)                                         \
+    {                                                                       \
+        info = GB_Cdense_05d(xyname) (C, M, Mask_struct, cwork,             \
+            kfirst_slice, klast_slice, pstart_slice, ntasks, nthreads) ;    \
+        done = (info != GrB_NO_VALUE) ;                                     \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    #ifndef GBCOMPACT
+
+        // C<M> = x
+        #include "GB_1type_factory.c"
+
+    #endif
+
+    //--------------------------------------------------------------------------
+    // C<M> = x, generic case: no typed worker ran (always so if GBCOMPACT)
+    //--------------------------------------------------------------------------
+
+    if (!done)
+    { 
+
+        //----------------------------------------------------------------------
+        // define the macros used by the generic template
+        //----------------------------------------------------------------------
+
+        GB_BURBLE_MATRIX (M, "generic ") ;
+
+        // shadows the outer csize; both are C->type->size
+        const size_t csize = C->type->size ;
+
+        // Cx [p] = scalar
+        #define GB_COPY_SCALAR_TO_C(p,x) \
+            memcpy (Cx + ((p)*csize), x, csize)
+
+        #define GB_CTYPE GB_void
+
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+
+        #include "GB_dense_subassign_05d_template.c"
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    GB_FREE_WORK ;
+    ASSERT_MATRIX_OK (C, "C output for subassign method_05d", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_dense_subassign_06d.c b/Source/GB_dense_subassign_06d.c
new file mode 100644
index 0000000000..2dadc642d3
--- /dev/null
+++ b/Source/GB_dense_subassign_06d.c
@@ -0,0 +1,158 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_06d: C(:,:)<A> = A; C is dense, and M and A are aliased
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Method 06d: C(:,:)<A> = A ; no S, C is dense, M and A are aliased
+
+// M:           present
+// Mask_comp:   false
+// C_replace:   false
+// accum:       NULL
+// A:           matrix, and aliased to M
+// S:           none
+
+#include "GB_subassign_methods.h"
+#include "GB_dense.h"
+#ifndef GBCOMPACT
+#include "GB_type__include.h"
+#endif
+
+#undef  GB_FREE_WORK
+#define GB_FREE_WORK \
+    GB_ek_slice_free (&pstart_slice, &kfirst_slice, &klast_slice, ntasks) ;
+
+#undef  GB_FREE_ALL
+#define GB_FREE_ALL GB_FREE_WORK
+
+GrB_Info GB_dense_subassign_06d     // C(:,:)<A> = A, where C is dense
+(
+    GrB_Matrix C,                   // input/output: dense matrix to modify
+    // input:
+    const GrB_Matrix A,             // input matrix, which is also the mask
+    const bool Mask_struct,         // forwarded unchanged to the workers
+    GB_Context Context              // used to get nthreads_max and chunk
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT (GB_is_dense (C)) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
+    ASSERT_MATRIX_OK (C, "C for subassign method_06d", GB0) ;
+    ASSERT_MATRIX_OK (A, "A for subassign method_06d", GB0) ;
+    const GB_Type_code ccode = C->type->code ;
+
+    //--------------------------------------------------------------------------
+    // Method 06d: C(:,:)<A> = A ; no S; C is dense, M and A are aliased
+    //--------------------------------------------------------------------------
+
+    // Time: Optimal:  the method must iterate over all entries in A,
+    // and the time is O(nnz(A)).
+
+    //--------------------------------------------------------------------------
+    // Parallel: slice A into equal-sized chunks
+    //--------------------------------------------------------------------------
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (GB_NNZ (A) + A->nvec, chunk, nthreads_max) ;
+    int ntasks = (nthreads == 1) ? 1 : (8 * nthreads) ;
+    ntasks = GB_IMIN (ntasks, GB_NNZ (A)) ;     // no more tasks than entries
+    ntasks = GB_IMAX (ntasks, 1) ;              // but always at least one task
+
+    //--------------------------------------------------------------------------
+    // slice the entries for each task
+    //--------------------------------------------------------------------------
+
+    // Task tid does entries pstart_slice [tid] to pstart_slice [tid+1]-1 and
+    // vectors kfirst_slice [tid] to klast_slice [tid].  The first and last
+    // vectors may be shared with prior slices and subsequent slices.
+
+    int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
+    if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, A, ntasks))
+    { 
+        // out of memory
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    bool done = false ;
+
+    #define GB_Cdense_06d(xyname) GB_Cdense_06d_ ## xyname
+
+    #define GB_1TYPE_WORKER(xyname)                                         \
+    {                                                                       \
+        info = GB_Cdense_06d(xyname) (C, A, Mask_struct,                    \
+            kfirst_slice, klast_slice, pstart_slice, ntasks, nthreads) ;    \
+        done = (info != GrB_NO_VALUE) ;                                     \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    #ifndef GBCOMPACT
+
+        // typed workers: only if no typecast is needed and type is not UDT
+        if (C->type == A->type && ccode < GB_UDT_code)
+        { 
+            // C<A> = A
+            #include "GB_1type_factory.c"
+        }
+
+    #endif
+
+    //--------------------------------------------------------------------------
+    // C<A> = A for user-defined types, and typecasting
+    //--------------------------------------------------------------------------
+
+    if (!done)
+    { 
+
+        //----------------------------------------------------------------------
+        // get operators, functions, workspace, contents of A and C
+        //----------------------------------------------------------------------
+
+        GB_BURBLE_MATRIX (A, "generic ") ;
+
+        const size_t csize = C->type->size ;
+        const size_t asize = A->type->size ;
+        // a type code, declared with the same type as ccode above
+        const GB_Type_code acode = A->type->code ;
+        GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;
+
+        // Cx [p] = (ctype) Ax [pA]
+        #define GB_COPY_A_TO_C(Cx,p,Ax,pA) \
+            cast_A_to_C (Cx + ((p)*csize), Ax + ((pA)*asize), asize)
+        // value of Ax [pA] used as the mask entry, since A is also the mask
+        #define GB_AX_MASK(Ax,pA,asize) \
+            GB_mcast (Ax, pA, asize)
+
+        #define GB_CTYPE GB_void
+        #define GB_ATYPE GB_void
+
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+
+        #include "GB_dense_subassign_06d_template.c"
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    GB_FREE_WORK ;
+    ASSERT_MATRIX_OK (C, "C output for subassign method_06d", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_dense_subassign_21.c b/Source/GB_dense_subassign_21.c
new file mode 100644
index 0000000000..8c6d60ee3a
--- /dev/null
+++ b/Source/GB_dense_subassign_21.c
@@ -0,0 +1,191 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_21: C(:,:) = x where x is a scalar
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// C(:,:) = x where C is a matrix and x is a scalar
+
+#include "GB_dense.h"
+#include "GB_select.h"
+#include "GB_Pending.h"
+
+GrB_Info GB_dense_subassign_21      // C(:,:) = x; C is a matrix and x a scalar
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GB_void *scalar,          // input scalar
+    const GrB_Type atype,           // type of the input scalar
+    GB_Context Context              // used to get nthreads_max and chunk
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT_MATRIX_OK (C, "C for C(:,:)=x", GB0) ;
+    ASSERT (scalar != NULL) ;
+    // any prior pending tuples are discarded, and all zombies will be killed
+    ASSERT (GB_PENDING_OK (C)) ; ASSERT (GB_ZOMBIES_OK (C)) ;
+    ASSERT_TYPE_OK (atype, "atype for C(:,:)=x", GB0) ;
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    // cnzmax = cvlen * cvdim is the number of entries in C once it is dense
+    int64_t cvdim = C->vdim ;
+    int64_t cvlen = C->vlen ;
+    GrB_Index cnzmax ;
+    bool ok = GB_Index_multiply (&cnzmax, cvlen, cvdim) ;
+    if (!ok)
+    { 
+        // problem too large
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (cnzmax, chunk, nthreads_max) ;
+
+    //--------------------------------------------------------------------------
+    // typecast the scalar into the same type as C
+    //--------------------------------------------------------------------------
+
+    // cwork holds one value of size csize: the scalar, typecast to C->type
+    int64_t csize = C->type->size ;
+    GB_cast_function
+        cast_A_to_C = GB_cast_factory (C->type->code, atype->code) ;
+    GB_void cwork [GB_VLA(csize)] ;
+    cast_A_to_C (cwork, scalar, atype->size) ;
+
+    //--------------------------------------------------------------------------
+    // create the pattern, and allocate space for values, if needed
+    //--------------------------------------------------------------------------
+
+    // discard any prior pending tuples
+    GB_Pending_free (&(C->Pending)) ;
+
+    int64_t pC ;
+
+    // rebuild C if it has too few entries, shallow content, or zombies
+    if (GB_NNZ (C) < cnzmax || C->x_shallow || C->i_shallow || C->is_hyper
+        || GB_ZOMBIES (C))
+    {
+
+        //----------------------------------------------------------------------
+        // C is not yet dense: create pattern and allocate values
+        //----------------------------------------------------------------------
+
+        // clear all prior content and recreate it; use existing header for C.
+        // do not malloc C->x if the scalar is zero; calloc it later.
+        bool scalar_is_nonzero = GB_is_nonzero (cwork, csize) ;
+        GB_PHIX_FREE (C) ;
+        GB_CREATE (&C, C->type, cvlen, cvdim, GB_Ap_malloc, C->is_csc,
+            GB_FORCE_NONHYPER, C->hyper_ratio, C->vdim, cnzmax,
+            scalar_is_nonzero, Context) ;
+        if (info != GrB_SUCCESS)
+        { 
+            // out of memory (info is presumably set by the GB_CREATE macro)
+            return (GB_OUT_OF_MEMORY) ;
+        }
+
+        int64_t *GB_RESTRICT Cp = C->p ;
+        int64_t *GB_RESTRICT Ci = C->i ;
+        int nth = GB_nthreads (cvdim, chunk, nthreads_max) ;
+
+        // FUTURE:: dense data structure, where Cp and Ci will be implicit
+
+        // vector k holds cvlen entries, starting at position k*cvlen
+        int64_t k ;
+        #pragma omp parallel for num_threads(nth) schedule(static)
+        for (k = 0 ; k <= cvdim ; k++)
+        { 
+            Cp [k] = k * cvlen ;
+        }
+
+        C->magic = GB_MAGIC ;
+        C->nvec_nonempty = (cvlen == 0) ? 0 : cvdim ;
+
+        // the indices 0 to cvlen-1 repeat in each vector (no work if cnzmax==0)
+        #pragma omp parallel for num_threads(nthreads) schedule(static)
+        for (pC = 0 ; pC < cnzmax ; pC++)
+        { 
+            Ci [pC] = pC % cvlen ;
+        }
+
+        if (!scalar_is_nonzero)
+        { 
+            GBBURBLE ("calloc ") ;
+            GB_CALLOC_MEMORY (C->x, cnzmax, csize) ;
+        }
+
+        if (C->x == NULL)
+        { 
+            // out of memory
+            GB_PHIX_FREE (C) ;
+            return (GB_OUT_OF_MEMORY) ;
+        }
+
+        if (!scalar_is_nonzero)
+        { 
+            // quick return if the scalar is zero
+            ASSERT_MATRIX_OK (C, "C(:,:)=0 output", GB0) ;
+            return (GrB_SUCCESS) ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    // worker for built-in types
+    #define GB_WORKER(ctype)                                                \
+    {                                                                       \
+        ctype *GB_RESTRICT Cx = C->x ;                                      \
+        ctype x = (*(ctype *) cwork) ;                                      \
+        GB_PRAGMA (omp parallel for num_threads(nthreads) schedule(static)) \
+        for (pC = 0 ; pC < cnzmax ; pC++)                                   \
+        {                                                                   \
+            Cx [pC] = x ;                                                   \
+        }                                                                   \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    // each GB_WORKER expansion ends in "break", so no case falls through
+    switch (C->type->code)
+    {
+        case GB_BOOL_code   : GB_WORKER (bool) ;
+        case GB_INT8_code   : GB_WORKER (int8_t) ;
+        case GB_INT16_code  : GB_WORKER (int16_t) ;
+        case GB_INT32_code  : GB_WORKER (int32_t) ;
+        case GB_INT64_code  : GB_WORKER (int64_t) ;
+        case GB_UINT8_code  : GB_WORKER (uint8_t) ;
+        case GB_UINT16_code : GB_WORKER (uint16_t) ;
+        case GB_UINT32_code : GB_WORKER (uint32_t) ;
+        case GB_UINT64_code : GB_WORKER (uint64_t) ;
+        case GB_FP32_code   : GB_WORKER (float) ;
+        case GB_FP64_code   : GB_WORKER (double) ;
+        default:
+            {
+                // worker for all user-defined types
+                GB_BURBLE_N (cnzmax, "generic ") ;
+                GB_void *GB_RESTRICT Cx = C->x ;
+                #pragma omp parallel for num_threads(nthreads) schedule(static)
+                for (pC = 0 ; pC < cnzmax ; pC++)
+                { 
+                    memcpy (Cx +((pC)*csize), cwork, csize) ;
+                }
+            }
+            break ;
+    }
+
+    //--------------------------------------------------------------------------
+    // return result
+    //--------------------------------------------------------------------------
+
+    ASSERT_MATRIX_OK (C, "C(:,:)=x output", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_dense_subassign_22.c b/Source/GB_dense_subassign_22.c
new file mode 100644
index 0000000000..2b86836a42
--- /dev/null
+++ b/Source/GB_dense_subassign_22.c
@@ -0,0 +1,149 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_22: C += x where C is dense and x is a scalar
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// C += x where C is a dense matrix and x is a scalar
+
+#include "GB_dense.h"
+#ifndef GBCOMPACT
+#include "GB_binop__include.h"
+#endif
+
+GrB_Info GB_dense_subassign_22      // C += x where C is dense and x is a scalar 
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GB_void *scalar,          // input scalar
+    const GrB_Type atype,           // type of the input scalar
+    const GrB_BinaryOp accum,       // operator to apply
+    GB_Context Context              // used to get nthreads_max and chunk
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT_MATRIX_OK (C, "C for C+=x", GB0) ;
+    ASSERT (scalar != NULL) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (GB_is_dense (C)) ;
+    ASSERT_TYPE_OK (atype, "atype for C+=x", GB0) ;
+    ASSERT_BINARYOP_OK (accum, "accum for C+=x", GB0) ;
+
+    //--------------------------------------------------------------------------
+    // get the operator
+    //--------------------------------------------------------------------------
+
+    if (accum->opcode == GB_FIRST_opcode)
+    { 
+        // nothing to do: C is the first input of accum (C,x)
+        return (GrB_SUCCESS) ;
+    }
+
+    // C = accum (C,x) will be computed
+    ASSERT (C->type == accum->ztype) ;
+    ASSERT (C->type == accum->xtype) ;
+    ASSERT (GB_Type_compatible (atype, accum->ytype)) ;
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    int64_t cnz = GB_NNZ (C) ;
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (cnz, chunk, nthreads_max) ;
+
+    //--------------------------------------------------------------------------
+    // typecast the scalar into the same type as Y
+    //--------------------------------------------------------------------------
+
+    // ywork holds one value of size ysize: the scalar, typecast to accum->ytype
+    int64_t csize = C->type->size ;
+    size_t ysize = accum->ytype->size ;
+    GB_cast_function 
+        cast_A_to_Y = GB_cast_factory (accum->ytype->code, atype->code) ;
+    GB_void ywork [GB_VLA(ysize)] ;
+    cast_A_to_Y (ywork, scalar, atype->size) ;
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    bool done = false ;
+
+    #define GB_Cdense_accumX(accum,xyname) GB_Cdense_accumX_ ## accum ## xyname
+
+    #define GB_BINOP_WORKER(accum,xyname)                                   \
+    {                                                                       \
+        info = GB_Cdense_accumX(accum,xyname) (C, ywork, nthreads) ;        \
+        done = (info != GrB_NO_VALUE) ;                                     \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    #ifndef GBCOMPACT
+
+        GB_Opcode opcode ;
+        GB_Type_code xycode, zcode ;
+        if (GB_binop_builtin (C->type, false, atype, false, accum, false,
+            &opcode, &xycode, &zcode))
+        { 
+            // accumulate scalar into dense matrix with built-in operator
+            #include "GB_binop_factory.c"
+        }
+
+    #endif
+
+    //--------------------------------------------------------------------------
+    // C += x, scalar accum into dense, with typecasting or user-defined op
+    //--------------------------------------------------------------------------
+
+    // also reached for every operator when GBCOMPACT is defined
+    if (!done)
+    { 
+        GB_BURBLE_MATRIX (C, "generic ") ;
+
+        //----------------------------------------------------------------------
+        // get operators, functions, workspace, contents of x and C
+        //----------------------------------------------------------------------
+
+        GxB_binary_function fadd = accum->function ;
+
+        //----------------------------------------------------------------------
+        // C += x via function pointers, and typecasting
+        //----------------------------------------------------------------------
+
+        // C(i,j) = C(i,j) + scalar
+        #define GB_BINOP(cout_ij, cin_aij, ywork) \
+            GB_BINARYOP (cout_ij, cin_aij, ywork)
+
+        // binary operator
+        #define GB_BINARYOP(z,x,y) fadd (z,x,y)
+
+        // address of Cx [p]
+        #define GB_CX(p) Cx +((p)*csize)
+
+        #define GB_CTYPE GB_void
+
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+
+        #include "GB_dense_subassign_22_template.c"
+    }
+
+    //--------------------------------------------------------------------------
+    // return result
+    //--------------------------------------------------------------------------
+
+    ASSERT_MATRIX_OK (C, "C+=x output", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_dense_subassign_23.c b/Source/GB_dense_subassign_23.c
new file mode 100644
index 0000000000..24a8c6243c
--- /dev/null
+++ b/Source/GB_dense_subassign_23.c
@@ -0,0 +1,196 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_23: C += A where C is dense and A is sparse or dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// C and A must have the same vector dimension and vector length.
+// FUTURE::: the transposed case, C+=A' could easily be done.
+// The parallelism used is identical to GB_AxB_colscale.
+
+// The type of C must match the type of x and z for the accum function, since
+// C(i,j) = accum (C(i,j), A(i,j)) is handled.  The generic case here can
+// typecast A(i,j) but not C(i,j).  The case for typecasting of C is handled by
+// Method 04.
+
+#include "GB_dense.h"
+#ifndef GBCOMPACT
+#include "GB_binop__include.h"
+#endif
+
+#define GB_FREE_WORK \
+    GB_ek_slice_free (&pstart_slice, &kfirst_slice, &klast_slice, ntasks) ;
+
+#undef  GB_FREE_ALL
+#define GB_FREE_ALL GB_FREE_WORK
+
+GrB_Info GB_dense_subassign_23      // C += A; C is dense, A is sparse or dense
+(
+    GrB_Matrix C,                   // input/output matrix
+    const GrB_Matrix A,             // input matrix
+    const GrB_BinaryOp accum,       // operator to apply
+    GB_Context Context              // used to get nthreads_max and chunk
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT_MATRIX_OK (C, "C for C+=A", GB0) ;
+    ASSERT_MATRIX_OK (A, "A for C+=A", GB0) ;
+    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT_BINARYOP_OK (accum, "accum for C+=A", GB0) ;
+    ASSERT (A->vlen == C->vlen) ;
+    ASSERT (A->vdim == C->vdim) ;
+
+    //--------------------------------------------------------------------------
+    // get the operator
+    //--------------------------------------------------------------------------
+
+    if (accum->opcode == GB_FIRST_opcode)
+    { 
+        // nothing to do: C is the first input of accum (C,A)
+        return (GrB_SUCCESS) ;
+    }
+
+    // C = accum (C,A) will be computed
+    ASSERT (C->type == accum->ztype) ;
+    ASSERT (C->type == accum->xtype) ;
+    ASSERT (GB_Type_compatible (A->type, accum->ytype)) ;
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    int64_t anz   = GB_NNZ (A) ;
+    int64_t anvec = A->nvec ;
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (anz + anvec, chunk, nthreads_max) ;
+    int ntasks = (nthreads == 1) ? 1 : (32 * nthreads) ;
+    ntasks = GB_IMIN (ntasks, anz) ;    // no more tasks than entries in A
+    ntasks = GB_IMAX (ntasks, 1) ;      // but always at least one task
+
+    //--------------------------------------------------------------------------
+    // slice the entries for each task
+    //--------------------------------------------------------------------------
+
+    // Task tid does entries pstart_slice [tid] to pstart_slice [tid+1]-1 and
+    // vectors kfirst_slice [tid] to klast_slice [tid].  The first and last
+    // vectors may be shared with prior slices and subsequent slices.
+
+    int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
+    if (GB_is_dense (A))
+    { 
+        // both C and A are dense; no need to construct tasks
+        GBBURBLE ("(A dense) ") ;
+    }
+    else
+    {
+        // create tasks to compute over the matrix A
+        if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, A,
+            ntasks))
+        { 
+            // out of memory
+            return (GB_OUT_OF_MEMORY) ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    bool done = false ;
+
+    #define GB_Cdense_accumA(accum,xyname) GB_Cdense_accumA_ ## accum ## xyname
+
+    #define GB_BINOP_WORKER(accum,xyname)                                   \
+    {                                                                       \
+        info = GB_Cdense_accumA(accum,xyname) (C, A,                        \
+            kfirst_slice, klast_slice, pstart_slice, ntasks, nthreads) ;    \
+        done = (info != GrB_NO_VALUE) ;                                     \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    #ifndef GBCOMPACT
+
+        GB_Opcode opcode ;
+        GB_Type_code xycode, zcode ;
+        if (GB_binop_builtin (C->type, false, A->type, false,
+            accum, false, &opcode, &xycode, &zcode))
+        { 
+            // accumulate sparse matrix into dense matrix with built-in operator
+            #include "GB_binop_factory.c"
+        }
+
+    #endif
+
+    //--------------------------------------------------------------------------
+    // C += A, sparse accum into dense, with typecasting or user-defined op
+    //--------------------------------------------------------------------------
+
+    if (!done)
+    { 
+
+        //----------------------------------------------------------------------
+        // get operators, functions, workspace, contents of A and C
+        //----------------------------------------------------------------------
+
+        GB_BURBLE_MATRIX (A, "generic ") ;
+
+        GxB_binary_function fadd = accum->function ;
+
+        size_t csize = C->type->size ;
+        size_t asize = A->type->size ;
+        size_t ysize = accum->ytype->size ;
+
+        GB_cast_function cast_A ;
+        // A is typecasted to y.  GB_cast_factory takes the code of the output
+        // type first (ytype), and then the code of the input type (A->type).
+        cast_A = GB_cast_factory (accum->ytype->code, A->type->code) ;
+
+        //----------------------------------------------------------------------
+        // C += A via function pointers, and typecasting
+        //----------------------------------------------------------------------
+
+        // aij = A(i,j), located in Ax [pA].  Note that GB_GETB is used,
+        // since A appears as the 2nd input to z = fadd (x,y)
+        #define GB_GETB(aij,Ax,pA)                                          \
+            GB_void aij [GB_VLA(ysize)] ;                                   \
+            cast_A (aij, Ax +((pA)*asize), asize)
+
+        // C(i,j) = C(i,j) + A(i,j)
+        #define GB_BINOP(cout_ij, cin_aij, aij)                             \
+            GB_BINARYOP (cout_ij, cin_aij, aij)
+
+        // address of Cx [p]
+        #define GB_CX(p) Cx +((p)*csize)
+
+        #define GB_ATYPE GB_void
+        #define GB_CTYPE GB_void
+
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+
+        #define GB_BINARYOP(z,x,y) fadd (z,x,y)
+        #include "GB_dense_subassign_23_template.c"
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    GB_FREE_WORK ;
+    ASSERT_MATRIX_OK (C, "C+=A output", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_dense_subassign_24.c b/Source/GB_dense_subassign_24.c
new file mode 100644
index 0000000000..125634b3f0
--- /dev/null
+++ b/Source/GB_dense_subassign_24.c
@@ -0,0 +1,113 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_24: make a deep copy of a sparse matrix
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// C = A, making a deep copy into an existing non-shallow matrix C, but
+// possibly reusing parts of C if C is dense.  See also GB_dup.
+
+#include "GB_dense.h"
+#define GB_FREE_ALL ;
+
+GrB_Info GB_dense_subassign_24      // C = A, copy A into an existing matrix C
+(
+    GrB_Matrix C,           // output matrix to modify (header is kept)
+    const GrB_Matrix A,     // input matrix to copy; its pending work is done
+    GB_Context Context
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    ASSERT_MATRIX_OK (C, "C for C_dense_subassign_24", GB0) ;
+    ASSERT_MATRIX_OK (A, "A for A_dense_subassign_24", GB0) ;
+    ASSERT (GB_ZOMBIES_OK (A) && GB_PENDING_OK (A)) ;
+    ASSERT (GB_ZOMBIES_OK (C) && GB_PENDING_OK (C)) ;
+
+    //--------------------------------------------------------------------------
+    // delete any lingering zombies and assemble any pending tuples
+    //--------------------------------------------------------------------------
+
+    GB_WAIT (A) ;
+    if (A->nvec_nonempty < 0)
+    { 
+        A->nvec_nonempty = GB_nvec_nonempty (A, Context) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+
+    //--------------------------------------------------------------------------
+    // C = A
+    //--------------------------------------------------------------------------
+
+    int64_t anz = GB_NNZ (A) ;
+
+    bool copy_dense_A_to_C =            // copy from dense A to dense C if:
+        (
+            GB_is_dense (C)             //      both A and C are dense
+            && GB_is_dense (A)
+            && !GB_ZOMBIES (C)          //      C has no pending work
+            && !GB_PENDING (C)          // (FUTURE::: tolerate pending tuples)
+            // A has no zombies or pending tuples (see GB_WAIT (A) above)
+            && C->type == A->type       //      raw memcpy needs the same type
+            && !(C->p_shallow)          //      C is not shallow
+            && !(C->h_shallow)
+            && !(C->i_shallow)
+            && !(C->x_shallow)
+            && !C->is_hyper             //      both A and C are standard
+            && !A->is_hyper
+            && C->vdim == A->vdim       //      A and C have the same size
+            && C->vlen == A->vlen
+            && C->is_csc == A->is_csc   //      A and C have the same format
+            && C->p != NULL             //      C exists
+            && C->i != NULL
+            && C->x != NULL
+            && C->h == NULL             //      C is standard
+        ) ;
+
+    if (copy_dense_A_to_C)
+    { 
+
+        //----------------------------------------------------------------------
+        // copy the values from A to C; nothing else changes
+        //----------------------------------------------------------------------
+
+        GBBURBLE ("(dense copy) ") ;
+        int nthreads = GB_nthreads (anz, chunk, nthreads_max) ;
+        GB_memcpy (C->x, A->x, anz * A->type->size, nthreads) ;
+
+    }
+    else
+    { 
+
+        //----------------------------------------------------------------------
+        // deep copy of A into C, for any case the dense copy cannot handle
+        //----------------------------------------------------------------------
+
+        // clear all prior content of C, but keep the CSR/CSC format
+        GBBURBLE ("(deep copy) ") ;
+        GrB_Info info ;
+        bool C_is_csc = C->is_csc ;
+        GB_PHIX_FREE (C) ;
+        GB_OK (GB_dup2 (&C, A, true, A->type, Context)) ;
+        C->is_csc = C_is_csc ;      // do not change the CSR/CSC format of C
+    }
+
+    //--------------------------------------------------------------------------
+    // return the result
+    //--------------------------------------------------------------------------
+
+    ASSERT_MATRIX_OK (C, "C result for C_dense_subassign_24", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_dense_subassign_25.c b/Source/GB_dense_subassign_25.c
new file mode 100644
index 0000000000..10aa3acfca
--- /dev/null
+++ b/Source/GB_dense_subassign_25.c
@@ -0,0 +1,172 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_25: C(:,:)<M,s> = A; C empty, A dense, M structural
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Method 25: C(:,:)<M,s> = A ; C is empty, M structure, A dense
+
+// M:           present
+// Mask_comp:   false
+// Mask_struct: true
+// C_replace:   effectively false (not relevant since C is empty)
+// accum:       NULL
+// A:           matrix
+// S:           none
+
+#include "GB_subassign_methods.h"
+#include "GB_dense.h"
+#ifndef GBCOMPACT
+#include "GB_type__include.h"
+#endif
+
+#undef  GB_FREE_WORK
+#define GB_FREE_WORK \
+    GB_ek_slice_free (&pstart_slice, &kfirst_slice, &klast_slice, ntasks) ;
+
+#undef  GB_FREE_ALL
+#define GB_FREE_ALL GB_FREE_WORK
+
+GrB_Info GB_dense_subassign_25
+(
+    GrB_Matrix C,           // output; asserted empty with no pending work
+    // input:
+    const GrB_Matrix M,     // mask; its pattern is duplicated into C
+    const GrB_Matrix A,     // values are copied to C, typecast if needed
+    GB_Context Context
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT_MATRIX_OK (C, "C for subassign method_25", GB0) ;
+    ASSERT_MATRIX_OK (M, "M for subassign method_25", GB0) ;
+    ASSERT_MATRIX_OK (A, "A for subassign method_25", GB0) ;
+    ASSERT (GB_NNZ (C) == 0) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (!GB_PENDING (M)) ; ASSERT (!GB_ZOMBIES (M)) ;
+    ASSERT (!GB_PENDING (A)) ; ASSERT (!GB_ZOMBIES (A)) ;
+    const GB_Type_code ccode = C->type->code ;
+
+    //--------------------------------------------------------------------------
+    // Method 25: C(:,:)<M> = A ; C is empty, A is dense, M is structural
+    //--------------------------------------------------------------------------
+
+    // Time: Optimal:  the method must iterate over all entries in M,
+    // and the time is O(nnz(M)).  This is also the size of C.
+
+    //--------------------------------------------------------------------------
+    // Parallel: slice M into equal-sized chunks
+    //--------------------------------------------------------------------------
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int64_t mnz = GB_NNZ (M) ;
+    int nthreads = GB_nthreads (mnz + M->nvec, chunk, nthreads_max) ;
+    int ntasks = (nthreads == 1) ? 1 : (8 * nthreads) ;
+    ntasks = GB_IMIN (ntasks, mnz) ;
+    ntasks = GB_IMAX (ntasks, 1) ;
+
+    //--------------------------------------------------------------------------
+    // slice the entries for each task
+    //--------------------------------------------------------------------------
+
+    // Task tid does entries pstart_slice [tid] to pstart_slice [tid+1]-1 and
+    // vectors kfirst_slice [tid] to klast_slice [tid].  The first and last
+    // vectors may be shared with prior slices and subsequent slices.
+
+    int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
+    if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, M, ntasks))
+    { 
+        // out of memory
+        return (GB_OUT_OF_MEMORY) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // allocate C and create its pattern
+    //--------------------------------------------------------------------------
+
+    // clear prior content and then create a copy of the pattern of M.  Keep
+    // the same type and CSR/CSC for C.  Allocate the values of C but do not
+    // initialize them.
+
+    bool C_is_csc = C->is_csc ;
+    GB_PHIX_FREE (C) ;
+    GB_OK (GB_dup2 (&C, M, false, C->type, Context)) ;
+    C->is_csc = C_is_csc ;
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    bool done = false ;
+
+    #define GB_Cdense_25(xyname) GB_Cdense_25_ ## xyname
+
+    #define GB_1TYPE_WORKER(xyname)                                         \
+    {                                                                       \
+        info = GB_Cdense_25(xyname) (C, M, A,                               \
+            kfirst_slice, klast_slice, pstart_slice, ntasks, nthreads) ;    \
+        done = (info != GrB_NO_VALUE) ;                                     \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    #ifndef GBCOMPACT
+
+        if (C->type == A->type && ccode < GB_UDT_code)
+        { 
+            // C<M> = A
+            #include "GB_1type_factory.c"
+        }
+
+    #endif
+
+    //--------------------------------------------------------------------------
+    // C<M> = A for user-defined types, and typecasting
+    //--------------------------------------------------------------------------
+
+    if (!done)
+    { 
+
+        //----------------------------------------------------------------------
+        // get the type sizes, type codes, and the typecast function for A to C
+        //----------------------------------------------------------------------
+
+        GB_BURBLE_MATRIX (A, "generic ") ;
+
+        const size_t csize = C->type->size ;
+        const size_t asize = A->type->size ;
+        const GB_Type_code acode = A->type->code ;  // a type code, like ccode
+        GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;
+
+        // Cx [pC] = (ctype) Ax [pA]
+        #define GB_COPY_A_TO_C(Cx,pC,Ax,pA) \
+            cast_A_to_C (Cx + ((pC)*csize), Ax + ((pA)*asize), asize)
+
+        #define GB_CTYPE GB_void
+        #define GB_ATYPE GB_void
+
+        // no vectorization
+        #define GB_PRAGMA_VECTORIZE
+
+        #include "GB_dense_subassign_25_template.c"
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    GB_FREE_WORK ;
+    ASSERT_MATRIX_OK (C, "C output for subassign method_25", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_dup.c b/Source/GB_dup.c
index 87087d5712..5967843175 100644
--- a/Source/GB_dup.c
+++ b/Source/GB_dup.c
@@ -2,7 +2,7 @@
 // GB_dup: make a deep copy of a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -37,7 +37,7 @@ GrB_Info GB_dup             // make an exact copy of a matrix
     const GrB_Type ctype,   // type of C, if numeric is false
     GB_Context Context
 )
-{
+{ 
 
     //--------------------------------------------------------------------------
     // check inputs
@@ -46,86 +46,17 @@ GrB_Info GB_dup             // make an exact copy of a matrix
     ASSERT (Chandle != NULL) ;
     ASSERT_MATRIX_OK (A, "A to duplicate", GB0) ;
 
-    //--------------------------------------------------------------------------
-    // determine the number of threads to use
-    //--------------------------------------------------------------------------
-
-    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
-
     //--------------------------------------------------------------------------
     // delete any lingering zombies and assemble any pending tuples
     //--------------------------------------------------------------------------
 
     GB_WAIT (A) ;
 
-    // It would also be possible to copy the pending tuples instead.  This
-    // might be useful if the input matrix has just a few of them, and then
-    // further calls to setElement will be done on the output matrix C.  On the
-    // other hand, if A has lots of pending tuples, C will inherit them, and it
-    // will double the work needed to assemble both sets of identical tuples.
-
-    // Copying zombies is easy; this code does it already with almost no
-    // change (would need to just set the # of zombies in C).
-
     //--------------------------------------------------------------------------
     // C = A
     //--------------------------------------------------------------------------
 
-    if (A->nvec_nonempty < 0)
-    { 
-        A->nvec_nonempty = GB_nvec_nonempty (A, Context) ;
-    }
-
-    (*Chandle) = NULL ;
-
-    // [ create C; allocate C->p and do not initialize it
-    // C has the exact same hypersparsity as A.
-    GrB_Info info ;
-    int64_t anz = GB_NNZ (A) ;
-    GrB_Matrix C = NULL ;           // allocate a new header for C
-    GB_CREATE (&C, numeric ? A->type : ctype, A->vlen, A->vdim, GB_Ap_malloc,
-        A->is_csc, GB_SAME_HYPER_AS (A->is_hyper), A->hyper_ratio, A->plen,
-        anz, true, Context) ;
-    if (info != GrB_SUCCESS)
-    { 
-        return (info) ;
-    }
-
-    // copy the contents of A into C
-    int64_t anvec = A->nvec ;
-    C->nvec = anvec ;
-    C->nvec_nonempty = A->nvec_nonempty ;
-    int64_t *GB_RESTRICT Cp = C->p ;
-    int64_t *GB_RESTRICT Ch = C->h ;
-    int64_t *GB_RESTRICT Ci = C->i ;
-    const int64_t *GB_RESTRICT Ap = A->p ;
-    const int64_t *GB_RESTRICT Ah = A->h ;
-    const int64_t *GB_RESTRICT Ai = A->i ;
-
-    int nthreads = GB_nthreads (anvec, chunk, nthreads_max) ;
-    GB_memcpy (Cp, Ap, (anvec+1) * sizeof (int64_t), nthreads) ;
-    if (A->is_hyper)
-    { 
-        GB_memcpy (Ch, Ah, anvec * sizeof (int64_t), nthreads) ;
-    }
-
-    nthreads = GB_nthreads (anz, chunk, nthreads_max) ;
-    GB_memcpy (Ci, Ai, anz * sizeof (int64_t), nthreads) ;
-    if (numeric)
-    {
-        GB_memcpy (C->x, A->x, anz * A->type->size, nthreads) ;
-    }
-
-    C->magic = GB_MAGIC ;      // C->p and C->h are now initialized ]
-    #ifdef GB_DEBUG
-    if (numeric) ASSERT_MATRIX_OK (C, "C duplicate of A", GB0) ;
-    #endif
-
-    //--------------------------------------------------------------------------
-    // return the result
-    //--------------------------------------------------------------------------
-
-    (*Chandle) = C ;
-    return (GrB_SUCCESS) ;
+    (*Chandle) = NULL ; // create a new header for C
+    return (GB_dup2 (Chandle, A, numeric, ctype, Context)) ;
 }
 
diff --git a/Source/GB_dup2.c b/Source/GB_dup2.c
new file mode 100644
index 0000000000..bae54b28e9
--- /dev/null
+++ b/Source/GB_dup2.c
@@ -0,0 +1,96 @@
+//------------------------------------------------------------------------------
+// GB_dup2: make a deep copy of a sparse matrix
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// C = A, making a deep copy.  The header for C may already exist.
+
+// If numeric is false, C->x is allocated but not initialized.
+
+#include "GB.h"
+
+GrB_Info GB_dup2            // make an exact copy of a matrix
+(
+    GrB_Matrix *Chandle,    // output matrix; header reused if not NULL
+    const GrB_Matrix A,     // input matrix to copy
+    const bool numeric,     // if true, duplicate the numeric values
+    const GrB_Type ctype,   // type of C, if numeric is false
+    GB_Context Context
+)
+{
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+
+    //--------------------------------------------------------------------------
+    // C = A
+    //--------------------------------------------------------------------------
+
+    if (A->nvec_nonempty < 0)
+    { 
+        A->nvec_nonempty = GB_nvec_nonempty (A, Context) ;
+    }
+
+    // [ create C; allocate C->p and do not initialize it
+    // C has the exact same hypersparsity as A.
+    GrB_Info info ;     // presumably assigned by the GB_CREATE macro below
+    int64_t anz = GB_NNZ (A) ;
+
+    // allocate a new header for C if (*Chandle) is NULL, or reuse the
+    // existing header if (*Chandle) is not NULL.
+    GrB_Matrix C = (*Chandle) ;
+
+    GB_CREATE (&C, numeric ? A->type : ctype, A->vlen, A->vdim, GB_Ap_malloc,
+        A->is_csc, GB_SAME_HYPER_AS (A->is_hyper), A->hyper_ratio, A->plen,
+        anz, true, Context) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        return (info) ;
+    }
+
+    // copy the pattern of A into C, and also the values if numeric is true
+    int64_t anvec = A->nvec ;
+    C->nvec = anvec ;
+    C->nvec_nonempty = A->nvec_nonempty ;
+    int64_t *GB_RESTRICT Cp = C->p ;
+    int64_t *GB_RESTRICT Ch = C->h ;
+    int64_t *GB_RESTRICT Ci = C->i ;
+    const int64_t *GB_RESTRICT Ap = A->p ;
+    const int64_t *GB_RESTRICT Ah = A->h ;
+    const int64_t *GB_RESTRICT Ai = A->i ;
+
+    int nthreads = GB_nthreads (anvec, chunk, nthreads_max) ;
+    GB_memcpy (Cp, Ap, (anvec+1) * sizeof (int64_t), nthreads) ;
+    if (A->is_hyper)
+    { 
+        GB_memcpy (Ch, Ah, anvec * sizeof (int64_t), nthreads) ;
+    }
+
+    nthreads = GB_nthreads (anz, chunk, nthreads_max) ;
+    GB_memcpy (Ci, Ai, anz * sizeof (int64_t), nthreads) ;
+    if (numeric)
+    { 
+        GB_memcpy (C->x, A->x, anz * A->type->size, nthreads) ;
+    }
+
+    C->magic = GB_MAGIC ;      // C->p and C->h are now initialized ]
+    #ifdef GB_DEBUG
+    if (numeric) ASSERT_MATRIX_OK (C, "C duplicate of A", GB0) ;
+    #endif
+
+    //--------------------------------------------------------------------------
+    // return the result
+    //--------------------------------------------------------------------------
+
+    (*Chandle) = C ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_ek_slice.c b/Source/GB_ek_slice.c
index fe8a8fe19d..9e282b71a5 100644
--- a/Source/GB_ek_slice.c
+++ b/Source/GB_ek_slice.c
@@ -2,7 +2,7 @@
 // GB_ek_slice: slice the entries and vectors of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -46,8 +46,8 @@ bool GB_ek_slice        // true if successful, false if out of memory
     GB_CALLOC_MEMORY (kfirst_slice, ntasks, sizeof (int64_t)) ;
     GB_CALLOC_MEMORY (klast_slice, ntasks, sizeof (int64_t)) ;
 
-    if (pstart_slice == NULL || kfirst_slice == NULL | klast_slice == NULL)
-    {
+    if (pstart_slice == NULL || kfirst_slice == NULL || klast_slice == NULL)
+    { 
         GB_ek_slice_free (&pstart_slice, &kfirst_slice, &klast_slice, ntasks) ;
         return (false) ;
     }
diff --git a/Source/GB_ek_slice.h b/Source/GB_ek_slice.h
index 6c72b77a51..7e937e1d42 100644
--- a/Source/GB_ek_slice.h
+++ b/Source/GB_ek_slice.h
@@ -2,7 +2,7 @@
 // GB_ek_slice.h: slice the entries and vectors of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ek_slice_free.c b/Source/GB_ek_slice_free.c
index d8ab3bc6cf..5e2cf3570b 100644
--- a/Source/GB_ek_slice_free.c
+++ b/Source/GB_ek_slice_free.c
@@ -2,7 +2,7 @@
 // GB_ek_slice_free: free workspace created by GB_ek_slice
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_emult.c b/Source/GB_emult.c
index 80167ca6c5..890f2a2b24 100644
--- a/Source/GB_emult.c
+++ b/Source/GB_emult.c
@@ -2,7 +2,7 @@
 // GB_emult: C = A.*B or C<M>=A.*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -32,6 +32,7 @@ GrB_Info GB_emult           // C=A.*B or C<M>=A.*B
     const GrB_Type ctype,   // type of output matrix C
     const bool C_is_csc,    // format of output matrix C
     const GrB_Matrix M,     // optional mask, unused if NULL.  Not complemented
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix A,     // input A matrix
     const GrB_Matrix B,     // input B matrix
     const GrB_BinaryOp op,  // op to perform C = op (A,B)
@@ -43,6 +44,8 @@ GrB_Info GB_emult           // C=A.*B or C<M>=A.*B
     // check inputs
     //--------------------------------------------------------------------------
 
+    GBBURBLE ((M == NULL) ? "emult " : "masked_emult ") ;
+
     ASSERT (Chandle != NULL) ;
     ASSERT_MATRIX_OK (A, "A for emult phased", GB0) ;
     ASSERT_MATRIX_OK (B, "B for emult phased", GB0) ;
@@ -121,7 +124,7 @@ GrB_Info GB_emult           // C=A.*B or C<M>=A.*B
         // from phase0:
         Cnvec, Ch, C_to_M, C_to_A, C_to_B,
         // original input:
-        M, A, B, Context) ;
+        M, Mask_struct, A, B, Context) ;
 
     if (info != GrB_SUCCESS)
     { 
@@ -150,7 +153,7 @@ GrB_Info GB_emult           // C=A.*B or C<M>=A.*B
         // from phase0:
         Cnvec, Ch, C_to_M, C_to_A, C_to_B,
         // original input:
-        M, A, B, Context) ;
+        M, Mask_struct, A, B, Context) ;
 
     // free workspace
     GB_FREE_MEMORY (TaskList, max_ntasks+1, sizeof (GB_task_struct)) ;
diff --git a/Source/GB_emult.h b/Source/GB_emult.h
index f9ca39a408..f633d6cf67 100644
--- a/Source/GB_emult.h
+++ b/Source/GB_emult.h
@@ -2,7 +2,7 @@
 // GB_emult.h: definitions for GB_emult
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,6 +17,7 @@ GrB_Info GB_emult           // C=A.*B or C<M>=A.*B
     const GrB_Type ctype,   // type of output matrix C
     const bool C_is_csc,    // format of output matrix C
     const GrB_Matrix M,     // optional mask, unused if NULL.  Not complemented
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix A,     // input A matrix
     const GrB_Matrix B,     // input B matrix
     const GrB_BinaryOp op,  // op to perform C = op (A,B)
@@ -53,6 +54,7 @@ GrB_Info GB_emult_phase1                // count nnz in each C(:,j)
     const int64_t *GB_RESTRICT C_to_B,
     // original input:
     const GrB_Matrix M,                 // optional mask, may be NULL
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
@@ -79,6 +81,7 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
     const int64_t *GB_RESTRICT C_to_B,
     // original input:
     const GrB_Matrix M,                 // optional mask, may be NULL
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
diff --git a/Source/GB_emult_phase0.c b/Source/GB_emult_phase0.c
index ffe8a1c816..de36977a75 100644
--- a/Source/GB_emult_phase0.c
+++ b/Source/GB_emult_phase0.c
@@ -2,7 +2,7 @@
 // GB_emult_phase0: find vectors of C to compute for C=A.*B or C<M>=A.*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_emult_phase1.c b/Source/GB_emult_phase1.c
index b019baad17..aaa0de5bed 100644
--- a/Source/GB_emult_phase1.c
+++ b/Source/GB_emult_phase1.c
@@ -2,7 +2,7 @@
 // GB_emult_phase1: find # of entries in C=A.*B or C<M>=A.*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -35,6 +35,7 @@ GrB_Info GB_emult_phase1                // count nnz in each C(:,j)
     const int64_t *GB_RESTRICT C_to_B,
     // original input:
     const GrB_Matrix M,                 // optional mask, may be NULL
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
diff --git a/Source/GB_emult_phase2.c b/Source/GB_emult_phase2.c
index e8707189d7..51ee2dc651 100644
--- a/Source/GB_emult_phase2.c
+++ b/Source/GB_emult_phase2.c
@@ -2,7 +2,7 @@
 // GB_emult_phase2: C=A.*B or C<M>=A.*+B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -47,6 +47,7 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
     const int64_t *GB_RESTRICT C_to_B,
     // original input:
     const GrB_Matrix M,                 // optional mask, may be NULL
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix B,
     GB_Context Context
@@ -64,8 +65,12 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
     ASSERT_MATRIX_OK_OR_NULL (M, "M for emult phase2", GB0) ;
     ASSERT (A->vdim == B->vdim) ;
     ASSERT (GB_Type_compatible (ctype,   op->ztype)) ;
-    ASSERT (GB_Type_compatible (A->type, op->xtype)) ;
-    ASSERT (GB_Type_compatible (B->type, op->ytype)) ;
+    ASSERT (GB_IMPLIES (
+           !(op->opcode == GB_SECOND_opcode || op->opcode == GB_PAIR_opcode),
+            GB_Type_compatible (A->type, op->xtype))) ;
+    ASSERT (GB_IMPLIES (
+           !(op->opcode == GB_FIRST_opcode  || op->opcode == GB_PAIR_opcode),
+            GB_Type_compatible (B->type, op->ytype))) ;
 
     //--------------------------------------------------------------------------
     // allocate the output matrix C
@@ -107,6 +112,23 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
 
     GB_Type_code ccode = ctype->code ;
 
+    //--------------------------------------------------------------------------
+    // check the types of A and B
+    //--------------------------------------------------------------------------
+
+    // With C = ewisemult (A,B), only the intersection of A and B is used.
+    // If op is SECOND or PAIR, the values of A are never accessed.
+    // If op is FIRST  or PAIR, the values of B are never accessed.
+    // If op is PAIR, the values of A and B are never accessed.
+    // Contrast with ewiseadd.
+
+    bool op_is_first  = op->opcode == GB_FIRST_opcode ;
+    bool op_is_second = op->opcode == GB_SECOND_opcode ;
+    bool op_is_pair   = op->opcode == GB_PAIR_opcode ;
+    // A is passed as x, and B as y, in z = op(x,y)
+    bool A_is_pattern = op_is_second || op_is_pair ;
+    bool B_is_pattern = op_is_first  || op_is_pair ;
+
     //--------------------------------------------------------------------------
     // using a built-in binary operator
     //--------------------------------------------------------------------------
@@ -121,7 +143,7 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
 
     #define GB_BINOP_WORKER(mult,xyname)                            \
     {                                                               \
-        info = GB_AemultB(mult,xyname) (C, M, A, B,                 \
+        info = GB_AemultB(mult,xyname) (C, M, Mask_struct, A, B,    \
             C_to_M, C_to_A, C_to_B, TaskList, ntasks, nthreads) ;   \
         done = (info != GrB_NO_VALUE) ;                             \
     }                                                               \
@@ -136,7 +158,7 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
         GB_Opcode opcode ;
         GB_Type_code xycode, zcode ;
 
-        if (GB_binop_builtin (A, false, B, false, op,
+        if (GB_binop_builtin (A->type, A_is_pattern, B->type, B_is_pattern, op,
             false, &opcode, &xycode, &zcode) && ccode == zcode)
         { 
             #include "GB_binop_factory.c"
@@ -146,11 +168,13 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
     #endif
 
     //--------------------------------------------------------------------------
-    // generic worker
+    // generic worker: with typecasting and arbitrary operators
     //--------------------------------------------------------------------------
 
     if (!done)
     { 
+        GB_BURBLE_MATRIX (C, "generic ") ;
+
         GxB_binary_function fmult ;
         size_t csize, asize, bsize, xsize, ysize, zsize ;
         GB_cast_function cast_A_to_X, cast_B_to_Y, cast_Z_to_C ;
@@ -201,6 +225,20 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
     // construct the final C->h
     //--------------------------------------------------------------------------
 
+    if (C_is_hyper)
+    {
+        C->nvec_nonempty = -1 ;
+        info = GB_hypermatrix_prune (C, Context) ;
+        if (info != GrB_SUCCESS)
+        { 
+            // out of memory
+            GB_MATRIX_FREE (&C) ;
+            return (info) ;
+        }
+    }
+
+#if 0
+    // see GB_hypermatrix_prune
     if (C_is_hyper)
     {
         // create new Cp_new and Ch_new arrays, with no empty vectors
@@ -226,6 +264,7 @@ GrB_Info GB_emult_phase2                // C=A.*B or C<M>=A.*B
         C->h_shallow = false ;
         ASSERT (C->nvec == C->nvec_nonempty) ;
     }
+#endif
 
     //--------------------------------------------------------------------------
     // return result
diff --git a/Source/GB_entry_check.c b/Source/GB_entry_check.c
index 95da5442ed..2e2146db47 100644
--- a/Source/GB_entry_check.c
+++ b/Source/GB_entry_check.c
@@ -2,7 +2,7 @@
 // GB_entry_check: print a single entry for a built-in type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_error.c b/Source/GB_error.c
index 7502996716..5ea6bbd369 100644
--- a/Source/GB_error.c
+++ b/Source/GB_error.c
@@ -2,7 +2,7 @@
 // GB_error: log an error string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_eslice.c b/Source/GB_eslice.c
index 5ff082e78d..54690e4514 100644
--- a/Source/GB_eslice.c
+++ b/Source/GB_eslice.c
@@ -2,7 +2,7 @@
 // GB_eslice: uniform partition of e items to each task 
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ewise.c b/Source/GB_ewise.c
index ae6c5ff2fe..284c7d5ccb 100644
--- a/Source/GB_ewise.c
+++ b/Source/GB_ewise.c
@@ -2,7 +2,7 @@
 // GB_ewise: C<M> = accum (C, A+B) or A.*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -16,6 +16,7 @@
 #include "GB_emult.h"
 #include "GB_transpose.h"
 #include "GB_accum_mask.h"
+#include "GB_dense.h"
 
 #define GB_FREE_ALL         \
 {                           \
@@ -31,13 +32,14 @@ GrB_Info GB_ewise                   // C<M> = accum (C, A+B) or A.*B
     const bool C_replace,           // if true, clear C before writing to it
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // if true, complement the mask M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_BinaryOp op,          // defines '+' for C=A+B, or .* for A.*B
     const GrB_Matrix A,             // input matrix
     bool A_transpose,               // if true, use A' instead of A
     const GrB_Matrix B,             // input matrix
     bool B_transpose,               // if true, use B' instead of B
-    const bool eWiseAdd,            // if true, do set union (like A+B),
+    bool eWiseAdd,                  // if true, do set union (like A+B),
                                     // otherwise do intersection (like A.*B)
     GB_Context Context
 )
@@ -149,31 +151,67 @@ GrB_Info GB_ewise                   // C<M> = accum (C, A+B) or A.*B
         C_is_csc = !C_is_csc ;
     }
 
+    //--------------------------------------------------------------------------
+    // determine if any matrices are dense
+    //--------------------------------------------------------------------------
+
+    bool C_is_dense = GB_is_dense (C) && !GB_PENDING_OR_ZOMBIES (C) ;
+    bool A_is_dense = GB_is_dense (A) ;
+    bool B_is_dense = GB_is_dense (B) ;
+    bool M_is_dense = GB_is_dense (M) ;
+
+    if (C_is_dense) { GBBURBLE ("(C dense) ") ; }
+    if (A_is_dense) { GBBURBLE ("(A dense) ") ; }
+    if (B_is_dense) { GBBURBLE ("(B dense) ") ; }
+    if (M_is_dense) { GBBURBLE ("(M dense) ") ; }
+
     //--------------------------------------------------------------------------
     // decide when to apply the mask
     //--------------------------------------------------------------------------
 
     // GB_add and GB_emult can apply any non-complemented mask, but it is
     // faster to exploit the mask in GB_add / GB_emult only when it is very
-    // sparse compared with A and B.
+    // sparse compared with A and B, or (in special cases) when it is easy
+    // to apply.
 
     // check the CSR/CSC format of M
     bool M_is_csc = (M == NULL) ? C_is_csc : M->is_csc ;
-
     bool mask_applied = false ;
     GrB_Matrix M1 = NULL ;
 
-    if (M != NULL && !Mask_comp && GB_MASK_VERY_SPARSE (M, A, B))
+    if (M != NULL && !Mask_comp)
     {
-        // the mask is present, not complemented, and very sparse; use it
-        // during GB_add and GB_emult to reduce memory and work.
-        M1 = M ;
-        if (C_is_csc != M_is_csc)
+        // mask is present, not complemented; see if it is quick or easy to use.
+        // it may be a structural or valued mask.
+        bool mask_is_easy = (A_is_dense || (A == M))    // A is easy
+                         && (B_is_dense || (B == M)) ;  // and B is easy
+        bool mask_is_very_sparse = GB_MASK_VERY_SPARSE (M, A, B) ;
+        if (mask_is_easy || mask_is_very_sparse)
+        {
+            // the mask is present, not complemented, and very sparse or easy
+            // to exploit ; use it during GB_add and GB_emult to reduce memory
+            // and work.
+            M1 = M ;
+            if (C_is_csc != M_is_csc)
+            { 
+                GBBURBLE ("(M transpose) ") ;
+                GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M, NULL, Context));
+                M1 = MT ;
+            }
+            mask_applied = true ;
+            if (mask_is_easy)
+            { 
+                GBBURBLE ("(mask is easy) ") ;
+            }
+            else // mask_is_very_sparse
+            { 
+                GBBURBLE ("(mask applied) ") ;
+            }
+        }
+        else
         { 
-            GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M, NULL, Context)) ;
-            M1 = MT ;
+            GBBURBLE ("(mask later) ") ;
         }
-        mask_applied = true ;
     }
 
     //--------------------------------------------------------------------------
@@ -185,6 +223,7 @@ GrB_Info GB_ewise                   // C<M> = accum (C, A+B) or A.*B
     { 
         // AT = A'
         // transpose: no typecast, no op, not in place
+        GBBURBLE ("(A transpose) ") ;
         GB_OK (GB_transpose (&AT, NULL, C_is_csc, A, NULL, Context)) ;
         A1 = AT ;
     }
@@ -198,21 +237,97 @@ GrB_Info GB_ewise                   // C<M> = accum (C, A+B) or A.*B
     { 
         // BT = B'
         // transpose: no typecast, no op, not in place
+        GBBURBLE ("(B transpose) ") ;
         GB_OK (GB_transpose (&BT, NULL, C_is_csc, B, NULL, Context)) ;
         B1 = BT ;
     }
 
+    //--------------------------------------------------------------------------
+    // special cases
+    //--------------------------------------------------------------------------
+
+    // FUTURE::: handle more special cases
+
+    if (A_is_dense && B_is_dense)
+    { 
+        // no need to use eWiseAdd if both A and B are dense
+        eWiseAdd = false ;
+    }
+
+    bool no_typecast =
+        (op->ztype == C->type)              // no typecasting of C
+        && (op->xtype == A1->type)          // no typecasting of A
+        && (op->ytype == B1->type) ;        // no typecasting of B
+
+    #ifndef GBCOMPACT
+
+        // FUTURE: for sssp12:
+        // C<A> = A+B where C is sparse and B is dense;
+        // mask is structural, not complemented, C_replace is false.
+        // C is not empty.  Use a kernel that computes T<A>=A+B
+        // where T starts out empty; just iterate over the entries in A.
+
+    if (A_is_dense                          // A and B are dense
+        && B_is_dense
+        && (M == NULL) && !Mask_comp        // no mask
+        && (C->is_csc == C_is_csc)          // no transpose of C
+        && no_typecast                      // no typecasting
+        && (op->opcode < GB_USER_opcode)    // not a user-defined operator
+        )
+    {
+
+        if (C_is_dense                      // C is dense
+        && accum == op                      // accum is same as the op
+        && (op->opcode >= GB_MIN_opcode)    // subset of binary operators
+        && (op->opcode <= GB_RDIV_opcode))
+        { 
+
+            //------------------------------------------------------------------
+            // C += A+B where all 3 matrices are dense
+            //------------------------------------------------------------------
+
+            // C_replace is ignored
+            GBBURBLE ("dense C+=A+B ") ;
+            GB_dense_ewise3_accum (C, A1, B1, op, Context) ;    // cannot fail
+            GB_FREE_ALL ;
+            ASSERT_MATRIX_OK (C, "C output for GB_ewise, dense C+=A+B", GB0) ;
+            return (GrB_SUCCESS) ;
+
+        }
+        else if (accum == NULL)             // no accum
+        { 
+
+            //------------------------------------------------------------------
+            // C = A+B where A and B are dense (C is anything)
+            //------------------------------------------------------------------
+
+            // C_replace is ignored
+            GBBURBLE ("dense C=A+B ") ;
+            info = GB_dense_ewise3_noaccum (C, C_is_dense, A1, B1, op, Context);
+            GB_FREE_ALL ;
+            if (info == GrB_SUCCESS)
+            {
+                ASSERT_MATRIX_OK (C, "C output for GB_ewise, dense C=A+B", GB0);
+            }
+            return (info) ;
+        }
+    }
+
+    #endif
+
     //--------------------------------------------------------------------------
     // T = A+B or A.*B
     //--------------------------------------------------------------------------
 
     if (eWiseAdd)
     { 
-        GB_OK (GB_add (&T, T_type, C_is_csc, M1, A1, B1, op, Context)) ;
+        GB_OK (GB_add (&T, T_type, C_is_csc, M1, Mask_struct, A1, B1, op,
+            Context)) ;
     }
     else
     { 
-        GB_OK (GB_emult (&T, T_type, C_is_csc, M1, A1, B1, op, Context)) ;
+        GB_OK (GB_emult (&T, T_type, C_is_csc, M1, Mask_struct, A1, B1, op,
+            Context)) ;
     }
 
     //--------------------------------------------------------------------------
@@ -245,7 +360,7 @@ GrB_Info GB_ewise                   // C<M> = accum (C, A+B) or A.*B
         // C<M> = accum (C,T)
         // GB_accum_mask also conforms C to its desired hypersparsity
         info = GB_accum_mask (C, M, MT, accum, &T, C_replace, Mask_comp,
-            Context) ;
+            Mask_struct, Context) ;
         GB_MATRIX_FREE (&MT) ;
         return (info) ;
     }
diff --git a/Source/GB_ewise.h b/Source/GB_ewise.h
index 622a3e4ed0..d3ecf59185 100644
--- a/Source/GB_ewise.h
+++ b/Source/GB_ewise.h
@@ -2,7 +2,7 @@
 // GB_ewise.h: definitions for GB_ewise
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,13 +17,14 @@ GrB_Info GB_ewise                   // C<M> = accum (C, A+B) or A.*B
     const bool C_replace,           // if true, clear C before writing to it
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // if true, complement the mask M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_BinaryOp op,          // defines '+' for C=A+B, or .* for A.*B
     const GrB_Matrix A,             // input matrix
     bool A_transpose,               // if true, use A' instead of A
     const GrB_Matrix B,             // input matrix
     bool B_transpose,               // if true, use B' instead of B
-    const bool eWiseAdd,            // if true, do set union (like A+B),
+    bool eWiseAdd,                  // if true, do set union (like A+B),
                                     // otherwise do intersection (like A.*B)
     GB_Context Context
 ) ;
diff --git a/Source/GB_ewise_slice.c b/Source/GB_ewise_slice.c
index 0fdd1376c0..4b50296bbd 100644
--- a/Source/GB_ewise_slice.c
+++ b/Source/GB_ewise_slice.c
@@ -2,7 +2,7 @@
 // GB_ewise_slice: slice the entries and vectors for an ewise operation
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -260,7 +260,7 @@ GrB_Info GB_ewise_slice
     //--------------------------------------------------------------------------
 
     if (!GB_pslice (&Coarse, Cwork, Cnvec, ntasks1))
-    {
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
diff --git a/Source/GB_export.h b/Source/GB_export.h
index e7fd150c9b..a102358855 100644
--- a/Source/GB_export.h
+++ b/Source/GB_export.h
@@ -2,7 +2,7 @@
 // GB_export.h: definitions for import/export
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -38,7 +38,7 @@
             nvals, GB_INDEX_MAX))) ;                            \
     }                                                           \
     /* get the descriptor */                                    \
-    GB_GET_DESCRIPTOR (info, desc, xx1, xx2, xx3, xx4, xx5) ;
+    GB_GET_DESCRIPTOR (info, desc, xx1, xx2, xx3, xx4, xx5, xx6) ;
 
 #define GB_EXPORT_CHECK                                         \
     GB_RETURN_IF_NULL (A) ;                                     \
@@ -53,7 +53,7 @@
     GB_RETURN_IF_NULL (nvals) ;                                 \
     GB_RETURN_IF_NULL (nonempty) ;                              \
     /* get the descriptor */                                    \
-    GB_GET_DESCRIPTOR (info, desc, xx1, xx2, xx3, xx4, xx5) ;   \
+    GB_GET_DESCRIPTOR (info, desc, xx1, xx2, xx3, xx4, xx5, xx6) ; \
     /* export basic attributes */                               \
     (*type) = (*A)->type ;                                      \
     (*nrows) = GB_NROWS (*A) ;                                  \
diff --git a/Source/GB_extract.c b/Source/GB_extract.c
index 1336073305..f5af3ac2d6 100644
--- a/Source/GB_extract.c
+++ b/Source/GB_extract.c
@@ -2,7 +2,7 @@
 // GB_extract: C<M> = accum(C,A(I,J))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,6 +29,7 @@ GrB_Info GB_extract                 // C<M> = accum (C, A(I,J))
     const bool C_replace,           // C matrix descriptor
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // mask descriptor
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_Matrix A,             // input matrix
     const bool A_transpose,         // A matrix descriptor
@@ -188,6 +189,6 @@ GrB_Info GB_extract                 // C<M> = accum (C, A(I,J))
     //--------------------------------------------------------------------------
 
     return (GB_accum_mask (C, M, NULL, accum, &T, C_replace, Mask_comp,
-        Context)) ;
+        Mask_struct, Context)) ;
 }
 
diff --git a/Source/GB_extract.h b/Source/GB_extract.h
index bd9015571b..96bb5325fa 100644
--- a/Source/GB_extract.h
+++ b/Source/GB_extract.h
@@ -2,7 +2,7 @@
 // GB_extract.h: definitions for GB_extract
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,6 +17,7 @@ GrB_Info GB_extract                 // C<M> = accum (C, A(I,J))
     const bool C_replace,           // C matrix descriptor
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // mask descriptor
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_Matrix A,             // input matrix
     const bool A_transpose,         // A matrix descriptor
diff --git a/Source/GB_extractElement.c b/Source/GB_extractElement.c
index 98bb1dfb9b..134e9cfe59 100644
--- a/Source/GB_extractElement.c
+++ b/Source/GB_extractElement.c
@@ -2,7 +2,7 @@
 // GB_extractElement: x = A(row,col)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_extractTuples.c b/Source/GB_extractTuples.c
index 71c94350a1..020bdcfaf9 100644
--- a/Source/GB_extractTuples.c
+++ b/Source/GB_extractTuples.c
@@ -2,7 +2,7 @@
 // GB_extractTuples: extract all the tuples from a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -113,7 +113,7 @@ GrB_Info GB_extractTuples       // extract all tuples from a matrix
     if (J != NULL)
     {
         if (!GB_extract_vector_list ((int64_t *) J, A, nthreads))
-        {
+        { 
             // out of memory
             return (GB_OUT_OF_MEMORY) ;
         }
diff --git a/Source/GB_extract_vector_list.c b/Source/GB_extract_vector_list.c
index b9585375f0..35197acfb2 100644
--- a/Source/GB_extract_vector_list.c
+++ b/Source/GB_extract_vector_list.c
@@ -2,7 +2,7 @@
 // GB_extract_vector_list: extract vector indices for all entries in a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -60,7 +60,7 @@ bool GB_extract_vector_list     // true if successful, false if out of memory
 
     int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
     if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, A, ntasks))
-    {
+    { 
         // out of memory
         return (false) ;
     }
diff --git a/Source/GB_fine_slice.c b/Source/GB_fine_slice.c
deleted file mode 100644
index 43ffa65511..0000000000
--- a/Source/GB_fine_slice.c
+++ /dev/null
@@ -1,187 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_fine_slice: create fine hyperslices of B
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// For each thread tid, create Bslice [tid] as a fine hyperslice of B.  The i
-// and x arrays are the same as B.  When this function returns, the rest of
-// GraphBLAS will view Bslice [tid] as a hyperslice, but with non-shallow
-// Bslice [tid]->p and either shallow Bslice [tid]->h (if B is hypersparse) or
-// non-shallow Bslice [tid]->h (if B is sparse).
-
-// For each fine hyperslice, Bslice [tid]->p is allocated and created here; it
-// is not shallow (unlike the coarse slices computed by GB_slice).
-
-// Bslice [tid]->i and Bslice [tid]->x are offset pointers into B, so that
-// Bslice [tid]->p [0] == 0 for all slices tid.
-
-// if B is hypersparse, then Bslice [tid]->h is a shallow pointer into B->h,
-// where Bslice [tid]->h [0] is the same as B->h [k] if the kth vector of B is
-// the first vector of Bslice [tid].
-
-// The matrix dimensions of each slice are the same as B.  All slices have
-// vector length B->vlen and vector dimension B->vdim.   The slices are subsets
-// of the entries of B, as defined by the Slice array.  The Bslice [tid]
-// consists of the entries Slice [tid] to Slice [tid+1]-1 of B.
-
-// This function does O(nthreads+B->nvec) work and allocates up to
-// O(nthreads+B->nvec) space, so it could be parallel, but it will tend to be
-// used when B->nvec is small (even 1, for GrB_mxv and GrB_vxm).  So it does
-// not need to be parallel.
-
-#include "GB_mxm.h"
-#include "GB_ek_slice.h"
-
-GrB_Info GB_fine_slice  // slice B into nthreads fine hyperslices
-(
-    GrB_Matrix B,       // matrix to slice
-    int nthreads,       // # of slices to create
-    int64_t *Slice,     // array of size nthreads+1 that defines the slice
-    GrB_Matrix *Bslice, // array of output slices, of size nthreads
-    GB_Context Context
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT_MATRIX_OK (B, "B to slice", GB0) ;
-    ASSERT (nthreads > 1) ;
-    ASSERT (Bslice != NULL) ;
-    ASSERT (Slice != NULL) ;
-    ASSERT (Slice [0] == 0) ;
-    ASSERT (Slice [nthreads] == GB_NNZ (B)) ;
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    {
-        ASSERT (Slice [tid] <= Slice [tid+1]) ;
-    }
-
-    GrB_Info info ;
-
-    //--------------------------------------------------------------------------
-    // create the hyperslices
-    //--------------------------------------------------------------------------
-
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    {
-
-        // Bslice [tid] will contain entries pfirst:plast-1 of B.
-        int64_t pfirst = Slice [tid] ;
-        int64_t plast  = Slice [tid+1] - 1 ;
-        int64_t bslice_nz = plast - pfirst + 1 ;
-        int64_t bvec_first = 0 ;
-        int64_t bslice_nvec = 0 ;
-
-        if (bslice_nz > 0)
-        {
-
-            // find the first column of Bslice [tid]: the column that contains
-            // the entry at Bi [pfirst] and Bx [pfirst]
-
-            bvec_first = GB_search_for_vector (pfirst, B->p, 0, B->nvec) ;
-
-            ASSERT (B->p [bvec_first] <= pfirst) ;
-            ASSERT (pfirst <= B->p [bvec_first+1]) ;
-
-            // find the last column of Bslice [tid]: the column that contains
-            // the entry at Bi [plast] and Bx [plast]
-
-            int64_t bvec_last = GB_search_for_vector (plast, B->p, 0, B->nvec) ;
-
-            ASSERT (B->p [bvec_last] <= plast && plast < B->p [bvec_last+1]) ;
-
-            // total number of vectors in B
-            bslice_nvec = bvec_last - bvec_first + 1 ;
-
-        }
-
-        // allocate Bslice [tid].  Bslice [tid]->p is always allocated.  Bslice
-        // [tid] will always eventually be hypersparse.  However,
-        // Bslice[tid]->h will be a shallow offset into B->h if B is
-        // hypersparse, so GB_new should not allocate h (initially creating a
-        // non-hypersparse Bslice [tid]).  If B is not hypersparse, then
-        // Bslice[tid]->h must be allocated.  As a result, GB_new should create
-        // Bslice [tid] as initially hypersparse if B is not hypersparse.
-        // Thus, in both cases, GB_new constructs Bslice [tid] with the
-        // opposite hypersparsity status of B.
-
-        Bslice [tid] = NULL ;
-        GB_NEW (&(Bslice [tid]), B->type, B->vlen, B->vdim, GB_Ap_malloc,
-            B->is_csc, GB_SAME_HYPER_AS (!(B->is_hyper)), GB_ALWAYS_HYPER,
-            bslice_nvec, NULL) ;
-        if (info != GrB_SUCCESS)
-        {
-            // out of memory
-            for (int i = 0 ; i < tid ; i++)
-            { 
-                GB_MATRIX_FREE (&(Bslice [i])) ;
-            }
-            return (info) ;
-        }
-
-        // Bslice [tid] is always a hyperslice
-        (Bslice [tid])->is_hyper = true ;
-        (Bslice [tid])->is_slice = true ;
-        (Bslice [tid])->hfirst = 0 ;      // unused
-        (Bslice [tid])->plen = bslice_nvec ;
-        (Bslice [tid])->nvec = bslice_nvec ;
-
-        // Bslice has shallow pointers into B->i and B->x
-        (Bslice [tid])->i = B->i + pfirst ;
-        (Bslice [tid])->i_shallow = true ;
-        GB_void *GB_RESTRICT Bx = B->x ;
-        (Bslice [tid])->x = Bx + pfirst * B->type->size ;
-        (Bslice [tid])->x_shallow = true ;
-
-        // Bslice->h hyperlist
-        if (B->is_hyper)
-        { 
-            // B is hypersparse; the columns of Bslice [tid] are B->h
-            // [bvec_first:bvec_last].  Bslice [tid] is a hyperslice (with an
-            // explict h list, as a shallow pointer into B->h).
-            ASSERT ((Bslice [tid])->h == NULL) ;
-            (Bslice [tid])->h = B->h + bvec_first ;
-            (Bslice [tid])->h_shallow = true ;
-        }
-        else
-        { 
-            // the columns of Bslice [tid] are [bvec_first:bvec_last].
-            // Bslice [tid] is a hyperslice (with an explicit h list)
-            ASSERT ((Bslice [tid])->h != NULL) ;
-            ASSERT ((Bslice [tid])->h_shallow == false) ;
-            for (int64_t k = 0 ; k < bslice_nvec ; k++)
-            {
-                (Bslice [tid])->h [k] = bvec_first + k ;
-            }
-        }
-
-        // Bslice->p is always allocated fresh by GB_new.
-        ASSERT ((Bslice [tid])->p != NULL) ;
-        ASSERT ((Bslice [tid])->p_shallow == false) ;
-        (Bslice [tid])->p [0] = 0 ;
-        for (int64_t k = 1 ; k < bslice_nvec ; k++)
-        { 
-            // construct Bslice [tid]->p
-            (Bslice [tid])->p [k] = B->p [bvec_first + k] - pfirst ;
-        }
-        (Bslice [tid])->p [bslice_nvec] = bslice_nz ;
-        (Bslice [tid])->nvec_nonempty = -1 ;
-
-        (Bslice [tid])->nzmax = bslice_nz ;
-        (Bslice [tid])->magic = GB_MAGIC ;
-
-        ASSERT_MATRIX_OK (Bslice [tid], "Bslice", GB0) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // return the slices
-    //--------------------------------------------------------------------------
-
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_free.c b/Source/GB_free.c
index d3829ec3a6..ef2636a902 100644
--- a/Source/GB_free.c
+++ b/Source/GB_free.c
@@ -2,7 +2,7 @@
 // GB_free: free a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_free_memory.c b/Source/GB_free_memory.c
index ad9253bd14..170560a9bc 100644
--- a/Source/GB_free_memory.c
+++ b/Source/GB_free_memory.c
@@ -2,7 +2,7 @@
 // GB_free_memory: wrapper for free (used via the GB_FREE_MEMORY macro)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_hcat_fine_slice.c b/Source/GB_hcat_fine_slice.c
deleted file mode 100644
index 6fc3a6275d..0000000000
--- a/Source/GB_hcat_fine_slice.c
+++ /dev/null
@@ -1,394 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_hcat_fine_slice: horizontal concatenation and summation of slices of C
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Horizontal concatenation and summation of fine slices into the matrix C.
-
-#include "GB_mxm.h"
-#include "GB_Sauna.h"
-#include "GB_sort.h"
-
-GrB_Info GB_hcat_fine_slice // horizontal concatenation and sum of slices of C
-(
-    GrB_Matrix *Chandle,    // output matrix C to create
-    int nthreads,           // # of slices to concatenate
-    GrB_Matrix *Cslice,     // array of slices of size nthreads
-    GrB_Monoid add,         // monoid to use to sum up the entries
-    int *Sauna_ids,         // size nthreads, Sauna id's of each thread
-    GB_Context Context
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (nthreads > 1) ;
-    ASSERT (Chandle != NULL) ;
-    ASSERT (*Chandle == NULL) ;
-    ASSERT (Cslice != NULL) ;
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    {
-        ASSERT_MATRIX_OK (Cslice [tid], "a fine slice of C", GB0) ;
-        ASSERT (!GB_PENDING (Cslice [tid])) ;
-        ASSERT (!GB_ZOMBIES (Cslice [tid])) ;
-        ASSERT ((Cslice [tid])->is_hyper) ;
-        // each Cslice [tid] is constructed as its own matrix, with
-        // Cslice [tid] = A * Bslice [tid].  It is not a slice of an other
-        // matrix, so Cslice [tid]->is_slice is false.
-        ASSERT (!(Cslice [tid])->is_slice) ;
-        ASSERT ((Cslice [tid])->type == (Cslice [0])->type) ;
-        ASSERT ((Cslice [tid])->vlen == (Cslice [0])->vlen) ;
-        ASSERT ((Cslice [tid])->vdim == (Cslice [0])->vdim) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // find the size and type of C
-    //--------------------------------------------------------------------------
-
-    // all the slices have the same type and dimension
-    GrB_Type ctype = (Cslice [0])->type ;
-    int64_t  cvlen = (Cslice [0])->vlen ;
-    int64_t  cvdim = (Cslice [0])->vdim ;
-
-    // cnz and cnvec are upper bounds; exact values computed later
-    int64_t cnz = 0 ;               // upper bound on nnz(C)
-    int64_t cnvec = 0 ;             // upper bound on # vectors in C
-
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    { 
-        // compute the cumulative sum of the # entries and # vectors
-        cnz   += GB_NNZ (Cslice [tid]) ;
-        cnvec += (Cslice [tid])->nvec ;
-    }
-
-    //--------------------------------------------------------------------------
-    // create C and allocate all of its space
-    //--------------------------------------------------------------------------
-
-    #define GB_FREE_ALL 
-
-    GrB_Info info ;
-    GB_CREATE (Chandle, ctype, cvlen, cvdim, GB_Ap_malloc, true,
-        GB_FORCE_HYPER, GB_Global_hyper_ratio_get ( ), cnvec, cnz, true,
-        Context) ;
-    if (info != GrB_SUCCESS)
-    { 
-        // out of memory
-        return (GB_OUT_OF_MEMORY) ;
-    }
-
-    #undef  GB_FREE_ALL 
-    #define GB_FREE_ALL                     \
-    {                                       \
-        GB_MATRIX_FREE (Chandle) ;          \
-    }
-
-    GrB_Matrix C = (*Chandle) ;
-
-    int64_t *GB_RESTRICT Ch = C->h ;
-    int64_t *GB_RESTRICT Cp = C->p ;
-    int64_t *GB_RESTRICT Ci = C->i ;
-    GB_void *GB_RESTRICT Cx = C->x ;
-    size_t csize = ctype->size ;
-
-    C->nvec_nonempty = -1 ;
-
-    //--------------------------------------------------------------------------
-    // acquire a single Sauna
-    //--------------------------------------------------------------------------
-
-    // FUTURE: use a hypersparse-friendly method instead of the Sauna
-
-    GB_Sauna Sauna = NULL ;
-    int Sauna_id = -2 ;
-    GB_OK (GB_Sauna_acquire (1, &Sauna_id, NULL, Context)) ;
-    Sauna = GB_Global_Saunas_get (Sauna_id) ;
-
-    if (Sauna == NULL || Sauna->Sauna_n < cvlen || Sauna->Sauna_size < csize)
-    {
-        // The Sauna id has been acquired, but the Sauna is either NULL
-        // (not yet allocated) or it is too small.
-        GB_Sauna_free (Sauna_id) ;
-        info = GB_Sauna_alloc (Sauna_id, cvlen, csize) ;
-        if (info != GrB_SUCCESS)
-        { 
-            // out of memory
-            GB_FREE_ALL ;
-            GB_OK (GB_Sauna_release (1, &Sauna_id)) ;
-            return (GB_OUT_OF_MEMORY) ;
-        }
-    }
-    Sauna = GB_Global_Saunas_get (Sauna_id) ;
-
-    // Sauna has been acquired
-    ASSERT (Sauna != NULL) ;
-    ASSERT (Sauna->Sauna_n >= cvlen) ;
-    ASSERT (Sauna->Sauna_size >= csize) ;
-
-    // hiwater++
-    int64_t hiwater = GB_Sauna_reset (Sauna, 1, 0) ;
-    int64_t *GB_RESTRICT Sauna_Mark = Sauna->Sauna_Mark ;
-
-    // Sauna_Mark [0..cvlen-1] < hiwater holds
-    ASSERT_SAUNA_IS_RESET ;
-
-    // Sauna_Work has size cvlen, each entry of size csize.  Not initialized.
-    GB_void *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-
-    //--------------------------------------------------------------------------
-    // copy and sum each slice into C
-    //--------------------------------------------------------------------------
-
-    cnz = 0 ;           // now compute the exact cnz and cnvec of C
-    cnvec = 0 ;
-
-    // last_vector of prior slice
-    int64_t last_vector = -1 ;
-
-    GxB_binary_function fadd = add->op->function ;
-    ASSERT (C->type == add->op->ztype) ;
-
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    {
-
-        //----------------------------------------------------------------------
-        // get the Cslice [tid] and its position in C
-        //----------------------------------------------------------------------
-
-        ASSERT_MATRIX_OK (Cslice [tid], "Cslice [tid]", GB0) ;
-
-        int64_t *GB_RESTRICT Csliceh = (Cslice [tid])->h ;
-        int64_t *GB_RESTRICT Cslicep = (Cslice [tid])->p ;
-        int64_t *GB_RESTRICT Cslicei = (Cslice [tid])->i ;
-        GB_void *GB_RESTRICT Cslicex = (Cslice [tid])->x ;
-        int64_t cnz_slice   = GB_NNZ (Cslice [tid]) ;
-        int64_t cnvec_slice = (Cslice [tid])->nvec ;
-
-        // skip if Cslice [tid] is empty
-        if (cnvec_slice == 0) continue ;
-
-        //----------------------------------------------------------------------
-        // discard the first vector in Cslice [tid], if already summed
-        //----------------------------------------------------------------------
-
-        int64_t kfirst = 0 ;
-        if (Csliceh [0] == last_vector)
-        { 
-            kfirst = 1 ;
-        }
-
-        // skip if Cslice [tid] is now empty
-        if (cnvec_slice - kfirst == 0) continue ;
-
-        //----------------------------------------------------------------------
-        // search for last vector in subsequent slices
-        //----------------------------------------------------------------------
-
-        // search for the last vector of Cslice [tid] in all slices tid+1:tfine
-        // that contain it as their first vector
-        last_vector = Csliceh [cnvec_slice-1] ;
-        int tfine = tid ;
-        while (tfine+1 < nthreads)
-        {
-            if ((Cslice [tfine+1])->nvec == 0)
-            { 
-                // slice tfine+1 is empty; include it and keep looking
-                tfine++ ;
-                continue ;
-            }
-            int64_t first_vector = (Cslice [tfine+1])->h [0] ;
-            if (last_vector == first_vector)
-            { 
-                // the last vector of Cslice [tid] is the same as the first
-                // vector of Cslice [tfine+1].  Add tfine+1 to the list, and
-                // continue looking for last_vector in subsequent slices.
-                tfine++ ;
-                continue ;
-            }
-            else
-            { 
-                // Cslice [tfine+1] starts with a different vector than the
-                // last vector of Cslice [tid], so it is not part of the list.
-                break ;
-            }
-        }
-
-        #ifdef GB_DEBUG
-        // check the list
-        for (int tid2 = tid + 1 ; tid2 <= tfine ; tid2++)
-        {
-            // slice tid2 is in the set tid+1:tfine.  If it has any vectors, its
-            // first vector is last_vector
-            ASSERT ((Cslice [tid2])->nvec >= 0) ;
-            if ((Cslice [tid2])->nvec > 0) 
-            {
-                ASSERT ((Cslice [tid2])->h [0] == last_vector) ;
-            }
-        }
-        if (tfine+1 < nthreads)
-        {
-            // slice tfine+1, if it exists, contains at least one vector, and
-            // it differs from last_vector
-            ASSERT ((Cslice [tfine+1])->nvec > 0) ;
-            ASSERT ((Cslice [tfine+1])->h [0] != last_vector) ;
-        }
-        #endif
-
-        //----------------------------------------------------------------------
-        // copy the bulk of Cslice [tid] into C
-        //----------------------------------------------------------------------
-
-        // exclude the first vector from Cslice [tid] if it's been discarded
-        int64_t pfirst = Cslicep [kfirst] ;
-
-        // copy the row indices of Cslice [tid] into Ci and Cx
-        memcpy (Ci +(cnz), Cslicei +(pfirst),
-            (cnz_slice - pfirst) * sizeof (int64_t)) ;
-
-        // copy the values of Cslice [tid] into Ci and Cx
-        memcpy (Cx +(cnz * csize), Cslicex +(pfirst * csize),
-            (cnz_slice - pfirst) * csize) ;
-
-        // copy the column indices of Cslice into Ch
-        memcpy (Ch +(cnvec), Csliceh +(kfirst),
-            (cnvec_slice - kfirst) * sizeof (int64_t)) ;
-
-        // construct the column pointers of C (shift upwards by cnz)
-        for (int64_t k = kfirst ; k < cnvec_slice ; k++)
-        { 
-            Cp [cnvec++] = Cslicep [k] + cnz - pfirst ;
-        }
-
-        // entries and vectors have been appended to C
-        cnz   += (cnz_slice - pfirst) ;
-
-        // the last_vector in C starts at Ci [cnz_last] and Cx [cnz_last]
-        int64_t cnz_last = Cp [cnvec-1] ;
-
-        //----------------------------------------------------------------------
-        // handle the last vector, if it needs summation
-        //----------------------------------------------------------------------
-
-        if (tfine > tid)
-        {
-
-            //------------------------------------------------------------------
-            // scatter the last_vector from Cslice [tid] into the Sauna
-            //------------------------------------------------------------------
-
-            int64_t pstart = Cslicep [cnvec_slice-1] ;
-            int64_t pend   = Cslicep [cnvec_slice] ;
-            for (int64_t p = pstart ; p < pend ; p++)
-            { 
-                int64_t i = Cslicei [p] ;
-                Sauna_Mark [i] = hiwater ;
-                // Sauna_Work [i] = Cslicex [p]
-                memcpy (Sauna_Work +(i*csize), Cslicex +(p*csize), csize) ;
-            }
-
-            bool unsorted = false ;
-
-            //------------------------------------------------------------------
-            // scatter and add each subsequent vector
-            //------------------------------------------------------------------
-
-            for (int tid2 = tid + 1 ; tid2 <= tfine ; tid2++)
-            {
-                // skip if Cslice [tid2] is empty
-                if ((Cslice [tid2])->nvec == 0) continue ;
-
-                int64_t *GB_RESTRICT Cslice2p = (Cslice [tid2])->p ;
-                int64_t *GB_RESTRICT Cslice2i = (Cslice [tid2])->i ;
-                GB_void *GB_RESTRICT Cslice2x = (Cslice [tid2])->x ;
-
-                // scatter/add first vector from Cslice [tid2] into the Sauna
-                int64_t pstart = Cslice2p [0] ;
-                int64_t pend   = Cslice2p [1] ;
-                for (int64_t p = pstart ; p < pend ; p++)
-                {
-                    int64_t i = Cslice2i [p] ;
-                    if (Sauna_Mark [i] < hiwater)
-                    { 
-                        // first time row index i has been seen
-                        Sauna_Mark [i] = hiwater ;
-                        // Sauna_Work [i] = Cslice2x [p]
-                        memcpy (Sauna_Work +(i*csize), Cslice2x +(p*csize),
-                                csize) ;
-                        // append row index i to C
-                        Ci [cnz++] = i ;
-                        unsorted = true ;
-                    }
-                    else
-                    { 
-                        // C(i,last_vector) += Cslice2 (i,last_vector)
-                        fadd (Sauna_Work +(i*csize), Sauna_Work +(i*csize),
-                              Cslice2x +(p*csize)) ;
-                    }
-                }
-            }
-
-            //------------------------------------------------------------------
-            // sort the pattern of C(:,j)
-            //------------------------------------------------------------------
-
-            if (unsorted)
-            {
-                // sort the pattern of C(:,j)
-                int64_t len = cnz - cnz_last ;
-                if (len == cvlen)
-                {
-                    // no need to sort C(:,j) if dense; just recreate it
-                    for (int64_t pC = cnz_last, i = 0 ; pC < cnz ; pC++, i++)
-                    { 
-                        Ci [pC] = i ;
-                    }
-                }
-                else
-                { 
-                    // sort the nonzero indices in C(:,j)
-                    GB_qsort_1a (Ci + cnz_last, len) ;
-                }
-            }
-
-            //------------------------------------------------------------------
-            // gather the values into C(:,j)
-            //------------------------------------------------------------------
-
-            for (int64_t pC = cnz_last ; pC < cnz ; pC++)
-            { 
-                int64_t i = Ci [pC] ;
-                // Cx [pC] = Sauna_Work [i]
-                memcpy (Cx +(pC*csize), Sauna_Work +(i*csize), csize) ;
-            }
-
-            //------------------------------------------------------------------
-            // hiwater++
-            //------------------------------------------------------------------
-
-            hiwater = GB_Sauna_reset (Sauna, 1, 0) ;
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // release the Sauna workspace
-    //--------------------------------------------------------------------------
-
-    GB_OK (GB_Sauna_release (1, &Sauna_id)) ;
-
-    //--------------------------------------------------------------------------
-    // finalize the matrix
-    //--------------------------------------------------------------------------
-
-    C->nvec = cnvec ;
-    Cp [cnvec] = cnz ;
-    C->magic = GB_MAGIC ;
-    ASSERT_MATRIX_OK (C, "C from fine concatenation", GB0) ;
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_hcat_slice.c b/Source/GB_hcat_slice.c
deleted file mode 100644
index 4facefb296..0000000000
--- a/Source/GB_hcat_slice.c
+++ /dev/null
@@ -1,173 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_hcat_slice: horizontal concatenation of the slices of C
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Horizontal concatenation of slices into the matrix C.
-
-#define GB_FREE_WORK                                            \
-{                                                               \
-    GB_FREE_MEMORY (Cnzs,   nthreads+1, sizeof (int64_t)) ;     \
-    GB_FREE_MEMORY (Cnvecs, nthreads+1, sizeof (int64_t)) ;     \
-}
-
-#include "GB_mxm.h"
-
-GrB_Info GB_hcat_slice      // horizontal concatenation of the slices of C
-(
-    GrB_Matrix *Chandle,    // output matrix C to create
-    int nthreads,           // # of slices to concatenate
-    GrB_Matrix *Cslice,     // array of slices of size nthreads
-    GB_Context Context
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (nthreads > 1) ;
-    ASSERT (Chandle != NULL) ;
-    ASSERT (*Chandle == NULL) ;
-    ASSERT (Cslice != NULL) ;
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    {
-        ASSERT_MATRIX_OK (Cslice [tid], "a slice of C", GB0) ;
-        ASSERT (!GB_PENDING (Cslice [tid])) ;
-        ASSERT (!GB_ZOMBIES (Cslice [tid])) ;
-        ASSERT ((Cslice [tid])->is_hyper) ;
-        // each Cslice [tid] is constructed as its own matrix, with Cslice
-        // [tid] = A * Bslice [tid].  It is not a slice of an other matrix, so
-        // Cslice [tid]->is_slice is false.
-        ASSERT (!(Cslice [tid])->is_slice) ;
-        ASSERT ((Cslice [tid])->type == (Cslice [0])->type) ;
-        ASSERT ((Cslice [tid])->vlen == (Cslice [0])->vlen) ;
-        ASSERT ((Cslice [tid])->vdim == (Cslice [0])->vdim) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // allocate workspace
-    //--------------------------------------------------------------------------
-
-    int64_t *GB_RESTRICT Cnzs   ;  // size nthreads+1
-    int64_t *GB_RESTRICT Cnvecs ;  // size nthreads+1
-    GB_MALLOC_MEMORY (Cnzs,   nthreads+1, sizeof (int64_t)) ;
-    GB_MALLOC_MEMORY (Cnvecs, nthreads+1, sizeof (int64_t)) ;
-    if (Cnzs == NULL || Cnvecs == NULL)
-    {
-        // out of memory
-        GB_FREE_WORK ;
-        return (GB_OUT_OF_MEMORY) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // find the size and type of C
-    //--------------------------------------------------------------------------
-
-    // Let cnz_slice [tid] be the number of entries in Cslice [tid], and let
-    // cnvec_slice [tid] be the number vectors in Cslice [tid].  Then Cnzs and
-    // Cnvecs are cumulative sums of cnz_slice and cnvec_slice, respectively:
-
-    // Cnzs   [tid] = sum of cnz_slice   [0:tid-1]
-    // Cnvecs [tid] = sum of cnvec_slice [0:tid-1]
-
-    // both arrays are size nthreads+1.  Thus, both Cnzs [0] and Cnvecs [0] are
-    // zero, and their last entries are the total # entries and vectors in C,
-    // respectively.
-
-    // all the slices have the same type and dimension
-    GrB_Type ctype = (Cslice [0])->type ;
-    int64_t  cvlen = (Cslice [0])->vlen ;
-    int64_t  cvdim = (Cslice [0])->vdim ;
-
-    int64_t cnz = 0 ;
-    int64_t cnvec = 0 ;
-    int64_t cnvec_nonempty = 0 ;
-
-    for (int tid = 0 ; tid < nthreads ; tid++)
-    { 
-        // compute the cumulative sum of the # entries and # vectors
-        Cnzs   [tid] = cnz ;
-        Cnvecs [tid] = cnvec ;
-        cnz   += GB_NNZ (Cslice [tid]) ;
-        cnvec += (Cslice [tid])->nvec ;
-        // also sum the total number of non-empty vectors in all the slices
-        cnvec_nonempty += (Cslice [tid])->nvec_nonempty ;
-    }
-
-    Cnzs   [nthreads] = cnz ;       // total # entries in C
-    Cnvecs [nthreads] = cnvec ;     // total # vectors in C
-
-    //--------------------------------------------------------------------------
-    // create C and allocate all of its space
-    //--------------------------------------------------------------------------
-
-    GrB_Info info ;
-    GB_CREATE (Chandle, ctype, cvlen, cvdim, GB_Ap_malloc, true,
-        GB_FORCE_HYPER, GB_Global_hyper_ratio_get ( ), cnvec, cnz, true,
-        Context) ;
-    if (info != GrB_SUCCESS)
-    { 
-        // out of memory
-        GB_FREE_WORK ;
-        return (GB_OUT_OF_MEMORY) ;
-    }
-
-    GrB_Matrix C = (*Chandle) ;
-
-    int64_t *GB_RESTRICT Ch = C->h ;
-    int64_t *GB_RESTRICT Cp = C->p ;
-    int64_t *GB_RESTRICT Ci = C->i ;
-    GB_void *GB_RESTRICT Cx = C->x ;
-    size_t csize = ctype->size ;
-
-    C->nvec_nonempty = cnvec_nonempty ;
-    C->nvec = cnvec ;
-    Cp [cnvec] = cnz ;
-
-    //--------------------------------------------------------------------------
-    // copy each slice into C
-    //--------------------------------------------------------------------------
-
-    int tid ;
-    #pragma omp parallel for num_threads(nthreads) schedule(static,1)
-    for (tid = 0 ; tid < nthreads ; tid++)
-    {
-        // get the Cslice [tid] and its position in C
-        int64_t *GB_RESTRICT Csliceh = (Cslice [tid])->h ;
-        int64_t *GB_RESTRICT Cslicep = (Cslice [tid])->p ;
-        int64_t *GB_RESTRICT Cslicei = (Cslice [tid])->i ;
-        GB_void *GB_RESTRICT Cslicex = (Cslice [tid])->x ;
-        int64_t cnz         = Cnzs   [tid] ;
-        int64_t cnz_slice   = Cnzs   [tid+1] - cnz ;
-        int64_t cnvec       = Cnvecs [tid] ;
-        int64_t cnvec_slice = Cnvecs [tid+1] - cnvec ;
-
-        // copy the row indices and values of Cslice [tid] into Ci and Cx
-        memcpy (Ci + cnz        , Cslicei, cnz_slice * sizeof (int64_t)) ;
-        memcpy (Cx + cnz * csize, Cslicex, cnz_slice * csize) ;
-
-        // copy the column indices of Cslice into Ch
-        memcpy (Ch + cnvec, Csliceh, cnvec_slice * sizeof (int64_t)) ;
-
-        // construct the column pointers of C (shift upwards by cnz)
-        for (int64_t k = 0 ; k < cnvec_slice ; k++)
-        { 
-            Cp [cnvec + k] = Cslicep [k] + cnz ;
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // free workspace and finalize the matrix
-    //--------------------------------------------------------------------------
-
-    GB_FREE_WORK ;
-    C->magic = GB_MAGIC ;
-    ASSERT_MATRIX_OK (C, "C from horizontal concatenation", GB0) ;
-    return (GrB_SUCCESS) ;
-}
-
diff --git a/Source/GB_heap.h b/Source/GB_heap.h
deleted file mode 100644
index 2225bd8f54..0000000000
--- a/Source/GB_heap.h
+++ /dev/null
@@ -1,284 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_heap: a Heap data structure and its operations
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// The Heap is an array of GB_Elements: Heap [1..nheap].  Each entry in the
-// Heap is a GB_Element, with a key and name.
-
-// These functions are only used by the heap method for C=A*B.
-// See Source/Template/GB_AxB_heap_mask.c.
-
-#ifndef GB_HEAP
-#define GB_HEAP
-
-#ifdef GB_DEBUG
-
-//------------------------------------------------------------------------------
-// GB_heap_check: make sure the min-heap property holds for the whole Heap
-//------------------------------------------------------------------------------
-
-// Check the entire Heap to see if it has the min-heap property:  for all nodes
-// in the Heap, the key of a node must less than or equal to the keys of its
-// children  (duplicate keys may appear).  An empty Heap or a Heap of size 1
-// always satisfies the min-heap property, but nheap < 0 is invalid.  This
-// function is for assertions only.
-
-static inline bool GB_heap_check
-(
-    const GB_Element *GB_RESTRICT Heap,    // Heap [1..nheap], not modified
-    const int64_t nheap                 // the number of nodes in the Heap
-)
-{
-
-    if (Heap == NULL || nheap < 0)
-    {
-        // Heap is invalid
-        return (false) ;
-    }
-
-    // nodes nheap/2 ... nheap have no children, so no need to check them
-    for (int64_t p = 1 ; p <= nheap / 2 ; p++)
-    {
-
-        // consider node p.  Its key must be <= the key of both its children.
-
-        int64_t pleft  = 2*p ;          // left child of node p
-        int64_t pright = pleft + 1 ;    // right child of node p
-
-        if (pleft <= nheap && Heap [p].key > Heap [pleft].key)
-        {
-            // left child of p is in the Heap, but p has a bigger key;
-            // the min-heap property is not satisfied
-            return (false) ;
-        }
-
-        if (pright <= nheap && Heap [p].key > Heap [pright].key)
-        {
-            // left child of p is in the Heap, but p has a bigger key;
-            // the min-heap property is not satisfied
-            return (false) ;
-        }
-    }
-
-    // Heap is OK and satisfies the min-heap property
-    return (true) ;
-}
-
-//------------------------------------------------------------------------------
-// GB_heap_path_check: make sure a path in the Heap is valid
-//------------------------------------------------------------------------------
-
-// The path from node p up to the root node 1 (node p, parent(p),
-// parent(parent(p)) ... to node 1) must all have the same key where
-// parent(p)=p/2.
-
-static inline bool GB_heap_pathcheck
-(
-    int64_t p,                          // node to check, in range 1..nheap
-    const GB_Element *GB_RESTRICT Heap,    // Heap [1..nheap], not modified
-    const int64_t nheap                 // the number of nodes in the Heap
-)
-{
-
-    if (Heap == NULL || nheap < 0 || p < 1 || p > nheap)
-    {
-        // Heap is invalid or node p is not in the range 1..nheap
-        return (false) ;
-    }
-
-    for (int64_t kheap = p ; kheap >= 1 ; kheap = kheap / 2)
-    {
-        if (Heap [p].key != Heap [kheap].key)
-        {
-            // key of node p does not match one if its ancestors
-            return (false) ;
-        }
-    }
-
-    // all nodes from p to the root have the same key
-    return (true) ;
-}
-
-#endif
-
-//------------------------------------------------------------------------------
-// GB_heapify: enforce the min-heap property of a node
-//------------------------------------------------------------------------------
-
-// Heapify starting at node p in the Heap.  On input, the Heap rooted at node p
-// satisfies the min-heap property, except for Heap [p] itself.  On output, all
-// of the Heap rooted at node p satisfies the min-heap property.
-
-static inline void GB_heapify
-(
-    int64_t p,                      // node that needs to be heapified
-    GB_Element *GB_RESTRICT Heap,      // Heap [1..nheap]; modified
-    const int64_t nheap             // the number of nodes in the Heap
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs and check for quick return
-    //--------------------------------------------------------------------------
-
-    ASSERT (Heap != NULL) ;
-
-    if (p > nheap / 2 || nheap <= 1)
-    { 
-        // nothing to do.  p has no children in the Heap.
-        // Also safely do nothing if p is outside the Heap (p > nheap).
-        return ;
-    }
-
-    //--------------------------------------------------------------------------
-    // get the element to heapify
-    //--------------------------------------------------------------------------
-
-    // Get the element e at node p in the Heap; the one that needs heapifying.
-    GB_Element e = Heap [p] ;
-
-    // There is now a "hole" at Heap [p], with no element in it.
-
-    //--------------------------------------------------------------------------
-    // heapify
-    //--------------------------------------------------------------------------
-
-    while (true)
-    {
-
-        //----------------------------------------------------------------------
-        // consider node p in the Heap
-        //----------------------------------------------------------------------
-
-        // Heap [p] is the "hole" in the Heap
-
-        int64_t pleft  = 2*p ;          // left child of node p
-        int64_t pright = pleft + 1 ;    // right child of node p
-
-        if (pright <= nheap)
-        {
-
-            //------------------------------------------------------------------
-            // both left and right children are in the Heap
-            //------------------------------------------------------------------
-
-            if (Heap [pleft].key < Heap [pright].key)
-            {
-                // left node has a smaller key than the right node
-                if (e.key > Heap [pleft].key)
-                { 
-                    // key of element e is bigger than the left child of p, so
-                    // bubble up the left child into the hole at Heap [p] and
-                    // continue down the left child.  The hole moves to node
-                    // pleft.
-                    Heap [p] = Heap [pleft] ;
-                    p = pleft ;
-                }
-                else
-                { 
-                    // done!  key of element e is is smaller than the left
-                    // child of p; place e in the hole at p, and we're done.
-                    Heap [p] = e ;
-                    return ;
-                }
-            }
-            else
-            {
-                // right node has a smaller key than the left node
-                if (e.key > Heap [pright].key)
-                { 
-                    // key of element e is bigger than the right child of p, so
-                    // bubble up the right child into hole at Heap [p] and
-                    // continue down the right child.  The hole moves to node
-                    // pright.
-                    Heap [p] = Heap [pright] ;
-                    p = pright ;
-                }
-                else
-                { 
-                    // done!  key of element e is is smaller than the right
-                    // child of p; place e in the hole at p, and we're done.
-                    Heap [p] = e ;
-                    return ;
-                }
-            }
-        }
-        else
-        {
-
-            //------------------------------------------------------------------
-            // right child is not in the Heap, see if left child is in the Heap
-            //------------------------------------------------------------------
-
-            if (pleft <= nheap)
-            {
-                // left child is in the Heap; check its key
-                if (e.key > Heap [pleft].key)
-                { 
-                    // key of element e is bigger than the left child of p, so
-                    // bubble up the left child into the hole at Heap [p] and
-                    // continue down the left child.  The hole moves to node
-                    // pleft.
-                    Heap [p] = Heap [pleft] ;
-                    p = pleft ;
-                }
-            }
-
-            //------------------------------------------------------------------
-            // node p is a hole, and it has no children
-            //------------------------------------------------------------------
-
-            // put e in the hole, and we're done
-            Heap [p] = e ;
-            return ;
-        }
-    }
-}
-
-//------------------------------------------------------------------------------
-// GB_heap_build: construct a Heap
-//------------------------------------------------------------------------------
-
-// On input, the Heap [1..nheap] may not satisfy the min-heap property.
-// On output, the elements have been rearranged so that it does.
-
-void GB_heap_build
-(
-    GB_Element *GB_RESTRICT Heap,  // Heap [1..nheap]; modified
-    const int64_t nheap         // the number of nodes in the Heap
-) ;
-
-//------------------------------------------------------------------------------
-// GB_heap_delete: delete an element in the middle of a Heap
-//------------------------------------------------------------------------------
-
-void GB_heap_delete
-(
-    int64_t p,                  // node that needs to be deleted
-    GB_Element *GB_RESTRICT Heap,  // Heap [1..nheap]
-    int64_t *GB_RESTRICT nheap     // the number of nodes in the Heap;
-                                // decremented on output
-) ;
-
-//------------------------------------------------------------------------------
-// GB_heap_getminlist: get a list of all nodes with minimum key
-//------------------------------------------------------------------------------
-
-int64_t GB_heap_getminlist      // returns Heap [1].key
-(
-    const GB_Element *GB_RESTRICT Heap,    // Heap [1..nheap], not modified
-    const int64_t nheap,                // the number of nodes in the Heap
-    // output
-    int64_t *GB_RESTRICT List,     // List [0..nlist-1] is a list of all nodes p
-                                // with Heap [p].key == Heap [1].key.  Node 1
-                                // is always in the list.  List has size nheap.
-    int64_t *GB_RESTRICT nlist     // the size of the List
-) ;
-
-#endif
-
diff --git a/Source/GB_heap_build.c b/Source/GB_heap_build.c
deleted file mode 100644
index 1aa5829c29..0000000000
--- a/Source/GB_heap_build.c
+++ /dev/null
@@ -1,45 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_heap_build: construct a Heap
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// On input, the Heap [1..nheap] may not satisfy the min-heap property.
-// On output, the elements have been rearranged so that it does.
-
-#include "GB.h"
-#include "GB_heap.h"
-
-void GB_heap_build
-(
-    GB_Element *GB_RESTRICT Heap,  // Heap [1..nheap]; modified
-    const int64_t nheap         // the number of nodes in the Heap
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (Heap != NULL && nheap >= 0) ;
-
-    //--------------------------------------------------------------------------
-    // build the Heap
-    //--------------------------------------------------------------------------
-
-    for (int64_t p = nheap / 2 ; p >= 1 ; p--)
-    { 
-        GB_heapify (p, Heap, nheap) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // check result
-    //--------------------------------------------------------------------------
-
-    // Heap [1..nheap] now satisfies the min-heap property
-    ASSERT (GB_heap_check (Heap, nheap)) ;
-}
-
diff --git a/Source/GB_heap_delete.c b/Source/GB_heap_delete.c
deleted file mode 100644
index 8c65ca1e8f..0000000000
--- a/Source/GB_heap_delete.c
+++ /dev/null
@@ -1,39 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_heap_delete: delete an element in the middle of a Heap
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-#include "GB.h"
-#include "GB_heap.h"
-
-void GB_heap_delete
-(
-    int64_t p,                  // node that needs to be deleted
-    GB_Element *GB_RESTRICT Heap,  // Heap [1..nheap]
-    int64_t *GB_RESTRICT nheap     // the number of nodes in the Heap;
-                                // decremented on output
-)
-{ 
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (Heap != NULL && (*nheap) >= 0) ;
-    ASSERT (p >= 0 && p <= (*nheap)) ;
-
-    //--------------------------------------------------------------------------
-    // delete node p from the Heap
-    //--------------------------------------------------------------------------
-
-    // move the last node to node p and decrement the # of nodes in the Heap
-    Heap [p] = Heap [(*nheap)--] ;
-
-    // heapify node p (safely does nothing if node p was the one just deleted)
-    GB_heapify (p, Heap, (*nheap)) ;
-}
-
diff --git a/Source/GB_heap_getminlist.c b/Source/GB_heap_getminlist.c
deleted file mode 100644
index 4f65430e18..0000000000
--- a/Source/GB_heap_getminlist.c
+++ /dev/null
@@ -1,93 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_heap_getminlist: get a list of all nodes with minimum key
-//------------------------------------------------------------------------------ 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Constructs a list of all nodes in the Heap with a key equal to Heap [1].key.
-// The Heap is not modified.  The list is returned in topological order: If
-// node p appears as p = List [k], and if its left child pleft = 2*p is in the
-// list at pleft = List [kleft], then k < kleft.  Likewise for its right child,
-// pright = 2*p+1.
-
-#include "GB.h"
-#include "GB_heap.h"
-
-int64_t GB_heap_getminlist      // returns Heap [1].key
-(
-    const GB_Element *GB_RESTRICT Heap,    // Heap [1..nheap], not modified
-    const int64_t nheap,                // the number of nodes in the Heap
-    // output
-    int64_t *GB_RESTRICT List,     // List [0..nlist-1] is a list of all nodes p
-                                // with Heap [p].key == Heap [1].key.  Node 1
-                                // is always in the list.  List has size nheap.
-    int64_t *GB_RESTRICT nlist     // the size of the List
-)
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (GB_heap_check (Heap, nheap)) ;
-    ASSERT (nheap >= 1) ;
-
-    //--------------------------------------------------------------------------
-    // start the list with node 1
-    //--------------------------------------------------------------------------
-
-    // nothing in the List
-    (*nlist) = 0 ;
-
-    // push node 1 on the stack (in workspace at the bottom of the List)
-    int64_t top = nheap ;
-    List [--top] = 1 ;
-
-    // get the key of node 1
-    int64_t minkey = Heap [1].key ;
-
-    //--------------------------------------------------------------------------
-    // fill the List while the stack is not empty
-    //--------------------------------------------------------------------------
-
-    while (top < nheap)
-    {
-        // pop the top of the stack
-        int64_t p = List [top++] ;
-
-        // append p to the List
-        List [(*nlist)++] = p ;
-
-        // push its right child on the stack, if it has the same key
-        int64_t pright = 2*p + 1 ;
-        if (pright <= nheap && Heap [pright].key == minkey)
-        { 
-            List [--top] = pright ;
-        }
-
-        // push its left child on the stack, if it has the same key
-        int64_t pleft = 2*p ;
-        if (pleft <= nheap && Heap [pleft].key == minkey)
-        { 
-            List [--top] = pleft ;
-        }
-    }
-
-    //--------------------------------------------------------------------------
-    // return the result
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_DEBUG
-    for (int64_t klist = 0 ; klist < (*nlist) ; klist++)
-    {
-        // each node p in the List satisfies the path property
-        int64_t p = List [klist] ;
-        ASSERT (GB_heap_pathcheck (p, Heap, nheap)) ;
-    }
-    #endif
-
-    return (minkey) ;
-}
-
diff --git a/Source/GB_hyper_prune.c b/Source/GB_hyper_prune.c
index 0952b85051..b64ac8f2d4 100644
--- a/Source/GB_hyper_prune.c
+++ b/Source/GB_hyper_prune.c
@@ -2,7 +2,7 @@
 // GB_hyper_prune: remove empty vectors from a hypersparse Ap, Ah list
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -56,7 +56,7 @@ GrB_Info GB_hyper_prune
     int64_t *GB_RESTRICT W ;
     GB_MALLOC_MEMORY (W, nvec_old+1, sizeof (int64_t)) ;
     if (W == NULL)
-    {
+    { 
         // out of memory
         return (GB_OUT_OF_MEMORY) ;
     }
diff --git a/Source/GB_hyper_realloc.c b/Source/GB_hyper_realloc.c
index 2afaa00de9..93f565e732 100644
--- a/Source/GB_hyper_realloc.c
+++ b/Source/GB_hyper_realloc.c
@@ -2,7 +2,7 @@
 // GB_hyper_realloc: reallocate a matrix hyperlist
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_hypermatrix_prune.c b/Source/GB_hypermatrix_prune.c
new file mode 100644
index 0000000000..c2ebd7448c
--- /dev/null
+++ b/Source/GB_hypermatrix_prune.c
@@ -0,0 +1,75 @@
+//------------------------------------------------------------------------------
+// GB_hypermatrix_prune: prune empty vectors from a hypersparse matrix
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+
+GrB_Info GB_hypermatrix_prune
+(
+    GrB_Matrix A,               // matrix to prune
+    GB_Context Context
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    ASSERT (A != NULL) ;
+    ASSERT (GB_ZOMBIES_OK (A)) ;
+    if (!A->is_hyper)
+    { 
+        // nothing to do
+        return (GrB_SUCCESS) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // count # of empty vectors
+    //--------------------------------------------------------------------------
+
+    if (A->nvec_nonempty < 0)
+    { 
+        A->nvec_nonempty = GB_nvec_nonempty (A, Context) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // prune empty vectors
+    //--------------------------------------------------------------------------
+
+    if (A->nvec_nonempty < A->nvec)
+    { 
+        // create new Ap_new and Ah_new arrays, with no empty vectors
+        int64_t *GB_RESTRICT Ap_new = NULL ;
+        int64_t *GB_RESTRICT Ah_new = NULL ;
+        int64_t nvec_new ;
+        GrB_Info info = GB_hyper_prune (&Ap_new, &Ah_new, &nvec_new,
+            A->p, A->h, A->nvec, Context) ;
+        if (info != GrB_SUCCESS)
+        { 
+            // out of memory
+            return (info) ;
+        }
+        // free the old A->p and A->h (they might be shallow)
+        GB_ph_free (A) ;
+        // transplant the new hyperlist into A
+        A->p = Ap_new ;
+        A->h = Ah_new ;
+        A->nvec = nvec_new ;
+        A->plen = nvec_new ;
+        A->nvec_nonempty = nvec_new ;
+        A->magic = GB_MAGIC ;
+    }
+
+    //--------------------------------------------------------------------------
+    // return result
+    //--------------------------------------------------------------------------
+
+    ASSERT (A->nvec_nonempty == GB_nvec_nonempty (A, Context)) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_ij.h b/Source/GB_ij.h
index 37acdcd2b0..9b14e190cb 100644
--- a/Source/GB_ij.h
+++ b/Source/GB_ij.h
@@ -2,7 +2,7 @@
 // GB_ij.h: definitions for I and J index lists
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ijlength.c b/Source/GB_ijlength.c
index c00df8b244..d112892d51 100644
--- a/Source/GB_ijlength.c
+++ b/Source/GB_ijlength.c
@@ -2,7 +2,7 @@
 // GB_ijlength: get the length and kind of an index list I
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ijproperties.c b/Source/GB_ijproperties.c
index b94d5d438f..cfc7e078fb 100644
--- a/Source/GB_ijproperties.c
+++ b/Source/GB_ijproperties.c
@@ -2,7 +2,7 @@
 // GB_ijproperties: check I and determine its properties
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ijsort.c b/Source/GB_ijsort.c
index 58149dbd24..4deef2e514 100644
--- a/Source/GB_ijsort.c
+++ b/Source/GB_ijsort.c
@@ -2,7 +2,7 @@
 // GB_ijsort:  sort an index array I and remove duplicates
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -149,7 +149,7 @@ GrB_Info GB_ijsort
 
     GB_MALLOC_MEMORY (Count, ntasks+1, sizeof (int64_t)) ;
     if (Count == NULL)
-    {
+    { 
         // out of memory
         GB_FREE_WORK ;
         return (GB_OUT_OF_MEMORY) ;
diff --git a/Source/GB_init.c b/Source/GB_init.c
index f29b1cf5d5..90a6422626 100644
--- a/Source/GB_init.c
+++ b/Source/GB_init.c
@@ -2,7 +2,7 @@
 // GB_init: initialize GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,7 +11,7 @@
 // both rely on this internal function.  If GraphBLAS is used by multiple user
 // threads, only one can call GrB_init or GxB_init.
 
-// Result are undefined in multiple user threads simultaneously
+// Results are undefined if multiple user threads simultaneously
 // call GrB_init (or GxB_init).
 
 // GrB_finalize must be called as the last GraphBLAS operation.
@@ -115,10 +115,6 @@ GrB_Info GB_init            // start up GraphBLAS
     GB_Global_nthreads_max_set (GB_Global_omp_get_max_threads ( )) ;
     GB_Global_chunk_set (GB_CHUNK_DEFAULT) ;
 
-    #if defined ( _OPENMP )
-    omp_set_nested (true) ;
-    #endif
-
     //--------------------------------------------------------------------------
     // initialize thread-local storage
     //--------------------------------------------------------------------------
@@ -175,16 +171,6 @@ GrB_Info GB_init            // start up GraphBLAS
     // set the mode: blocking or nonblocking
     GB_Global_mode_set (mode) ;
 
-    //--------------------------------------------------------------------------
-    // clear Sauna workspaces
-    //--------------------------------------------------------------------------
-
-    for (int tid = 0 ; tid < GxB_NTHREADS_MAX ; tid++)
-    { 
-        GB_Global_Saunas_set (tid, NULL) ;
-        GB_Global_Sauna_in_use_set (tid, false) ;
-    }
-
     //--------------------------------------------------------------------------
     // set the global default format
     //--------------------------------------------------------------------------
@@ -205,6 +191,12 @@ GrB_Info GB_init            // start up GraphBLAS
     GB_Global_malloc_debug_count_set (0) ;
     GB_Global_inuse_clear ( ) ;
 
+    //--------------------------------------------------------------------------
+    // development use only; controls diagnostic output
+    //--------------------------------------------------------------------------
+
+    GB_Global_burble_set (false) ;
+
     //--------------------------------------------------------------------------
     // return result
     //--------------------------------------------------------------------------
diff --git a/Source/GB_is_diagonal.c b/Source/GB_is_diagonal.c
index 0e90669c3d..ce1297dc56 100644
--- a/Source/GB_is_diagonal.c
+++ b/Source/GB_is_diagonal.c
@@ -2,7 +2,7 @@
 // GB_is_diagonal: check if A is a diagonal matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 // present.  All pending tuples are ignored.  Zombies are treated as entries.
 
 #include "GB_mxm.h"
+#include "GB_atomics.h"
 
 bool GB_is_diagonal             // true if A is diagonal
 (
@@ -86,7 +87,7 @@ bool GB_is_diagonal             // true if A is diagonal
                 #pragma omp critical (GB_is_diagonal)
                 diag = diagonal ;
             #else
-                #pragma omp atomic read
+                GB_ATOMIC_READ
                 diag = diagonal ;
             #endif
         }
@@ -125,7 +126,7 @@ bool GB_is_diagonal             // true if A is diagonal
                 #pragma omp critical (GB_is_diagonal)
                 diagonal = false ;
             #else
-                #pragma omp atomic write
+                GB_ATOMIC_WRITE
                 diagonal = false ;
             #endif
         }
diff --git a/Source/GB_iterator.h b/Source/GB_iterator.h
index b4cea6ccf2..aa1b09bb90 100644
--- a/Source/GB_iterator.h
+++ b/Source/GB_iterator.h
@@ -2,7 +2,7 @@
 // GB_iterator.h: definitions for the GrB_Matrix iterator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ix_alloc.c b/Source/GB_ix_alloc.c
index 3e34ce99c2..622f764f33 100644
--- a/Source/GB_ix_alloc.c
+++ b/Source/GB_ix_alloc.c
@@ -2,7 +2,7 @@
 // GB_ix_alloc: allocate a matrix to hold a given number of entries
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ix_free.c b/Source/GB_ix_free.c
index a81cc94789..44655750f3 100644
--- a/Source/GB_ix_free.c
+++ b/Source/GB_ix_free.c
@@ -2,7 +2,7 @@
 // GB_ix_free: free A->i, A->x, pending tuples, zombies; A->p, A->h unchanged
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ix_realloc.c b/Source/GB_ix_realloc.c
index ad248d836c..d74627740d 100644
--- a/Source/GB_ix_realloc.c
+++ b/Source/GB_ix_realloc.c
@@ -2,7 +2,7 @@
 // GB_ix_realloc: reallocate a matrix to hold a given number of entries
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_ix_resize.c b/Source/GB_ix_resize.c
index 4c66388219..962608073c 100644
--- a/Source/GB_ix_resize.c
+++ b/Source/GB_ix_resize.c
@@ -2,7 +2,7 @@
 // GB_ix_resize:  reallocate a matrix with some slack for future growth
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_jappend.h b/Source/GB_jappend.h
index 2cd61eb7ec..37100de7ba 100644
--- a/Source/GB_jappend.h
+++ b/Source/GB_jappend.h
@@ -2,7 +2,7 @@
 // GB_jappend.h: definitions of GB_jstartup, GB_jappend, and GB_jwrapup
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,7 +21,8 @@
 // GB_jstartup logs the start of C(:,0); it also acts as if it logs the end of
 // the sentinal vector C(:,-1).
 
-static inline void GB_jstartup
+#if 0
+static inline void GB_jstartup          // no longer used in v3.2.0
 (
     GrB_Matrix C,           // matrix to start creating
     int64_t *jlast,         // last vector appended, set to -1
@@ -34,11 +35,12 @@ static inline void GB_jstartup
     (*cnz_last) = 0 ;
     (*jlast) = -1 ;         // last sentinal vector is -1
     if (C->is_hyper)
-    { 
+    {
         C->nvec = 0 ;       // clear all existing vectors from C
     }
     C->nvec_nonempty = 0 ;  // # of non-empty vectors will be counted
 }
+#endif
 
 //------------------------------------------------------------------------------
 // GB_jappend:  append a new vector to the end of a matrix
@@ -76,7 +78,7 @@ static inline GrB_Info GB_jappend
     ASSERT (C->p != NULL) ;
 
     if (cnz <= (*cnz_last))
-    { 
+    {
         // nothing to do
         return (GrB_SUCCESS) ;
     }
diff --git a/Source/GB_kron.c b/Source/GB_kron.c
index a0f6e42bcb..024d01c0ad 100644
--- a/Source/GB_kron.c
+++ b/Source/GB_kron.c
@@ -2,7 +2,7 @@
 // GB_kron: C<M> = accum (C, kron(A,B))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,7 @@ GrB_Info GB_kron                    // C<M> = accum (C, kron(A,B))
     const bool C_replace,           // if true, clear C before writing to it
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_BinaryOp op,          // defines '*' for kron(A,B)
     const GrB_Matrix A,             // input matrix
@@ -121,6 +122,7 @@ GrB_Info GB_kron                    // C<M> = accum (C, kron(A,B))
     {
         // AT = A' and typecast to op->xtype
         // transpose: typecast, no op, not in place
+        GBBURBLE ("(A transpose) ") ;
         info = GB_transpose (&AT, op->xtype, is_csc, A, NULL, Context) ;
         if (info != GrB_SUCCESS)
         { 
@@ -135,6 +137,7 @@ GrB_Info GB_kron                    // C<M> = accum (C, kron(A,B))
     {
         // BT = B' and typecast to op->ytype
         // transpose: typecast, no op, not in place
+        GBBURBLE ("(B transpose) ") ;
         info = GB_transpose (&BT, op->ytype, is_csc, B, NULL, Context) ;
         if (info != GrB_SUCCESS)
         { 
@@ -168,6 +171,6 @@ GrB_Info GB_kron                    // C<M> = accum (C, kron(A,B))
     //--------------------------------------------------------------------------
 
     return (GB_accum_mask (C, M, NULL, accum, &T, C_replace, Mask_comp,
-        Context)) ;
+        Mask_struct, Context)) ;
 }
 
diff --git a/Source/GB_kron.h b/Source/GB_kron.h
index 68c832529d..94979bf7bb 100644
--- a/Source/GB_kron.h
+++ b/Source/GB_kron.h
@@ -2,7 +2,7 @@
 // GB_kron.h: definitions for GB_kron
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,6 +17,7 @@ GrB_Info GB_kron                    // C<M> = accum (C, kron(A,B))
     const bool C_replace,           // if true, clear C before writing to it
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_BinaryOp op,          // defines '*' for kron(A,B)
     const GrB_Matrix A,             // input matrix
diff --git a/Source/GB_kroner.c b/Source/GB_kroner.c
index cba7455440..6e687cab36 100644
--- a/Source/GB_kroner.c
+++ b/Source/GB_kroner.c
@@ -2,7 +2,7 @@
 // GB_kroner: Kronecker product, C = kron (A,B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -172,44 +172,56 @@ GrB_Info GB_kroner                  // C = kron (A,B)
         int64_t kA = kC / bnvec ;
         int64_t kB = kC % bnvec ;
 
-            // get B(:,jB), the (kB)th vector of B
-            int64_t pB_start = Bp [kB] ;
-            int64_t pB_end   = Bp [kB+1] ;
-            int64_t bknz = pB_start - pB_end ;
-            if (bknz == 0) continue ;
-            GB_void bwork [GB_VLA(bsize)] ;
-            // get C(:,jC), the (kC)th vector of C
-            // int64_t kC = kA * bnvec + kB ;
-            int64_t pC = Cp [kC] ;
-            // get A(:,jA), the (kA)th vector of A
-            int64_t pA_start = Ap [kA] ;
-            int64_t pA_end   = Ap [kA+1] ;
-            GB_void awork [GB_VLA(asize)] ;
-            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
-            {
-                // awork = A(iA,jA), typecasted to op->xtype
-                int64_t iA = Ai [pA] ;
-                int64_t iAblock = iA * bvlen ;
-                cast_A (awork, Ax +(pA*asize), asize) ;
-                for (int64_t pB = pB_start ; pB < pB_end ; pB++)
-                { 
-                    // bwork = B(iB,jB), typecasted to op->ytype
-                    int64_t iB = Bi [pB] ;
-                    cast_B (bwork, Bx +(pB*bsize), bsize) ;
-                    // C(iC,jC) = A(iA,jA) * B(iB,jB)
-                    int64_t iC = iAblock + iB ;
-                    Ci [pC] = iC ;
-                    fmult (Cx +(pC*csize), awork, bwork) ;
-                    pC++ ;
-                }
+        // get B(:,jB), the (kB)th vector of B
+        int64_t pB_start = Bp [kB] ;
+        int64_t pB_end   = Bp [kB+1] ;
+        int64_t bknz = pB_start - pB_end ;
+        if (bknz == 0) continue ;
+        GB_void bwork [GB_VLA(bsize)] ;
+
+        // get C(:,jC), the (kC)th vector of C
+        // int64_t kC = kA * bnvec + kB ;
+        int64_t pC = Cp [kC] ;
+
+        // get A(:,jA), the (kA)th vector of A
+        int64_t pA_start = Ap [kA] ;
+        int64_t pA_end   = Ap [kA+1] ;
+        GB_void awork [GB_VLA(asize)] ;
+
+        for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+        {
+            // awork = A(iA,jA), typecasted to op->xtype
+            int64_t iA = Ai [pA] ;
+            int64_t iAblock = iA * bvlen ;
+            cast_A (awork, Ax +(pA*asize), asize) ;
+            for (int64_t pB = pB_start ; pB < pB_end ; pB++)
+            { 
+                // bwork = B(iB,jB), typecasted to op->ytype
+                int64_t iB = Bi [pB] ;
+                cast_B (bwork, Bx +(pB*bsize), bsize) ;
+                // C(iC,jC) = A(iA,jA) * B(iB,jB)
+                int64_t iC = iAblock + iB ;
+                Ci [pC] = iC ;
+                fmult (Cx +(pC*csize), awork, bwork) ;
+                pC++ ;
             }
-
+        }
     }
 
     //--------------------------------------------------------------------------
     // remove empty vectors from C, if hypersparse
     //--------------------------------------------------------------------------
 
+    info = GB_hypermatrix_prune (C, Context) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        GB_MATRIX_FREE (&C) ;
+        return (info) ;
+    }
+
+#if 0
+    // see GB_hypermatrix_prune
     if (C_is_hyper && C->nvec_nonempty < cnvec)
     {
         // create new Cp_new and Ch_new arrays, with no empty vectors
@@ -233,6 +245,7 @@ GrB_Info GB_kroner                  // C = kron (A,B)
         C->plen = nvec_new ;
         ASSERT (C->nvec == C->nvec_nonempty) ;
     }
+#endif
 
     ASSERT (C->nvec_nonempty == GB_nvec_nonempty (C, Context)) ;
 
diff --git a/Source/GB_malloc_memory.c b/Source/GB_malloc_memory.c
index 34634251a6..96b9c8034f 100644
--- a/Source/GB_malloc_memory.c
+++ b/Source/GB_malloc_memory.c
@@ -2,7 +2,7 @@
 // GB_malloc_memory: wrapper for malloc_function
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_mask.c b/Source/GB_mask.c
index 3b4bed1cc2..14fadf71ea 100644
--- a/Source/GB_mask.c
+++ b/Source/GB_mask.c
@@ -2,7 +2,7 @@
 // GB_mask: apply a mask: C<M> = Z
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -127,6 +127,7 @@ GrB_Info GB_mask                // C<M> = Z
                                 // Z is freed when done.
     const bool C_replace,       // true if clear(C) to be done first
     const bool Mask_comp,       // true if M is to be complemented
+    const bool Mask_struct,     // if true, use only the structure of M
     GB_Context Context
 )
 {
@@ -297,7 +298,8 @@ GrB_Info GB_mask                // C<M> = Z
         // R = masker (M, C, Z):  compute C<M>=Z, placing results in R
         //----------------------------------------------------------------------
 
-        GB_OK (GB_masker (&R, R_is_csc, M, Mask_comp, C, Z, Context)) ;
+        GB_OK (GB_masker (&R, R_is_csc, M, Mask_comp, Mask_struct, C, Z,
+            Context)) ;
 
         //----------------------------------------------------------------------
         // free temporary matrices Z and C_cleared
diff --git a/Source/GB_mask.h b/Source/GB_mask.h
index 52519944ac..2018d421e6 100644
--- a/Source/GB_mask.h
+++ b/Source/GB_mask.h
@@ -2,7 +2,7 @@
 // GB_mask: definitions for GB_mask and related functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -19,6 +19,7 @@ GrB_Info GB_mask                // C<M> = Z
                                 // Z is freed when done.
     const bool C_replace,       // true if clear(C) to be done first
     const bool Mask_comp,       // true if M is to be complemented
+    const bool Mask_struct,     // if true, use only the structure of M
     GB_Context Context
 ) ;
 
@@ -28,6 +29,7 @@ GrB_Info GB_masker          // R = masker (M, C, Z)
     const bool R_is_csc,    // format of output matrix R
     const GrB_Matrix M,     // required input mask
     const bool Mask_comp,   // descriptor for M
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix C,     // input C matrix
     const GrB_Matrix Z,     // input Z matrix
     GB_Context Context
@@ -50,6 +52,7 @@ GrB_Info GB_mask_phase1                 // count nnz in each R(:,j)
     // original input:
     const GrB_Matrix M,                 // required mask
     const bool Mask_comp,               // if true, then M is complemented
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix C,
     const GrB_Matrix Z,
     GB_Context Context
@@ -75,6 +78,7 @@ GrB_Info GB_mask_phase2     // phase2 for R = masker (M,C,Z)
     // original input:
     const GrB_Matrix M,         // required mask
     const bool Mask_comp,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix C,
     const GrB_Matrix Z,
     GB_Context Context
diff --git a/Source/GB_mask_phase1.c b/Source/GB_mask_phase1.c
index 2674932167..c840d6a8da 100644
--- a/Source/GB_mask_phase1.c
+++ b/Source/GB_mask_phase1.c
@@ -2,7 +2,7 @@
 // GB_mask_phase1: find # of entries in R = masker (M,C,Z)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -38,6 +38,7 @@ GrB_Info GB_mask_phase1                 // count nnz in each R(:,j)
     // original input:
     const GrB_Matrix M,                 // required mask
     const bool Mask_comp,               // if true, then M is complemented
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix C,
     const GrB_Matrix Z,
     GB_Context Context
diff --git a/Source/GB_mask_phase2.c b/Source/GB_mask_phase2.c
index a50aa7b1b7..7b38fb12ec 100644
--- a/Source/GB_mask_phase2.c
+++ b/Source/GB_mask_phase2.c
@@ -2,7 +2,7 @@
 // GB_mask_phase2: phase2 for R = masker (M,C,Z)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -44,6 +44,7 @@ GrB_Info GB_mask_phase2     // phase2 for R = masker (M,C,Z)
     // original input:
     const GrB_Matrix M,         // required mask
     const bool Mask_comp,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix C,
     const GrB_Matrix Z,
     GB_Context Context
@@ -114,6 +115,16 @@ GrB_Info GB_mask_phase2     // phase2 for R = masker (M,C,Z)
     // prune empty vectors from Rh
     //--------------------------------------------------------------------------
 
+    info = GB_hypermatrix_prune (R, Context) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        GB_MATRIX_FREE (&R) ;
+        return (info) ;
+    }
+
+#if 0
+    // see GB_hypermatrix_prune
     if (R_is_hyper && R->nvec_nonempty < Rnvec)
     {
         // create new Rp_new and Rh_new arrays, with no empty vectors
@@ -137,6 +148,7 @@ GrB_Info GB_mask_phase2     // phase2 for R = masker (M,C,Z)
         R->plen = nvec_new ;
         ASSERT (R->nvec == R->nvec_nonempty) ;
     }
+#endif
 
     //--------------------------------------------------------------------------
     // return result
diff --git a/Source/GB_masker.c b/Source/GB_masker.c
index fa7dc66034..a8b9ceb5df 100644
--- a/Source/GB_masker.c
+++ b/Source/GB_masker.c
@@ -2,7 +2,7 @@
 // GB_masker: R = masker (M, C, Z)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -55,6 +55,7 @@ GrB_Info GB_masker          // R = masker (M, C, Z)
     const bool R_is_csc,    // format of output matrix R
     const GrB_Matrix M,     // required input mask
     const bool Mask_comp,   // descriptor for M
+    const bool Mask_struct, // if true, use only the structure of M
     const GrB_Matrix C,     // input C matrix
     const GrB_Matrix Z,     // input Z matrix
     GB_Context Context
@@ -65,6 +66,8 @@ GrB_Info GB_masker          // R = masker (M, C, Z)
     // check inputs
     //--------------------------------------------------------------------------
 
+    GBBURBLE ("mask ") ;
+
     ASSERT (Rhandle != NULL) ;
     ASSERT_MATRIX_OK (M, "M for masker", GB0) ;
     ASSERT_MATRIX_OK (C, "C for masker", GB0) ;
@@ -139,7 +142,7 @@ GrB_Info GB_masker          // R = masker (M, C, Z)
         // from phase0:
         Rnvec, Rh, R_to_M, R_to_C, R_to_Z,
         // original input:
-        M, Mask_comp, C, Z, Context) ;
+        M, Mask_comp, Mask_struct, C, Z, Context) ;
 
     if (info != GrB_SUCCESS)
     { 
@@ -169,7 +172,7 @@ GrB_Info GB_masker          // R = masker (M, C, Z)
         // from phase0:
         Rnvec, Rh, R_to_M, R_to_C, R_to_Z,
         // original input:
-        M, Mask_comp, C, Z, Context) ;
+        M, Mask_comp, Mask_struct, C, Z, Context) ;
 
     // free workspace
     GB_FREE_MEMORY (TaskList, max_ntasks+1, sizeof (GB_task_struct)) ;
diff --git a/Source/GB_matlab_helper.c b/Source/GB_matlab_helper.c
index baad0c3d8e..6f660f20f8 100644
--- a/Source/GB_matlab_helper.c
+++ b/Source/GB_matlab_helper.c
@@ -2,7 +2,7 @@
 // GB_matlab_helper.c: helper functions for MATLAB interface
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -165,18 +165,6 @@ bool GB_matlab_helper3              // return true if OK, false on error
 
     GB_FREE_WORK (int64_t) ;
 
-//  int64_t k ;
-//  #pragma omp parallel for num_threads(nthreads) schedule(static) \
-//      reduction(&&:ok) reduction(max:listmax)
-//  for (k = 0 ; k < len ; k++)
-//  {
-//      double x = List_double [k] ;
-//      int64_t i = (int64_t) x ;
-//      ok = ok && (x == (double) i) ;
-//      listmax = GB_IMAX (listmax, i) ;
-//      List [k] = i - 1 ;
-//  }
-
     (*List_max) = listmax ;
     return (ok) ;
 }
@@ -223,16 +211,6 @@ bool GB_matlab_helper3i             // return true if OK, false on error
 
     GB_FREE_WORK (int64_t) ;
 
-//  int64_t k ;
-//  #pragma omp parallel for num_threads(nthreads) schedule(static) \
-//      reduction(max:listmax)
-//  for (k = 0 ; k < len ; k++)
-//  {
-//      int64_t i = List_int64 [k] ;
-//      listmax = GB_IMAX (listmax, i) ;
-//      List [k] = i - 1 ;
-//  }
-
     (*List_max) = listmax ;
     return (true) ;
 }
@@ -277,14 +255,6 @@ bool GB_matlab_helper4              // return true if OK, false on error
 
     GB_FREE_WORK (GrB_Index) ;
 
-//  int64_t k ;
-//  #pragma omp parallel for num_threads(nthreads) schedule(static) \
-//      reduction(max:listmax)
-//  for (k = 0 ; k < len ; k++)
-//  {
-//      listmax = GB_IMAX (listmax, I [k]) ;
-//  }
-
     if (len > 0) listmax++ ;
     (*List_max) = listmax ;
     return (true) ;
@@ -382,3 +352,357 @@ void GB_matlab_helper8
     }
 }
 
+//------------------------------------------------------------------------------
+// GB_matlab_helper9: compute the degree of each vector
+//------------------------------------------------------------------------------
+
+bool GB_matlab_helper9  // true if successful, false if out of memory
+(
+    GrB_Matrix A,       // input matrix
+    int64_t **degree,   // output: # of entries in each vector, size (*nvec)
+    GrB_Index **list,   // output: index of each vector of A, size (*nvec)
+    GrB_Index *nvec     // output: # of vectors listed; set to A->nvec
+)
+{
+    int64_t anvec = A->nvec ;
+    GB_NTHREADS (anvec) ;   // presumably defines nthreads -- TODO confirm macro
+
+    uint64_t *List = NULL ;
+    int64_t  *Degree = NULL ;
+    GB_MALLOC_MEMORY (List,   anvec, sizeof (int64_t)) ;
+    GB_MALLOC_MEMORY (Degree, anvec, sizeof (int64_t)) ;
+
+    if (List == NULL || Degree == NULL)     // at least one allocation failed
+    {
+        GB_FREE_MEMORY (List,   anvec, sizeof (int64_t)) ;
+        GB_FREE_MEMORY (Degree, anvec, sizeof (int64_t)) ;
+        return (false) ;
+    }
+
+    int64_t *Ah = A->h ;
+    int64_t *Ap = A->p ;
+
+    int64_t k ;
+    #pragma omp parallel for num_threads(nthreads) schedule(static)
+    for (k = 0 ; k < anvec ; k++)
+    {
+        List [k] = (Ah == NULL) ? k : Ah [k] ;  // if A->h is NULL, index is k
+        Degree [k] = Ap [k+1] - Ap [k] ;        // span of the kth vector in Ap
+    }
+
+    // return the newly allocated arrays and their length to the caller
+    (*degree) = Degree ;
+    (*list) = List ;
+    (*nvec) = anvec ;
+    return (true) ;
+}
+
+//------------------------------------------------------------------------------
+// GB_matlab_helper10: compute norm (x-y,p) of two dense FP32 or FP64 vectors
+//------------------------------------------------------------------------------
+
+// p can be:
+
+//      0 or 2:     2-norm, sqrt (sum ((x-y).^2))
+//      1:          1-norm, sum (abs (x-y))
+//      INT64_MAX:  inf-norm, max (abs (x-y))
+//      INT64_MIN:  (-inf)-norm, min (abs (x-y))
+//      other:      p-norm not yet supported; -1 is returned
+
+double GB_matlab_helper10       // norm (x-y,p), or -1 on error
+(
+    GB_void *x_arg,             // float or double, depending on type parameter
+    GB_void *y_arg,             // same type as x, treat as zero if NULL
+    GrB_Type type,              // GrB_FP32 or GrB_FP64
+    int64_t p,                  // 0, 1, 2, INT64_MIN, or INT64_MAX
+    GrB_Index n                 // # of entries read from x (and y, if not NULL)
+)
+{
+
+    //--------------------------------------------------------------------------
+    // check inputs
+    //--------------------------------------------------------------------------
+
+    if (!(type == GrB_FP32 || type == GrB_FP64))
+    {
+        // type of x and y must be GrB_FP32 or GrB_FP64
+        return ((double) -1) ;
+    }
+
+    if (n == 0)     // nothing to read: 0 is returned whatever the value of p
+    {
+        return ((double) 0) ;
+    }
+
+    //--------------------------------------------------------------------------
+    // allocate workspace and determine # of threads to use
+    //--------------------------------------------------------------------------
+
+    GB_NTHREADS (n) ;           // presumably defines nthreads -- TODO confirm
+    GB_ALLOCATE_WORK (double) ; // presumably defines double *Work -- TODO confirm
+
+    //--------------------------------------------------------------------------
+    // each thread computes its partial norm
+    //--------------------------------------------------------------------------
+
+    int tid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(static)
+    for (tid = 0 ; tid < nthreads ; tid++)
+    {
+        int64_t k1, k2 ;    // this task handles entries k1 to k2-1
+        GB_PARTITION (k1, k2, n, tid, nthreads) ;
+
+        if (type == GrB_FP32)
+        {
+
+            //------------------------------------------------------------------
+            // FP32 case
+            //------------------------------------------------------------------
+
+            float my_s = 0 ;    // partial result of this task, held as a float
+            const float *x = (float *) x_arg ;
+            const float *y = (float *) y_arg ;
+            switch (p)
+            {
+                case 0:     // Frobenius norm
+                case 2:     // 2-norm: sqrt of sum of (x-y).^2
+                {
+                    // only the sum of squares here; sqrt is taken at the end
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            float t = x [k] ;
+                            my_s += (t*t) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            float t = (x [k] - y [k]) ;
+                            my_s += (t*t) ;
+                        }
+                    }
+                }
+                break ;
+
+                case 1:     // 1-norm: sum (abs (x-y))
+                {
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s += fabsf (x [k]) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s += fabsf (x [k] - y [k]) ;
+                        }
+                    }
+                }
+                break ;
+
+                case INT64_MAX:     // inf-norm: max (abs (x-y))
+                {
+                    // my_s starts at 0, a lower bound on any absolute value
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fmaxf (my_s, fabsf (x [k])) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fmaxf (my_s, fabsf (x [k] - y [k])) ;
+                        }
+                    }
+                }
+                break ;
+
+                case INT64_MIN:     // (-inf)-norm: min (abs (x-y))
+                {
+                    my_s = INFINITY ;   // starting value for the running min
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fminf (my_s, fabsf (x [k])) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fminf (my_s, fabsf (x [k] - y [k])) ;
+                        }
+                    }
+                }
+                break ;
+
+                default: ;  // p-norm not yet supported; my_s stays 0
+            }
+            Work [tid] = (double) my_s ;
+
+        }
+        else
+        {
+
+            //------------------------------------------------------------------
+            // FP64 case
+            //------------------------------------------------------------------
+
+            double my_s = 0 ;   // partial result of this task
+            const double *x = (double *) x_arg ;
+            const double *y = (double *) y_arg ;
+            switch (p)
+            {
+                case 0:     // Frobenius norm
+                case 2:     // 2-norm: sqrt of sum of (x-y).^2
+                {
+                    // only the sum of squares here; sqrt is taken at the end
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            double t = x [k] ;
+                            my_s += (t*t) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            double t = (x [k] - y [k]) ;
+                            my_s += (t*t) ;
+                        }
+                    }
+                }
+                break ;
+
+                case 1:     // 1-norm: sum (abs (x-y))
+                {
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s += fabs (x [k]) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s += fabs (x [k] - y [k]) ;
+                        }
+                    }
+                }
+                break ;
+
+                case INT64_MAX:     // inf-norm: max (abs (x-y))
+                {
+                    // my_s starts at 0, a lower bound on any absolute value
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fmax (my_s, fabs (x [k])) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fmax (my_s, fabs (x [k] - y [k])) ;
+                        }
+                    }
+                }
+                break ;
+
+                case INT64_MIN:     // (-inf)-norm: min (abs (x-y))
+                {
+                    my_s = INFINITY ;   // starting value for the running min
+                    if (y == NULL)
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fmin (my_s, fabs (x [k])) ;
+                        }
+                    }
+                    else
+                    {
+                        for (int64_t k = k1 ; k < k2 ; k++)
+                        {
+                            my_s = fmin (my_s, fabs (x [k] - y [k])) ;
+                        }
+                    }
+                }
+                break ;
+
+                default: ;  // p-norm not yet supported; my_s stays 0
+            }
+
+            Work [tid] = my_s ;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    // combine results of each thread
+    //--------------------------------------------------------------------------
+
+    double s = 0 ;
+    switch (p)
+    {
+        case 0:     // Frobenius norm
+        case 2:     // 2-norm: sqrt of sum of (x-y).^2
+        {
+            for (int64_t tid = 0 ; tid < nthreads ; tid++)
+            {
+                s += Work [tid] ;
+            }
+            s = sqrt (s) ;  // Work holds partial sums of squares
+        }
+        break ;
+
+        case 1:     // 1-norm: sum (abs (x-y))
+        {
+            for (int64_t tid = 0 ; tid < nthreads ; tid++)
+            {
+                s += Work [tid] ;
+            }
+        }
+        break ;
+
+        case INT64_MAX:     // inf-norm: max (abs (x-y))
+        {
+            for (int64_t tid = 0 ; tid < nthreads ; tid++)
+            {
+                s = fmax (s, Work [tid]) ;
+            }
+        }
+        break ;
+
+        case INT64_MIN:     // (-inf)-norm: min (abs (x-y))
+        {
+            s = Work [0] ;  // seed with the first partial min, not with 0
+            for (int64_t tid = 1 ; tid < nthreads ; tid++)
+            {
+                s = fmin (s, Work [tid]) ;
+            }
+        }
+        break ;
+
+        default:    // p-norm not yet supported: report the error as -1
+            s = -1 ;
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    GB_FREE_WORK (double) ;
+    return (s) ;
+}
+
diff --git a/Source/GB_matlab_helper.h b/Source/GB_matlab_helper.h
index dc96c951dc..588d576dca 100644
--- a/Source/GB_matlab_helper.h
+++ b/Source/GB_matlab_helper.h
@@ -2,7 +2,7 @@
 // GB_matlab_helper.h: helper functions for MATLAB interface
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -99,5 +99,24 @@ void GB_matlab_helper8
     size_t s            // size of each scalar
 ) ;
 
+GB_PUBLIC
+bool GB_matlab_helper9  // true if successful, false if out of memory
+(
+    GrB_Matrix A,       // input matrix
+    int64_t **degree,   // output: # of entries in each vector, size (*nvec)
+    GrB_Index **list,   // output: index of each vector of A, size (*nvec)
+    GrB_Index *nvec     // output: # of vectors listed; set to A->nvec
+) ;
+
+GB_PUBLIC
+double GB_matlab_helper10       // norm (x-y,p), or -1 on error
+(
+    GB_void *x_arg,             // float or double, depending on type parameter
+    GB_void *y_arg,             // same type as x, treat as zero if NULL
+    GrB_Type type,              // GrB_FP32 or GrB_FP64
+    int64_t p,                  // 0, 1, 2, INT64_MIN, or INT64_MAX
+    GrB_Index n                 // # of entries read from x (and y, if not NULL)
+) ;
+
 #endif
 
diff --git a/Source/GB_matvec_build.c b/Source/GB_matvec_build.c
index 0af7ddbbdb..a80dc76507 100644
--- a/Source/GB_matvec_build.c
+++ b/Source/GB_matvec_build.c
@@ -2,7 +2,7 @@
 // GB_matvec_build: check inputs and build a matrix or vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_matvec_check.c b/Source/GB_matvec_check.c
index 248ec768e6..cb09cbdc26 100644
--- a/Source/GB_matvec_check.c
+++ b/Source/GB_matvec_check.c
@@ -2,7 +2,7 @@
 // GB_matvec_check: print a GraphBLAS matrix and check if it is valid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -76,7 +76,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
                 (A->is_slice ? "slice" : "sparse")) ;
         GBPR (" %s:\n", A->is_csc ? "by col" : "by row") ;
 
-        #ifdef GB_DEVELOPER
+        #if GB_DEVELOPER
         GBPR ("  max # entries: "GBd"\n", A->nzmax) ;
         GBPR ("  vlen: "GBd, A->vlen) ;
         if (A->nvec_nonempty != -1)
@@ -157,7 +157,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
 
     GB_Pending Pending = A->Pending ;
 
-    #ifdef GB_DEVELOPER
+    #if GB_DEVELOPER
     // a matrix contains 1 to 9 different allocated blocks
     int64_t nallocs = 1 +                       // header
         (A->h != NULL && !A->h_shallow) +       // A->h, if not shallow
@@ -175,7 +175,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
     // check the type
     //--------------------------------------------------------------------------
 
-    #ifdef GB_DEVELOPER
+    #if GB_DEVELOPER
     int pr_type = pr ;
     #else
     int pr_type = 0 ;
@@ -193,7 +193,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
     // report last method used for C=A*B
     //--------------------------------------------------------------------------
 
-    #ifdef GB_DEVELOPER
+    #if GB_DEVELOPER
     if (pr > 1 && A->AxB_method_used != GxB_DEFAULT)
     {
         GBPR ("  last method used for GrB_mxm, vxm, or mxv: ") ;
@@ -212,7 +212,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
     // report shallow structure
     //--------------------------------------------------------------------------
 
-    #ifdef GB_DEVELOPER
+    #if GB_DEVELOPER
     if (pr > 1) GBPR ("  ->h: %p shallow: %d\n", A->h, A->h_shallow) ;
     if (pr > 1) GBPR ("  ->p: %p shallow: %d\n", A->p, A->p_shallow) ;
     if (pr > 1) GBPR ("  ->i: %p shallow: %d\n", A->i, A->i_shallow) ;
@@ -388,22 +388,22 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
 
     GBPR0 ("  ") ;
     if (name != NULL && strlen (name) > 0)
-    {
+    { 
         GBPR0 ("%s, ", GB_NAME) ;
     }
 
     // # of entries cannot be computed until all the tests above are OK
     int64_t anz = GB_NNZ (A) ;
     if (anz == 0)
-    {
+    { 
         GBPR0 ("no entries\n") ;
     }
     else if (anz == 1)
-    {
+    { 
         GBPR0 ("1 entry\n") ;
     }
     else
-    {
+    { 
         GBPR0 (GBd" entries\n", anz) ;
     }
 
@@ -462,7 +462,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
                 // print the header for vector j
                 if (prcol)
                 { 
-                    #ifdef GB_DEVELOPER
+                    #if GB_DEVELOPER
                     GBPR ("  %s: "GBd" : "GBd" entries ["GBd":"GBd"]\n",
                         A->is_csc ? "column" : "row", j, pend - p, p, pend-1) ;
                     #endif
@@ -470,7 +470,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
                 else if (pr == 2 && jcount == GB_NBRIEF)
                 { 
                     truncated = true ;
-                    #ifdef GB_DEVELOPER
+                    #if GB_DEVELOPER
                     GBPR ("    ...\n") ;
                     #endif
                 }
@@ -484,7 +484,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
             { 
                 if ((pr > 1 && p < GB_NZBRIEF) || pr > 2)
                 { 
-                    #ifdef GB_DEVELOPER
+                    #if GB_DEVELOPER
                     GBPR ("    %s "GBd": ", A->is_csc ? "row":"column", i) ;
                     #else
                     if (A->is_csc)
@@ -500,7 +500,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
                 else if (pr == 2 && (ilast == -1 || p == GB_NZBRIEF))
                 { 
                     truncated = true ;
-                    #ifdef GB_DEVELOPER
+                    #if GB_DEVELOPER
                     GBPR ("        ...\n") ;
                     #endif
                 }
@@ -548,7 +548,9 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
         }
     }
 
-    #ifndef GB_DEVELOPER
+    #if GB_DEVELOPER
+    // ... already printed
+    #else
     if (pr == 2 && truncated) GBPR ("    ...\n") ;
     #endif
 
@@ -570,7 +572,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
     // check and print the pending tuples
     //--------------------------------------------------------------------------
 
-    #ifdef GB_DEVELOPER
+    #if GB_DEVELOPER
     if (pr > 1) GBPR ("  Pending %p\n", Pending) ;
     #endif
 
@@ -581,7 +583,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
         // A has pending tuples
         //---------------------------------------------------------------------
 
-        #ifdef GB_DEVELOPER
+        #if GB_DEVELOPER
         if (pr > 1) GBPR ("  Pending->i %p\n", Pending->i) ;
         if (pr > 1) GBPR ("  Pending->j %p\n", Pending->j) ;
         if (pr > 1) GBPR ("  Pending->x %p\n", Pending->x) ;
@@ -684,7 +686,7 @@ GrB_Info GB_matvec_check    // check a GraphBLAS matrix or vector
 
         GB_CRITICAL (GB_queue_status (A, &head, &prev, &next, &enqd)) ;
 
-        #ifdef GB_DEVELOPER
+        #if GB_DEVELOPER
         if (pr > 1) GBPR ("  queue head  %p\n", head) ;
         if (pr > 1) GBPR ("  queue prev  %p\n", prev) ;
         if (pr > 1) GBPR ("  queue next  %p\n", next) ;
diff --git a/Source/GB_type.c b/Source/GB_matvec_type.c
similarity index 85%
rename from Source/GB_type.c
rename to Source/GB_matvec_type.c
index 93f5537292..5c3994e30c 100644
--- a/Source/GB_type.c
+++ b/Source/GB_matvec_type.c
@@ -1,15 +1,15 @@
 //------------------------------------------------------------------------------
-// GB_type: return the type of a matrix
+// GB_matvec_type: return the type of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 #include "GB.h"
 
-GrB_Info GB_type            // get the type of a matrix
+GrB_Info GB_matvec_type            // get the type of a matrix
 (
     GrB_Type *type,         // returns the type of the matrix
     const GrB_Matrix A,     // matrix to query
diff --git a/Source/GB_memcpy.c b/Source/GB_memcpy.c
index 838ccf476e..43a8dcae80 100644
--- a/Source/GB_memcpy.c
+++ b/Source/GB_memcpy.c
@@ -2,7 +2,7 @@
 // GB_memcpy: parallel memcpy
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,7 +11,7 @@
 
 #include "GB.h"
 
-#define GB_CHUNK (1024*1024)
+#define GB_MEM_CHUNK (1024*1024)
 
 void GB_memcpy                  // parallel memcpy
 (
@@ -22,7 +22,7 @@ void GB_memcpy                  // parallel memcpy
 )
 {
 
-    if (nthreads <= 1 || n <= GB_CHUNK)
+    if (nthreads <= 1 || n <= GB_MEM_CHUNK)
     { 
 
         //----------------------------------------------------------------------
@@ -38,8 +38,8 @@ void GB_memcpy                  // parallel memcpy
         // memcpy using a multiple threads
         //----------------------------------------------------------------------
 
-        nthreads = GB_IMIN (nthreads, n / GB_CHUNK) ;
-        size_t nchunks = 1 + (n / GB_CHUNK) ;
+        nthreads = GB_IMIN (nthreads, n / GB_MEM_CHUNK) ;
+        size_t nchunks = 1 + (n / GB_MEM_CHUNK) ;
         GB_void *pdest = dest ;
         const GB_void *psrc = src ;
 
@@ -47,10 +47,10 @@ void GB_memcpy                  // parallel memcpy
         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
         for (k = 0 ; k < nchunks ; k++)
         {
-            size_t start = k * GB_CHUNK ;
+            size_t start = k * GB_MEM_CHUNK ;
             if (start < n)
             { 
-                size_t chunk = GB_IMIN (n - start, GB_CHUNK) ;
+                size_t chunk = GB_IMIN (n - start, GB_MEM_CHUNK) ;
                 memcpy (pdest + start, psrc + start, chunk) ;
             }
         }
diff --git a/Source/GB_msort_1.c b/Source/GB_msort_1.c
new file mode 100644
index 0000000000..987dd3c5f6
--- /dev/null
+++ b/Source/GB_msort_1.c
@@ -0,0 +1,359 @@
+//------------------------------------------------------------------------------
+// GB_msort_1: sort a list of integers
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// A parallel mergesort of an array of n integers.
+
+#include "GB_msort_1.h"
+
+//------------------------------------------------------------------------------
+// GB_merge_sequential_1: merge two sorted lists via a single thread
+//------------------------------------------------------------------------------
+
+// merge Left [0..nleft-1] and Right [0..nright-1] into S [0..nleft+nright-1]
+
+static void GB_merge_sequential_1
+(
+    int64_t *GB_RESTRICT S_0,              // output of length nleft + nright
+    const int64_t *GB_RESTRICT Left_0,     // left input of length nleft
+    const int64_t nleft,                   // # of entries in Left
+    const int64_t *GB_RESTRICT Right_0,    // right input of length nright
+    const int64_t nright                   // # of entries in Right
+)
+{
+    int64_t p, pleft, pright ;  // next position in S, Left, and Right
+
+    // move one entry per iteration into S, while both inputs have entries
+    for (p = 0, pleft = 0, pright = 0 ; pleft < nleft && pright < nright ; p++)
+    {
+        if (GB_lt_1 (Left_0, pleft, Right_0, pright))
+        { 
+            // GB_lt_1 holds for the pair: S [p] = Left [pleft++]
+            S_0 [p] = Left_0 [pleft] ;
+            pleft++ ;
+        }
+        else
+        { 
+            // GB_lt_1 does not hold for the pair: S [p] = Right [pright++]
+            S_0 [p] = Right_0 [pright] ;
+            pright++ ;
+        }
+    }
+
+    // the loop ends once an input is used up; copy what is left of the other
+    if (pleft < nleft)
+    { 
+        int64_t nremaining = (nleft - pleft) ;      // tail of Left
+        memcpy (S_0 + p, Left_0 + pleft, nremaining * sizeof (int64_t)) ;
+    }
+    else if (pright < nright)
+    { 
+        int64_t nremaining = (nright - pright) ;    // tail of Right
+        memcpy (S_0 + p, Right_0 + pright, nremaining * sizeof (int64_t)) ;
+    }
+}
+
+//------------------------------------------------------------------------------
+// GB_merge_parallel_1: parallel merge
+//------------------------------------------------------------------------------
+
+// The two input arrays, Bigger [0..nbigger-1] and Smaller [0..nsmaller-1], are
+// sorted.  They are merged into the output array S [0..nbigger+nsmaller-1],
+// using a parallel merge.  nbigger >= nsmaller always holds.
+
+void GB_merge_parallel_1                // parallel merge
+(
+    int64_t *GB_RESTRICT S_0,           // output of length nbigger + nsmaller
+    const int64_t *GB_RESTRICT Bigger_0,   // Bigger [0..nbigger-1]
+    const int64_t nbigger,                 // # of entries in Bigger
+    const int64_t *GB_RESTRICT Smaller_0,  // Smaller [0..nsmaller-1]
+    const int64_t nsmaller                 // # of entries in Smaller
+)
+{
+
+    //--------------------------------------------------------------------------
+    // split the bigger input in half
+    //--------------------------------------------------------------------------
+
+    // The first task will handle Bigger [0..nhalf-1], and the second task
+    // will handle Bigger [nhalf..nbigger-1].
+    // The Pivot is the first entry of the second half, Bigger [nhalf].
+    int64_t nhalf = nbigger/2 ;
+    int64_t Pivot_0 [1] ; Pivot_0 [0] = Bigger_0 [nhalf] ;
+
+    //--------------------------------------------------------------------------
+    // find where the Pivot appears in the smaller list
+    //--------------------------------------------------------------------------
+
+    // binary search of Smaller [0..nsmaller-1] for the Pivot.  The indices
+    // are int64_t, not long: long is only 32 bits on LLP64 platforms.
+    int64_t pleft = 0, pright = nsmaller-1 ;
+    while (pleft < pright)
+    {
+        int64_t pmiddle = (pleft + pright) / 2 ;
+        if (GB_lt_1 (Smaller_0, pmiddle, Pivot_0, 0))
+        { 
+            // if in the list, Pivot appears in [pmiddle+1..pright]
+            pleft = pmiddle + 1 ;
+        }
+        else
+        { 
+            // if in the list, Pivot appears in [pleft..pmiddle]
+            pright = pmiddle ;
+        }
+    }
+
+    // binary search is narrowed down to a single item
+    // or it has found the list is empty:
+    ASSERT (pleft == pright || pleft == pright + 1) ;
+
+    // If found is true then Smaller [pleft == pright] == Pivot.  If duplicates
+    // appear then Smaller [pleft] is any one of the entries equal to the Pivot
+    // in the list.  If found is false then
+    //    Smaller [original_pleft ... pleft-1] < Pivot and
+    //    Smaller [pleft+1 ... original_pright] > Pivot holds.
+    //    The value Smaller [pleft] may be either < or > Pivot.
+    bool found = (pleft == pright &&
+        Smaller_0 [pleft] == Pivot_0 [0]) ;
+
+    // Modify pleft and pright, to place Smaller [pleft] on the proper side:
+    if (!found && (pleft == pright))
+    { 
+        if (GB_lt_1 (Smaller_0, pleft, Pivot_0, 0))
+        {
+            pleft++ ;
+        }
+        else
+        {
+            pright++ ;
+        }
+    }
+
+    // Now the following conditions hold:
+
+    // If found is false then
+    //    Smaller [original_pleft ... pleft-1] < Pivot and
+    //    Smaller [pleft ... original_pright] > Pivot holds.
+    //    (pright is not used again below; only pleft defines the split).
+
+    // If Smaller has no duplicates, then whether or not Pivot is found,
+    //    Smaller [original_pleft ... pleft-1] < Pivot and
+    //    Smaller [pleft ... original_pright] >= Pivot holds.
+
+    //--------------------------------------------------------------------------
+    // merge each part in parallel
+    //--------------------------------------------------------------------------
+
+    // The first task merges Bigger [0..nhalf-1] and Smaller [0..pleft-1] into
+    // the output S [0..nhalf+pleft-1].  The entries in Bigger [0..nhalf-1] are
+    // all < Pivot (if no duplicates appear in Bigger) or <= Pivot otherwise.
+
+    int64_t *GB_RESTRICT S_task0_0 = S_0 ;
+
+    const int64_t *GB_RESTRICT Left_task0_0 = Bigger_0 ;
+    const int64_t nleft_task0 = nhalf ;
+
+    const int64_t *GB_RESTRICT Right_task0_0 = Smaller_0 ;
+    const int64_t nright_task0 = pleft ;
+
+    // The second task merges Bigger [nhalf..nbigger-1] and
+    // Smaller [pleft..nsmaller-1] into S [nhalf+pleft..nbigger+nsmaller-1].
+    // The entries in Bigger [nhalf..nbigger-1] and Smaller [pleft..nsmaller-1]
+    // are all >= Pivot.
+
+    int64_t *GB_RESTRICT S_task1_0 = S_0 + nhalf + pleft ;
+
+    const int64_t *GB_RESTRICT Left_task1_0 = Bigger_0 + nhalf ;
+    const int64_t nleft_task1 = (nbigger - nhalf) ;
+
+    const int64_t *GB_RESTRICT Right_task1_0 = Smaller_0 + pleft ;
+    const int64_t nright_task1 = (nsmaller - pleft) ;
+
+    GB_TASK (GB_merge_select_1, S_task0_0,
+        Left_task0_0,  nleft_task0,
+        Right_task0_0, nright_task0) ;
+
+    GB_TASK (GB_merge_select_1, S_task1_0,
+        Left_task1_0,  nleft_task1,
+        Right_task1_0, nright_task1) ;
+
+    GB_TASK_WAIT
+}
+
+//------------------------------------------------------------------------------
+// GB_merge_select_1: parallel or sequential merge
+//------------------------------------------------------------------------------
+
+// The two input arrays, Left [0..nleft-1] and Right [0..nright-1], are sorted.
+// They are merged into the output array S [0..nleft+nright-1], using either
+// the sequential merge (for small lists) or the parallel merge (for big
+// lists).
+
+void GB_merge_select_1      // parallel or sequential merge
+(
+    int64_t *GB_RESTRICT S_0,              // output of length nleft+nright
+    const int64_t *GB_RESTRICT Left_0,     // Left [0..nleft-1]
+    const int64_t nleft,                   // # of entries in Left
+    const int64_t *GB_RESTRICT Right_0,    // Right [0..nright-1]
+    const int64_t nright                   // # of entries in Right
+)
+{
+
+    if (nleft + nright < GB_BASECASE)
+    { 
+        // combined size is below GB_BASECASE: sequential merge
+        GB_merge_sequential_1 (S_0, Left_0, nleft, Right_0, nright) ;
+    }
+    else if (nleft >= nright)
+    { 
+        // parallel merge; Left is passed as Bigger since nleft >= nright
+        GB_merge_parallel_1 (S_0, Left_0, nleft, Right_0, nright) ;
+    }
+    else
+    { 
+        // parallel merge; Right is passed as Bigger since nright > nleft
+        GB_merge_parallel_1 (S_0, Right_0, nright, Left_0, nleft) ;
+    }
+}
+
+//------------------------------------------------------------------------------
+// GB_mergesort_1:  parallel merge sort of a length-n array
+//------------------------------------------------------------------------------
+
+// GB_mergesort_1 sorts an int64_t array A of size n in ascending
+// order, using a parallel mergesort.  W is a workspace array of size n.
+// Small arrays are sorted with a quicksort method.
+
+void GB_mergesort_1 // sort array A of size n
+(
+    int64_t *GB_RESTRICT A_0,      // size n array, sorted on output
+    int64_t *GB_RESTRICT W_0,      // size n array, workspace
+    const int64_t n                // # of entries in A and in W
+)
+{
+
+    if (n <= GB_BASECASE)
+    { 
+
+        // ---------------------------------------------------------------------
+        // sequential quicksort; no workspace needed
+        // ---------------------------------------------------------------------
+
+        GB_qsort_1a (A_0, n) ;
+
+    }
+    else
+    { 
+
+        // ---------------------------------------------------------------------
+        // recursive merge sort if A has length greater than GB_BASECASE
+        // ---------------------------------------------------------------------
+
+        // ---------------------------------------------------------------------
+        // split A into four quarters
+        // ---------------------------------------------------------------------
+
+        int64_t n12 = n / 2 ;           // split n into n12 and n34
+        int64_t n34 = n - n12 ;
+
+        int64_t n1 = n12 / 2 ;          // split n12 into n1 and n2
+        int64_t n2 = n12 - n1 ;
+
+        int64_t n3 = n34 / 2 ;          // split n34 into n3 and n4
+        int64_t n4 = n34 - n3 ;
+
+        int64_t n123 = n12 + n3 ;       // start of 4th quarter = n1 + n2 + n3
+
+        // 1st quarter of A and W: entries [0..n1-1]
+        int64_t *GB_RESTRICT A_1st0 = A_0 ;
+
+        int64_t *GB_RESTRICT W_1st0 = W_0 ;
+
+        // 2nd quarter of A and W: entries [n1..n12-1]
+        int64_t *GB_RESTRICT A_2nd0 = A_0 + n1 ;
+
+        int64_t *GB_RESTRICT W_2nd0 = W_0 + n1 ;
+
+        // 3rd quarter of A and W: entries [n12..n123-1]
+        int64_t *GB_RESTRICT A_3rd0 = A_0 + n12 ;
+
+        int64_t *GB_RESTRICT W_3rd0 = W_0 + n12 ;
+
+        // 4th quarter of A and W: entries [n123..n-1]
+        int64_t *GB_RESTRICT A_4th0 = A_0 + n123 ;
+
+        int64_t *GB_RESTRICT W_4th0 = W_0 + n123 ;
+
+        // ---------------------------------------------------------------------
+        // sort each quarter of A in parallel, using W as workspace
+        // ---------------------------------------------------------------------
+
+        GB_TASK (GB_mergesort_1, A_1st0, W_1st0, n1) ;
+        GB_TASK (GB_mergesort_1, A_2nd0, W_2nd0, n2) ;
+        GB_TASK (GB_mergesort_1, A_3rd0, W_3rd0, n3) ;
+        GB_TASK (GB_mergesort_1, A_4th0, W_4th0, n4) ;
+
+        GB_TASK_WAIT
+
+        // ---------------------------------------------------------------------
+        // merge pairs of quarters of A into two halves of W, in parallel
+        // ---------------------------------------------------------------------
+
+        GB_TASK (GB_merge_select_1, W_1st0, A_1st0, n1, A_2nd0, n2) ;
+        GB_TASK (GB_merge_select_1, W_3rd0, A_3rd0, n3, A_4th0, n4) ;
+
+        GB_TASK_WAIT
+
+        // ---------------------------------------------------------------------
+        // merge the two halves of W into A
+        // ---------------------------------------------------------------------
+
+        GB_merge_select_1 (A_0, W_1st0, n12, W_3rd0, n34) ;
+    }
+}
+
+//------------------------------------------------------------------------------
+// GB_msort_1: gateway for parallel merge sort
+//------------------------------------------------------------------------------
+
+void GB_msort_1     // sort array A of size n.
+(
+    int64_t *GB_RESTRICT A_0,      // size n array, sorted on output
+    int64_t *GB_RESTRICT W_0,      // size n array, workspace
+    const int64_t n,               // # of entries in A and in W
+    const int nthreads          // # of threads to use
+)
+{
+
+    if (GB_OPENMP_GET_NUM_THREADS > 1)
+    {
+
+        // ---------------------------------------------------------------------
+        // parallel mergesort: already in parallel region
+        // ---------------------------------------------------------------------
+
+        // GB_msort_1 is already in a parallel region in the caller.  This
+        // does not occur inside GraphBLAS, but the user application might be
+        // calling GraphBLAS inside its own parallel region.  The nthreads
+        // argument is not used on this path.
+        GB_mergesort_1 (A_0, W_0, n) ;
+
+    }
+    else
+    { 
+
+        // ---------------------------------------------------------------------
+        // parallel mergesort: start a parallel region
+        // ---------------------------------------------------------------------
+
+        GB_TASK_MASTER (nthreads)
+        GB_mergesort_1 (A_0, W_0, n) ;
+
+    }
+}
+
diff --git a/Source/GB_msort_1.h b/Source/GB_msort_1.h
new file mode 100644
index 0000000000..b584dd337d
--- /dev/null
+++ b/Source/GB_msort_1.h
@@ -0,0 +1,42 @@
+//------------------------------------------------------------------------------
+// GB_msort_1.h: definitions for GB_msort_1.c
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// A parallel mergesort of an array of n integers.
+
+#include "GB_sort.h"
+
+//------------------------------------------------------------------------------
+// prototypes only needed for GB_msort_1
+//------------------------------------------------------------------------------
+
+void GB_merge_parallel_1                // parallel merge
+(
+    int64_t *GB_RESTRICT S_0,           // output of length nbigger + nsmaller
+    const int64_t *GB_RESTRICT Bigger_0,   // Bigger [0..nbigger-1]
+    const int64_t nbigger,                 // # of entries in Bigger
+    const int64_t *GB_RESTRICT Smaller_0,  // Smaller [0..nsmaller-1]
+    const int64_t nsmaller                 // # of entries in Smaller
+) ;
+
+void GB_merge_select_1      // parallel or sequential merge
+(
+    int64_t *GB_RESTRICT S_0,              // output of length nleft+nright
+    const int64_t *GB_RESTRICT Left_0,     // Left [0..nleft-1]
+    const int64_t nleft,                   // # of entries in Left
+    const int64_t *GB_RESTRICT Right_0,    // Right [0..nright-1]
+    const int64_t nright                   // # of entries in Right
+) ;
+
+void GB_mergesort_1 // sort array A of size n
+(
+    int64_t *GB_RESTRICT A_0,      // size n array
+    int64_t *GB_RESTRICT W_0,      // size n array, workspace
+    const int64_t n                // # of entries in A and in W
+) ;
+
diff --git a/Source/GB_msort_2.c b/Source/GB_msort_2.c
index 7dd4ed4c73..8bec0ff3c0 100644
--- a/Source/GB_msort_2.c
+++ b/Source/GB_msort_2.c
@@ -2,7 +2,7 @@
 // GB_msort_2: sort a 2-by-n list of integers, using A[0:1][ ] as the key
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -102,8 +102,6 @@ void GB_merge_parallel_2                // parallel merge
     // find where the Pivot appears in the smaller list
     //--------------------------------------------------------------------------
 
-    // This is like GB_BINARY_TRIM_SEARCH, but applied to a 2-by-n array.
-
     // binary search of Smaller [0..nsmaller-1] for the Pivot
 
     long pleft = 0, pright = nsmaller-1 ;
diff --git a/Source/GB_msort_2.h b/Source/GB_msort_2.h
index fe0d0095c4..752ff9f4fe 100644
--- a/Source/GB_msort_2.h
+++ b/Source/GB_msort_2.h
@@ -2,7 +2,7 @@
 // GB_msort_2.h: definitions for GB_msort_2.c
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_msort_3.c b/Source/GB_msort_3.c
index 99b09c318c..7a3bff16e4 100644
--- a/Source/GB_msort_3.c
+++ b/Source/GB_msort_3.c
@@ -2,7 +2,7 @@
 // GB_msort_3: sort a 3-by-n list of integers, using A[0:2][ ] as the key
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -114,8 +114,6 @@ void GB_merge_parallel_3                // parallel merge
     // find where the Pivot appears in the smaller list
     //--------------------------------------------------------------------------
 
-    // This is like GB_BINARY_TRIM_SEARCH, but applied to a 3-by-n array.
-
     // binary search of Smaller [0..nsmaller-1] for the Pivot
 
     long pleft = 0, pright = nsmaller-1 ;
@@ -408,7 +406,7 @@ void GB_mergesort_3 // sort array A of size 3-by-n, using 3 keys (A [0:1][])
 // GB_msort_3: gateway for parallel merge sort
 //------------------------------------------------------------------------------
 
-void GB_msort_3     // sort array A of size 2-by-n, using 2 keys (A [0:1][])
+void GB_msort_3     // sort array A of size 3-by-n, using 3 keys (A [0:2][])
 (
     int64_t *GB_RESTRICT A_0,      // size n array
     int64_t *GB_RESTRICT A_1,      // size n array
diff --git a/Source/GB_msort_3.h b/Source/GB_msort_3.h
index d15f71df63..254e8abd56 100644
--- a/Source/GB_msort_3.h
+++ b/Source/GB_msort_3.h
@@ -2,7 +2,7 @@
 // GB_msort_3.h: definitions for GB_msort_3.c
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_mxm.c b/Source/GB_mxm.c
index 06ce2b8e22..c83ea59109 100644
--- a/Source/GB_mxm.c
+++ b/Source/GB_mxm.c
@@ -2,7 +2,7 @@
 // GB_mxm: matrix-matrix multiply for GrB_mxm, GrB_mxv, and GrB_vxm
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,7 @@ GrB_Info GB_mxm                     // C<M> = A*B
     const bool C_replace,           // if true, clear C before writing to it
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_Semiring semiring,    // defines '+' and '*' for C=A*B
     const GrB_Matrix A,             // input matrix
@@ -105,14 +106,19 @@ GrB_Info GB_mxm                     // C<M> = A*B
     // T = A*B, A'*B, A*B', or A'*B', also using the mask to cut time and memory
     //--------------------------------------------------------------------------
 
-    // the mask is used to cut time and memory usage for GB_AxB_meta,
-    // but only if it is not complemented.
+    // If C is dense (with no pending work), and the accum is present, then
+    // C+=A*B can be done in place (C_replace is effectively false).  If C is
+    // dense, M is present, and C_replace is false, then C<M>+=A*B or
+    // C<!M>+=A*B can also be done in place.  In all of these cases, C remains
+    // dense.
+
     bool mask_applied = false ;
-    bool C_is_csc = C->is_csc ;
-    GrB_Matrix T, MT = NULL ;
-    info = GB_AxB_meta (&T, C_is_csc, &MT, M, Mask_comp, A, B, semiring,
-        A_transpose, B_transpose, flipxy, &mask_applied, AxB_method,
-        &(C->AxB_method_used), Context) ;
+    bool done_in_place = false ;
+    GrB_Matrix T = NULL, MT = NULL ;
+    info = GB_AxB_meta (&T, C, C_replace, C->is_csc, &MT, M, Mask_comp,
+        Mask_struct, accum, A, B, semiring, A_transpose, B_transpose, flipxy,
+        &mask_applied, &done_in_place, AxB_method, &(C->AxB_method_used),
+        Context) ;
 
     if (info != GrB_SUCCESS)
     { 
@@ -122,6 +128,14 @@ GrB_Info GB_mxm                     // C<M> = A*B
         return (info) ;
     }
 
+    if (done_in_place)
+    { 
+        // C<...>+=A*B has been computed in place; no more work to do
+        GB_MATRIX_FREE (&MT) ;
+        ASSERT_MATRIX_OK (C, "C from GB_mxm (in place)", GB0) ;
+        return (info) ;
+    }
+
     ASSERT_MATRIX_OK (T, "T=A*B from GB_AxB_meta", GB0) ;
     ASSERT_MATRIX_OK_OR_NULL (MT, "MT from GB_AxB_meta", GB0) ;
     ASSERT (GB_ZOMBIES_OK (T)) ;
@@ -143,6 +157,25 @@ GrB_Info GB_mxm                     // C<M> = A*B
         // and is a pure transplant.  Also conform C to its desired
         // hypersparsity.
         GB_MATRIX_FREE (&MT) ;
+        if (GB_ZOMBIES (T) && T->type != C->type)
+        { 
+            // T = A*B can be constructed with zombies, using the dot3 method.
+            // Since its type differs from C, its values will be typecasted
+            // from T->type to C->type.  The zombies are killed before
+            // typecasting.  Otherwise, if they were not killed, uninitialized
+            // values in T->x for these zombies will get typecasted into C->x.
+            // Typecasting a zombie is safe, since the values of all zombies
+            // are ignored.  But valgrind complains about it, so they are
+            // killed now.  Also see the discussion in GB_transplant.
+            GBBURBLE ("(wait, so zombies are not typecasted) ") ;
+            info = GB_wait (T, Context) ;
+            if (info != GrB_SUCCESS)
+            { 
+                // out of memory
+                GB_MATRIX_FREE (&T) ;
+                return (info) ;
+            }
+        }
         info = GB_transplant_conform (C, C->type, &T, Context) ;
         #ifdef GB_DEBUG
         if (info == GrB_SUCCESS)
@@ -159,7 +192,7 @@ GrB_Info GB_mxm                     // C<M> = A*B
         // C<M> = accum (C,T)
         // GB_accum_mask also conforms C to its desired hypersparsity
         info = GB_accum_mask (C, M, MT, accum, &T, C_replace, Mask_comp,
-            Context) ;
+            Mask_struct, Context) ;
         GB_MATRIX_FREE (&MT) ;
         #ifdef GB_DEBUG
         if (info == GrB_SUCCESS)
diff --git a/Source/GB_mxm.h b/Source/GB_mxm.h
index 3b4c52a5ae..cb8837bdf0 100644
--- a/Source/GB_mxm.h
+++ b/Source/GB_mxm.h
@@ -2,14 +2,16 @@
 // GB_mxm.h: definitions for C=A*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 #ifndef GB_MXM_H
 #define GB_MXM_H
-#include "GB.h"
+#include "GB_AxB_saxpy3.h"
+
+//------------------------------------------------------------------------------
 
 GrB_Info GB_mxm                     // C<M> = A*B
 (
@@ -17,6 +19,7 @@ GrB_Info GB_mxm                     // C<M> = A*B
     const bool C_replace,           // if true, clear C before writing to it
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GrB_Semiring semiring,    // defines '+' and '*' for C=A*B
     const GrB_Matrix A,             // input matrix
@@ -28,106 +31,44 @@ GrB_Info GB_mxm                     // C<M> = A*B
     GB_Context Context
 ) ;
 
-GrB_Info GB_AxB_saxpy_parallel      // parallel C=A*B multiply
-(
-    GrB_Matrix *Chandle,            // output matrix, NULL on input
-    GrB_Matrix M,                   // optional mask matrix
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
-    GrB_Desc_Value *AxB_method_used,// method selected by thread zero
-    bool *mask_applied,             // if true, mask was applied
-    GB_Context Context
-) ;
-
-GrB_Info GB_AxB_dot_parallel        // parallel C=A'*B
+GrB_Info GB_AxB_dot                 // dot product (multiple methods)
 (
     GrB_Matrix *Chandle,            // output matrix, NULL on input
+    GrB_Matrix C_in_place,          // input/output matrix, if done in place
     GrB_Matrix M,                   // optional mask matrix
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,             // input matrix A
     const GrB_Matrix B,             // input matrix B
     const GrB_Semiring semiring,    // semiring that defines C=A*B
     const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
     bool *mask_applied,             // if true, mask was applied
+    bool *done_in_place,            // if true, C_in_place was computed in place
     GB_Context Context
 ) ;
 
-void GB_AxB_select                  // select method for A*B
-(
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
-    // output
-    GrB_Desc_Value *AxB_method_used,        // method to use
-    int64_t *bjnz_max                       // # entries in densest col of B
-) ;
-
-GrB_Info GB_AxB_saxpy_sequential    // single-threaded C<M>=A*B
-(
-    GrB_Matrix *Chandle,            // output matrix, NULL on input
-    GrB_Matrix M,                   // optional mask matrix
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    const GrB_Desc_Value AxB_method,// already chosen
-    const int64_t bjnz_max,         // for heap method only
-    const bool check_for_dense_mask,// if true, check floplimit for mask 
-    bool *mask_applied,             // if true, mask was applied
-    const int Sauna_id              // Sauna to use, for Gustavson method only
-) ;
-
 GrB_Info GB_AxB_flopcount
 (
-    bool *result,               // result of test (total_flops <= floplimit)
-    int64_t *Bflops,            // size B->nvec+1 and all zero, if present
-    int64_t *Bflops_per_entry,  // size nnz(B)+1 and all zero, if present
+    int64_t *Mwork,             // amount of work to handle the mask M
+    int64_t *Bflops,            // size B->nvec+1 and all zero
     const GrB_Matrix M,         // optional mask matrix
+    const bool Mask_comp,       // if true, mask is complemented
     const GrB_Matrix A,
     const GrB_Matrix B,
-    int64_t floplimit,          // maximum flops to compute if Bflops NULL
     GB_Context Context
 ) ;
 
-GrB_Info GB_AxB_heap                // C<M>=A*B or C=A*B using a heap
-(
-    GrB_Matrix *Chandle,            // output matrix
-    const GrB_Matrix M_in,          // mask matrix for C<M>=A*B
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix
-    const GrB_Matrix B,             // input matrix
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    bool *mask_applied,             // if true, mask was applied
-    const int64_t bjnz_max          // max # entries in any vector of B
-) ;
-
-GrB_Info GB_AxB_Gustavson           // C=A*B or C<M>=A*B, Gustavson's method
-(
-    GrB_Matrix *Chandle,            // output matrix
-    const GrB_Matrix M_in,          // optional matrix
-    const bool Mask_comp,           // if true, use !M
-    const GrB_Matrix A,             // input matrix A
-    const GrB_Matrix B,             // input matrix B
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    bool *mask_applied,             // if true, mask was applied
-    const int Sauna_id              // Sauna to use
-) ;
-
 GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
 (
-    GrB_Matrix *Chandle,            // output matrix C
+    GrB_Matrix *Chandle,            // output matrix (if not done in place)
+    GrB_Matrix C_in_place,          // input/output matrix, if done in place
+    bool C_replace,                 // C matrix descriptor
     const bool C_is_csc,            // desired CSR/CSC format of C
     GrB_Matrix *MT_handle,          // return MT = M' to caller, if computed
     const GrB_Matrix M_in,          // mask for C<M> (not complemented)
     const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
+    const GrB_BinaryOp accum,       // accum operator for C_input += A*B
     const GrB_Matrix A_in,          // input matrix
     const GrB_Matrix B_in,          // input matrix
     const GrB_Semiring semiring,    // semiring that defines C=A*B
@@ -135,7 +76,8 @@ GrB_Info GB_AxB_meta                // C<M>=A*B meta algorithm
     bool B_transpose,               // if true, use B', else B
     bool flipxy,                    // if true, do z=fmult(b,a) vs fmult(a,b)
     bool *mask_applied,             // if true, mask was applied
-    const GrB_Desc_Value AxB_method,// for auto vs user selection of methods
+    bool *done_in_place,            // if true, C was computed in place
+    GrB_Desc_Value AxB_method,      // for auto vs user selection of methods
     GrB_Desc_Value *AxB_method_used,// method selected
     GB_Context Context
 ) ;
@@ -160,18 +102,6 @@ GrB_Info GB_AxB_colscale            // C = A*D, column scale with diagonal D
     GB_Context Context
 ) ;
 
-GrB_Info GB_AxB_alloc           // estimate nnz(C) and allocate C for C=A*B
-(
-    GrB_Matrix *Chandle,        // output matrix
-    const GrB_Type ctype,       // type of C
-    const GrB_Index cvlen,      // vector length of C
-    const GrB_Index cvdim,      // # of vectors of C
-    const GrB_Matrix M,         // optional mask
-    const GrB_Matrix A,         // input matrix A
-    const GrB_Matrix B,         // input matrix B
-    const bool numeric,         // if true, allocate A->x, else A->x is NULL
-    const int64_t cnz_extra     // added to the rough estimate (if M NULL)
-) ;
 
 bool GB_AxB_semiring_builtin        // true if semiring is builtin
 (
@@ -189,18 +119,6 @@ bool GB_AxB_semiring_builtin        // true if semiring is builtin
     GB_Type_code *zcode             // type code for z output
 ) ;
 
-GrB_Info GB_AxB_Gustavson_builtin
-(
-    GrB_Matrix C,                   // output matrix
-    const GrB_Matrix M,             // M matrix for C<M> (not complemented)
-    const GrB_Matrix A,             // input matrix
-    const bool A_is_pattern,        // true if only the pattern of A is used
-    const GrB_Matrix B,             // input matrix
-    const bool B_is_pattern,        // true if only the pattern of B is used
-    const GrB_Semiring semiring,    // semiring that defines C=A*B
-    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
-    GB_Sauna Sauna                  // sparse accumulator
-) ;
 
 GrB_Info GB_AxB_dot2                // C = A'*B using dot product method
 (
@@ -211,6 +129,7 @@ GrB_Info GB_AxB_dot2                // C = A'*B using dot product method
     // dot3 is used for C<M>=A'*B
     const bool Mask_comp,           // if true, use !M
 #endif
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix *Aslice,       // input matrices (already sliced)
     const GrB_Matrix B,             // input matrix
     const GrB_Semiring semiring,    // semiring that defines C=A*B
@@ -228,70 +147,11 @@ bool GB_is_diagonal             // true if A is diagonal
     GB_Context Context
 ) ;
 
-GrB_Info GB_hcat_fine_slice // horizontal concatenation and sum of slices of C
-(
-    GrB_Matrix *Chandle,    // output matrix C to create
-    int nthreads,           // # of slices to concatenate
-    GrB_Matrix *Cslice,     // array of slices of size nthreads
-    GrB_Monoid add,         // monoid to use to sum up the entries
-    int *Sauna_ids,         // size nthreads, Sauna id's of each thread
-    GB_Context Context
-) ;
-
-GrB_Info GB_hcat_slice      // horizontal concatenation of the slices of C
-(
-    GrB_Matrix *Chandle,    // output matrix C to create
-    int nthreads,           // # of slices to concatenate
-    GrB_Matrix *Cslice,     // array of slices of size nthreads
-    GB_Context Context
-) ;
-
-GrB_Info GB_fine_slice  // slice B into nthreads fine hyperslices
-(
-    GrB_Matrix B,       // matrix to slice
-    int nthreads,       // # of slices to create
-    int64_t *Slice,     // array of size nthreads+1 that defines the slice
-    GrB_Matrix *Bslice, // array of output slices, of size nthreads
-    GB_Context Context
-) ;
-
-GrB_Info GB_AxB_user
-(
-    const GrB_Desc_Value GB_AxB_method,
-    const GrB_Semiring GB_s,
-
-    GrB_Matrix *GB_Chandle,
-    const GrB_Matrix GB_M,
-    const GrB_Matrix GB_A,          // not used for dot2 method
-    const GrB_Matrix GB_B,
-    bool GB_flipxy,
-
-    // for heap method only:
-    int64_t *GB_RESTRICT GB_List,
-    GB_pointer_pair *GB_RESTRICT GB_pA_pair,
-    GB_Element *GB_RESTRICT GB_Heap,
-    const int64_t GB_bjnz_max,
-
-    // for Gustavson's method only:
-    GB_Sauna GB_C_Sauna,
-
-    // for dot method only:
-    const GrB_Matrix *GB_Aslice,    // for dot2 only
-    int64_t *GB_RESTRICT GB_B_slice,   // for dot2 only
-    const int GB_dot_nthreads,      // for dot2 and dot3
-    const int GB_naslice,           // for dot2 only
-    const int GB_nbslice,           // for dot2 only
-    int64_t **GB_C_counts,          // for dot2 only
-
-    // for dot3 method only:
-    const GB_task_struct *GB_RESTRICT GB_TaskList,
-    const int GB_ntasks
-) ;
-
 GrB_Info GB_AxB_dot3                // C<M> = A'*B using dot product method
 (
     GrB_Matrix *Chandle,            // output matrix
     const GrB_Matrix M,             // mask matrix for C<M>=A'*B or C<!M>=A'*B
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,             // input matrix
     const GrB_Matrix B,             // input matrix
     const GrB_Semiring semiring,    // semiring that defines C=A*B
@@ -323,5 +183,30 @@ GrB_Info GB_AxB_dot3_one_slice
     GB_Context Context
 ) ;
 
+GrB_Info GB_AxB_saxpy3              // C = A*B using Gustavson+Hash
+(
+    GrB_Matrix *Chandle,            // output matrix
+    GrB_Matrix M_input,             // optional mask matrix
+    const bool Mask_comp,           // if true, use !M
+    const bool Mask_struct,         // if true, use only the structure of M
+    const GrB_Matrix A,             // input matrix A
+    const GrB_Matrix B,             // input matrix B
+    const GrB_Semiring semiring,    // semiring that defines C=A*B
+    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
+    bool *mask_applied,             // if true, then mask was applied
+    const GrB_Desc_Value AxB_method,    // Default, Gustavson, or Hash
+    GB_Context Context
+) ;
+
+GrB_Info GB_AxB_dot4                // C+=A'*B, dot product method
+(
+    GrB_Matrix C,                   // input/output matrix, must be dense
+    const GrB_Matrix A,             // input matrix
+    const GrB_Matrix B,             // input matrix
+    const GrB_Semiring semiring,    // semiring that defines C+=A*B
+    const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
+    GB_Context Context
+) ;
+
 #endif
 
diff --git a/Source/GB_new.c b/Source/GB_new.c
index 4c6c921edc..718b83e47e 100644
--- a/Source/GB_new.c
+++ b/Source/GB_new.c
@@ -2,7 +2,7 @@
 // GB_new: create a new GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_nvals.c b/Source/GB_nvals.c
index 1216de40ac..1abcdc4b9b 100644
--- a/Source/GB_nvals.c
+++ b/Source/GB_nvals.c
@@ -2,7 +2,7 @@
 // GB_nvals: number of entries in a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_nvec_nonempty.c b/Source/GB_nvec_nonempty.c
index df705397c8..7061654941 100644
--- a/Source/GB_nvec_nonempty.c
+++ b/Source/GB_nvec_nonempty.c
@@ -2,7 +2,7 @@
 // GB_nvec_nonempty: count the number of non-empty vectors
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_op_is_second.c b/Source/GB_op_is_second.c
index 7af745f26b..e7e03028ca 100644
--- a/Source/GB_op_is_second.c
+++ b/Source/GB_op_is_second.c
@@ -2,7 +2,7 @@
 // GB_op_is_second: return true if op is the SECOND operator of the right type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -22,7 +22,8 @@ bool GB_op_is_second    // return true if op is SECOND, of the right type
         // of the right type
         return (true) ;
     }
-    else if (op->opcode == GB_SECOND_opcode)
+
+    if (op->opcode == GB_SECOND_opcode)
     {
         // op is the explict SECOND operator; check its type
         if (type == NULL)
@@ -35,16 +36,9 @@ bool GB_op_is_second    // return true if op is SECOND, of the right type
             // type is explicitly the right type
             return (true) ;
         }
-        else
-        { 
-            // wrong type
-            return (false) ;
-        }
-    }
-    else
-    { 
-        // wrong opcode
-        return (false) ;
     }
+
+    // wrong opcode or wrong type
+    return (false) ;
 }
 
diff --git a/Source/GB_ops.c b/Source/GB_ops.c
index ad42899b12..e67c2c66d7 100644
--- a/Source/GB_ops.c
+++ b/Source/GB_ops.c
@@ -2,13 +2,13 @@
 // GB_builtin.c: built-in types, functions, operators, and other externs
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
-// This file defines the predefined built-in objects: 11 types, 45 unary
-// operators, 256 binary operators, 44 monoids, and 960 semirings.
+// This file defines the predefined built-in types, descriptors, unary
+// operators, binary operators, monoids, and semirings.
 
 #include "GB.h"
 
@@ -44,6 +44,66 @@ GrB_Type
     GrB_FP32   = & GB_opaque_GrB_FP32   ,
     GrB_FP64   = & GB_opaque_GrB_FP64   ;
 
+//------------------------------------------------------------------------------
+// built-in descriptors
+//------------------------------------------------------------------------------
+
+#define o GxB_DEFAULT
+
+#define GB_DESC(name,out,mask,in0,in1)                          \
+struct GB_Descriptor_opaque GB_opaque_desc_ ## name =           \
+{                                                               \
+    GB_MAGIC,               /* initialized */                   \
+    out, mask, in0, in1,    /* settings in the spec */          \
+    o, o, o,                /* default: axb, #threads, chunk */ \
+    true                    /* pre-defined */                   \
+} ;                                                             \
+GrB_Descriptor GrB_DESC_ ## name = & GB_opaque_desc_ ## name ;
+
+//       name     outp         structure       comp      in0       in1
+
+// GrB_NULL     , o          , o             + o       , o       , o
+GB_DESC (T1     , o          , o             + o       , o       , GrB_TRAN )
+GB_DESC (T0     , o          , o             + o       , GrB_TRAN, o        )
+GB_DESC (T0T1   , o          , o             + o       , GrB_TRAN, GrB_TRAN )
+
+GB_DESC (C      , o          , o             + GrB_COMP, o       , o        )
+GB_DESC (CT1    , o          , o             + GrB_COMP, o       , GrB_TRAN )
+GB_DESC (CT0    , o          , o             + GrB_COMP, GrB_TRAN, o        )
+GB_DESC (CT0T1  , o          , o             + GrB_COMP, GrB_TRAN, GrB_TRAN )
+
+GB_DESC (S      , o          , GrB_STRUCTURE + o       , o       , o        )
+GB_DESC (ST1    , o          , GrB_STRUCTURE + o       , o       , GrB_TRAN )
+GB_DESC (ST0    , o          , GrB_STRUCTURE + o       , GrB_TRAN, o        )
+GB_DESC (ST0T1  , o          , GrB_STRUCTURE + o       , GrB_TRAN, GrB_TRAN )
+
+GB_DESC (SC     , o          , GrB_STRUCTURE + GrB_COMP, o       , o        )
+GB_DESC (SCT1   , o          , GrB_STRUCTURE + GrB_COMP, o       , GrB_TRAN )
+GB_DESC (SCT0   , o          , GrB_STRUCTURE + GrB_COMP, GrB_TRAN, o        )
+GB_DESC (SCT0T1 , o          , GrB_STRUCTURE + GrB_COMP, GrB_TRAN, GrB_TRAN )
+
+GB_DESC (R      , GrB_REPLACE, o             + o       , o       , o        )
+GB_DESC (RT1    , GrB_REPLACE, o             + o       , o       , GrB_TRAN )
+GB_DESC (RT0    , GrB_REPLACE, o             + o       , GrB_TRAN, o        )
+GB_DESC (RT0T1  , GrB_REPLACE, o             + o       , GrB_TRAN, GrB_TRAN )
+
+GB_DESC (RC     , GrB_REPLACE, o             + GrB_COMP, o       , o        )
+GB_DESC (RCT1   , GrB_REPLACE, o             + GrB_COMP, o       , GrB_TRAN )
+GB_DESC (RCT0   , GrB_REPLACE, o             + GrB_COMP, GrB_TRAN, o        )
+GB_DESC (RCT0T1 , GrB_REPLACE, o             + GrB_COMP, GrB_TRAN, GrB_TRAN )
+
+GB_DESC (RS     , GrB_REPLACE, GrB_STRUCTURE + o       , o       , o        )
+GB_DESC (RST1   , GrB_REPLACE, GrB_STRUCTURE + o       , o       , GrB_TRAN )
+GB_DESC (RST0   , GrB_REPLACE, GrB_STRUCTURE + o       , GrB_TRAN, o        )
+GB_DESC (RST0T1 , GrB_REPLACE, GrB_STRUCTURE + o       , GrB_TRAN, GrB_TRAN )
+
+GB_DESC (RSC    , GrB_REPLACE, GrB_STRUCTURE + GrB_COMP, o       , o        )
+GB_DESC (RSCT1  , GrB_REPLACE, GrB_STRUCTURE + GrB_COMP, o       , GrB_TRAN )
+GB_DESC (RSCT0  , GrB_REPLACE, GrB_STRUCTURE + GrB_COMP, GrB_TRAN, o        )
+GB_DESC (RSCT0T1, GrB_REPLACE, GrB_STRUCTURE + GrB_COMP, GrB_TRAN, GrB_TRAN )
+
+#undef o
+
 //------------------------------------------------------------------------------
 // built-in unary and binary operators
 //------------------------------------------------------------------------------
@@ -360,7 +420,7 @@ GB_MONOID_DEFINE_TERM ( GrB_, MAX_UINT16   , uint16_t , 0          , UINT16_MAX)
 GB_MONOID_DEFINE_TERM ( GrB_, MAX_UINT32   , uint32_t , 0          , UINT32_MAX)
 GB_MONOID_DEFINE_TERM ( GrB_, MAX_UINT64   , uint64_t , 0          , UINT64_MAX)
 GB_MONOID_DEFINE_TERM ( GrB_, MAX_FP32     , float    , -INFINITY  , INFINITY  )
-GB_MONOID_DEFINE_TERM ( GrB_, MAX_FP64     , double   , 
+GB_MONOID_DEFINE_TERM ( GrB_, MAX_FP64     , double   ,
     ((double) -INFINITY)  , ((double) INFINITY)  )
 
 // PLUS monoids:
@@ -387,6 +447,19 @@ GB_MONOID_DEFINE_TERM ( GrB_, TIMES_UINT64 , uint64_t , 1          , 0)
 GB_MONOID_DEFINE      ( GrB_, TIMES_FP32   , float    , 1          )
 GB_MONOID_DEFINE      ( GrB_, TIMES_FP64   , double   , 1          )
 
+// ANY monoids:
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_INT8     , int8_t   , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_INT16    , int16_t  , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_INT32    , int32_t  , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_INT64    , int64_t  , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_UINT8    , uint8_t  , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_UINT16   , uint16_t , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_UINT32   , uint32_t , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_UINT64   , uint64_t , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_FP32     , float    , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_FP64     , double   , 0, 0)
+GB_MONOID_DEFINE_TERM ( GxB_, ANY_BOOL     , bool     , 0, 0)
+
 // Boolean monoids:
 GB_MONOID_DEFINE_TERM ( GxB_, LOR_BOOL     , bool     , false      , true )
 GB_MONOID_DEFINE_TERM ( GxB_, LAND_BOOL    , bool     , true       , false)
diff --git a/Source/GB_ph_free.c b/Source/GB_ph_free.c
index d029a315c2..1c36896e96 100644
--- a/Source/GB_ph_free.c
+++ b/Source/GB_ph_free.c
@@ -2,7 +2,7 @@
 // GB_ph_free: free the A->p and A->h content of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_phix_free.c b/Source/GB_phix_free.c
index 3a62bb28f1..fc6640beda 100644
--- a/Source/GB_phix_free.c
+++ b/Source/GB_phix_free.c
@@ -2,7 +2,7 @@
 // GB_phix_free: free all content of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_printf.c b/Source/GB_printf.c
index 17ddfb4490..9ca43f3cc2 100644
--- a/Source/GB_printf.c
+++ b/Source/GB_printf.c
@@ -2,7 +2,7 @@
 // GB_printf.c: printing for GraphBLAS *check functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,8 +11,7 @@
 // GB_printf_function has been set by the caller, then that function is used
 // when f is NULL.
 
-#include "GB.h"
+#include "GB_printf.h"
 
-GB_PUBLIC
 int (* GB_printf_function ) (const char *format, ...) = NULL ;
 
diff --git a/Source/GB_printf.h b/Source/GB_printf.h
index 9cd6617d3b..1bbe7ac246 100644
--- a/Source/GB_printf.h
+++ b/Source/GB_printf.h
@@ -2,7 +2,7 @@
 // GB_printf.h: definitions for printing by GraphBLAS check functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_pslice.c b/Source/GB_pslice.c
index 477dabda7f..319d25dae0 100644
--- a/Source/GB_pslice.c
+++ b/Source/GB_pslice.c
@@ -2,7 +2,7 @@
 // GB_pslice: partition Ap for a parallel loop
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -24,16 +24,24 @@ bool GB_pslice          // slice Ap; return true if ok, false if out of memory
 )
 {
 
-    // allocate result
-    int64_t *Slice = NULL ;
-    (*Slice_handle) = NULL ;
-    GB_MALLOC_MEMORY (Slice, ntasks+1, sizeof (int64_t)) ;
-    if (Slice == NULL)
+    // allocate result, unless it is already allocated on input
+    int64_t *Slice ;
+    if ((*Slice_handle) == NULL)
     {
-        // out of memory
-        return (false) ;
+        Slice = NULL ;
+        (*Slice_handle) = NULL ;
+        GB_MALLOC_MEMORY (Slice, ntasks+1, sizeof (int64_t)) ;
+        if (Slice == NULL)
+        { 
+            // out of memory
+            return (false) ;
+        }
+        (*Slice_handle) = Slice ;
+    }
+    else
+    { 
+        Slice = (*Slice_handle) ;
     }
-    (*Slice_handle) = Slice ;
 
     const double work = (Ap == NULL) ? 0 : Ap [n] ;
 
@@ -57,7 +65,7 @@ bool GB_pslice          // slice Ap; return true if ok, false if out of memory
             // just pick what the binary search comes up with.
             int64_t wtask = ((taskid * work) / (double) ntasks) ;
             int64_t pright = n ;
-            GB_BINARY_TRIM_SEARCH (wtask, Ap, k, pright) ;
+            GB_TRIM_BINARY_SEARCH (wtask, Ap, k, pright) ;
             Slice [taskid] = k ;
         }
     }
diff --git a/Source/GB_qsort_1a.c b/Source/GB_qsort_1a.c
index 1f8e06f7ee..50bbf823c1 100644
--- a/Source/GB_qsort_1a.c
+++ b/Source/GB_qsort_1a.c
@@ -2,7 +2,7 @@
 // GB_qsort_1a: sort an 1-by-n list of integers
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_qsort_1b.c b/Source/GB_qsort_1b.c
index 82150e603f..66ba28ccd3 100644
--- a/Source/GB_qsort_1b.c
+++ b/Source/GB_qsort_1b.c
@@ -2,7 +2,7 @@
 // GB_qsort_1b: sort a 2-by-n list, using A [0][ ] as the sort key
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_qsort_2.c b/Source/GB_qsort_2.c
index d618d3ee29..52f788b719 100644
--- a/Source/GB_qsort_2.c
+++ b/Source/GB_qsort_2.c
@@ -2,7 +2,7 @@
 // GB_qsort_2: sort a 2-by-n list of integers, using A[0:1][ ] as the key
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_qsort_3.c b/Source/GB_qsort_3.c
index b5babd4cf7..6041a3c514 100644
--- a/Source/GB_qsort_3.c
+++ b/Source/GB_qsort_3.c
@@ -2,7 +2,7 @@
 // GB_qsort_3: sort a 3-by-n list of integers, using A[0:2][] as the key
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_queue_insert.c b/Source/GB_queue_insert.c
index 9ba1cf9d11..738812917f 100644
--- a/Source/GB_queue_insert.c
+++ b/Source/GB_queue_insert.c
@@ -2,7 +2,7 @@
 // GB_queue_insert:  insert a matrix at the head of the matrix queue
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_queue_remove.c b/Source/GB_queue_remove.c
index 55cf81d6cc..b563cca7b2 100644
--- a/Source/GB_queue_remove.c
+++ b/Source/GB_queue_remove.c
@@ -2,7 +2,7 @@
 // GB_queue_remove: remove a matrix from the matrix queue
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_queue_remove_head.c b/Source/GB_queue_remove_head.c
index 43b94c8c98..81799da850 100644
--- a/Source/GB_queue_remove_head.c
+++ b/Source/GB_queue_remove_head.c
@@ -2,7 +2,7 @@
 // GB_queue_remove_head: remove the matrix at the head of the matrix queue
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_queue_status.c b/Source/GB_queue_status.c
index 95c6097b1a..0c92b95fe3 100644
--- a/Source/GB_queue_status.c
+++ b/Source/GB_queue_status.c
@@ -2,7 +2,7 @@
 // GB_queue_status:  check the status of the queue for a particular matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_realloc_memory.c b/Source/GB_realloc_memory.c
index 8c478bf712..02aed52088 100644
--- a/Source/GB_realloc_memory.c
+++ b/Source/GB_realloc_memory.c
@@ -2,7 +2,7 @@
 // GB_realloc_memory: wrapper for realloc_function
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_reduce.h b/Source/GB_reduce.h
index d410b6fdc4..702850f32f 100644
--- a/Source/GB_reduce.h
+++ b/Source/GB_reduce.h
@@ -2,7 +2,7 @@
 // GB_reduce.h: definitions for GB_reduce
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_reduce_to_scalar.c b/Source/GB_reduce_to_scalar.c
index 13d920f0bc..5ebb7af9b1 100644
--- a/Source/GB_reduce_to_scalar.c
+++ b/Source/GB_reduce_to_scalar.c
@@ -2,7 +2,7 @@
 // GB_reduce_to_scalar: reduce a matrix to a scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,6 +14,7 @@
 // result is the same if A is in CSR or CSC format.
 
 #include "GB_reduce.h"
+#include "GB_atomics.h"
 #ifndef GBCOMPACT
 #include "GB_red__include.h"
 #endif
@@ -92,7 +93,7 @@ GrB_Info GB_reduce_to_scalar    // s = reduce_to_scalar (A)
     GB_void *GB_RESTRICT W = NULL ;
     GB_MALLOC_MEMORY (W, ntasks, zsize) ;
     if (W == NULL)
-    {
+    { 
         // out of memory
         return (GB_OUT_OF_MEMORY) ;
     }
@@ -168,6 +169,7 @@ GrB_Info GB_reduce_to_scalar    // s = reduce_to_scalar (A)
 
         if (!done)
         { 
+            GB_BURBLE_MATRIX (A, "generic ") ;
 
             // the switch factory didn't handle this case
             GxB_binary_function freduce = reduce->op->function ;
@@ -229,7 +231,7 @@ GrB_Info GB_reduce_to_scalar    // s = reduce_to_scalar (A)
                 // skip the work for this task if early exit is reached
                 #define GB_IF_NOT_EARLY_EXIT                            \
                     bool my_exit ;                                      \
-                    GB_PRAGMA (omp atomic read)                         \
+                    GB_ATOMIC_READ                                      \
                     my_exit = early_exit ;                              \
                     if (!my_exit)
 
@@ -240,7 +242,7 @@ GrB_Info GB_reduce_to_scalar    // s = reduce_to_scalar (A)
                         if (memcmp (s, terminal, zsize) == 0)           \
                         {                                               \
                             /* tell the other tasks to exit early */    \
-                            GB_PRAGMA (omp atomic write)                \
+                            GB_ATOMIC_WRITE                             \
                             early_exit = true ;                         \
                             break ;                                     \
                         }                                               \
@@ -271,6 +273,8 @@ GrB_Info GB_reduce_to_scalar    // s = reduce_to_scalar (A)
         // generic worker: sum up the entries, with typecasting
         //----------------------------------------------------------------------
 
+        GB_BURBLE_MATRIX (A, "generic ") ;
+
         GxB_binary_function freduce = reduce->op->function ;
         GB_cast_function
             cast_A_to_Z = GB_cast_factory (ztype->code, A->type->code) ;
diff --git a/Source/GB_reduce_to_vector.c b/Source/GB_reduce_to_vector.c
index 649eddbab9..95a2a8e505 100644
--- a/Source/GB_reduce_to_vector.c
+++ b/Source/GB_reduce_to_vector.c
@@ -2,7 +2,7 @@
 // GB_reduce_to_vector: reduce a matrix to a vector using a binary op
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -73,7 +73,8 @@ GrB_Info GB_reduce_to_vector        // C<M> = accum (C,reduce(A))
     GB_void *GB_RESTRICT Wlast_space = NULL ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     // C and M are n-by-1 GrB_Vector objects, typecasted to GrB_Matrix
     ASSERT (GB_VECTOR_OK (C)) ;
@@ -173,7 +174,7 @@ GrB_Info GB_reduce_to_vector        // C<M> = accum (C,reduce(A))
     // as a GrB_Matrix so it can be passed to GB_accum_mask without
     // typecasting.
 
-    ASSERT (n == (A_transpose) ? A->vdim : A->vlen) ;
+    ASSERT (n == ((A_transpose) ? A->vdim : A->vlen)) ;
 
     //--------------------------------------------------------------------------
     // scalar workspace
@@ -288,7 +289,7 @@ GrB_Info GB_reduce_to_vector        // C<M> = accum (C,reduce(A))
 
         if (Wfirst_space == NULL || Wlast_space == NULL ||
            !GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, A, ntasks))
-        {
+        { 
             // out of memory
             GB_FREE_ALL ;
             return (GB_OUT_OF_MEMORY) ;
@@ -330,6 +331,8 @@ GrB_Info GB_reduce_to_vector        // C<M> = accum (C,reduce(A))
         if (!done)
         { 
 
+            GB_BURBLE_MATRIX (A, "generic ") ;
+
             #define GB_ATYPE GB_void
             #define GB_CTYPE GB_void
 
@@ -471,7 +474,7 @@ GrB_Info GB_reduce_to_vector        // C<M> = accum (C,reduce(A))
 
             GB_MALLOC_MEMORY (pstart_slice, ntasks+1, sizeof (int64_t)) ;
             if (pstart_slice == NULL)
-            {
+            { 
                 // out of memory
                 GB_FREE_ALL ;
                 return (GB_OUT_OF_MEMORY) ;
@@ -527,6 +530,9 @@ GrB_Info GB_reduce_to_vector        // C<M> = accum (C,reduce(A))
 
             if (!done)
             { 
+                // if this fails, the template frees all workspace with the
+                // GB_FREE_ALL macro, defined above.
+                GB_BURBLE_MATRIX (A, "generic ") ;
                 #include "GB_reduce_each_index.c"
             }
         }
@@ -539,6 +545,6 @@ GrB_Info GB_reduce_to_vector        // C<M> = accum (C,reduce(A))
 
     GB_FREE_WORK ;
     return (GB_accum_mask (C, M, NULL, accum, &T, C_replace, Mask_comp,
-        Context)) ;
+        Mask_struct, Context)) ;
 }
 
diff --git a/Source/GB_resize.c b/Source/GB_resize.c
index ed3fdf3f85..326aa334ed 100644
--- a/Source/GB_resize.c
+++ b/Source/GB_resize.c
@@ -2,7 +2,7 @@
 // GB_resize: change the size of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -120,7 +120,7 @@ GrB_Info GB_resize              // change the size of a matrix
             int64_t pleft = 0 ;
             int64_t pright = GB_IMIN (A->nvec, vdim_new) - 1 ;
             bool found ;
-            GB_BINARY_SPLIT_SEARCH (vdim_new, Ah, pleft, pright, found) ;
+            GB_SPLIT_BINARY_SEARCH (vdim_new, Ah, pleft, pright, found) ;
             A->nvec = pleft ;
         }
     }
diff --git a/Source/GB_search_for_vector.c b/Source/GB_search_for_vector.c
index 1a1f8b73da..2b14c3a063 100644
--- a/Source/GB_search_for_vector.c
+++ b/Source/GB_search_for_vector.c
@@ -2,7 +2,7 @@
 // GB_search_for_vector: find the vector k that contains p
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -34,7 +34,7 @@ int64_t GB_search_for_vector        // return the vector k that contains p
     int64_t k = kleft ;
     int64_t kright = anvec ;
     bool found ;
-    GB_BINARY_SPLIT_SEARCH (p, Ap, k, kright, found) ;
+    GB_SPLIT_BINARY_SEARCH (p, Ap, k, kright, found) ;
     if (found)
     {
         // Ap [k] == p has been found, but if k is an empty vector, then the
diff --git a/Source/GB_select.c b/Source/GB_select.c
index 94c91e1b48..0a52a231e7 100644
--- a/Source/GB_select.c
+++ b/Source/GB_select.c
@@ -2,7 +2,7 @@
 // GB_select: apply a select operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -23,6 +23,7 @@ GrB_Info GB_select          // C<M> = accum (C, select(A,k)) or select(A',k)
     const bool C_replace,           // C descriptor
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // descriptor for M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GxB_SelectOp op,          // operator to select the entries
     const GrB_Matrix A,             // input matrix
@@ -69,7 +70,7 @@ GrB_Info GB_select          // C<M> = accum (C, select(A,k)) or select(A',k)
         opcode == GB_LT_ZERO_opcode || opcode == GB_LT_THUNK_opcode ||
         opcode == GB_LE_ZERO_opcode || opcode == GB_LE_THUNK_opcode ;
 
-    if (typecode >= GB_UCT_code && op_is_ordered_comparator)
+    if (typecode >= GB_UDT_code && op_is_ordered_comparator)
     { 
         // built-in GT, GE, LT, and LE operators cannot be used with
         // user-defined types
@@ -110,7 +111,7 @@ GrB_Info GB_select          // C<M> = accum (C, select(A,k)) or select(A',k)
         (opcode >= GB_TRIL_opcode && opcode <= GB_OFFDIAG_opcode) ;
 
     // check if op is user-defined
-    bool op_is_user_defined = (opcode >= GB_USER_SELECT_C_opcode) ;
+    bool op_is_user_defined = (opcode >= GB_USER_SELECT_opcode) ;
 
     int64_t nz_thunk = 0 ;
 
@@ -126,7 +127,7 @@ GrB_Info GB_select          // C<M> = accum (C, select(A,k)) or select(A',k)
         { 
             // Thunk present, but empty, or wrong dimensions
             return (GB_ERROR (GrB_DIMENSION_MISMATCH, (GB_LOG,
-                "Thunk must be a vector of length 1"))) ;
+                "Thunk must be a GrB_Scalar"))) ;
         }
 
         // if op is TRIL, TRIU, DIAG, or OFFDIAG, Thunk_in must be
@@ -486,6 +487,6 @@ GrB_Info GB_select          // C<M> = accum (C, select(A,k)) or select(A',k)
     //--------------------------------------------------------------------------
 
     return (GB_accum_mask (C, M, NULL, accum, &T, C_replace, Mask_comp,
-        Context)) ;
+        Mask_struct, Context)) ;
 }
 
diff --git a/Source/GB_select.h b/Source/GB_select.h
index cc17ba8a9e..46020cf1f2 100644
--- a/Source/GB_select.h
+++ b/Source/GB_select.h
@@ -2,7 +2,7 @@
 // GB_select.h: definitions for GrB_select and related functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,6 +17,7 @@ GrB_Info GB_select          // C<M> = accum (C, select(A,k)) or select(A',k)
     const bool C_replace,           // C descriptor
     const GrB_Matrix M,             // optional mask for C, unused if NULL
     const bool Mask_comp,           // descriptor for M
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C,T)
     const GxB_SelectOp op,          // operator to select the entries
     const GrB_Matrix A,             // input matrix
diff --git a/Source/GB_selector.c b/Source/GB_selector.c
index f59fa0f42a..06846c7932 100644
--- a/Source/GB_selector.c
+++ b/Source/GB_selector.c
@@ -2,7 +2,7 @@
 // GB_selector:  select entries from a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -56,7 +56,7 @@ GrB_Info GB_selector
     ASSERT_MATRIX_OK (A, "A input for GB_selector", GB_FLIP (GB0)) ;
     ASSERT_SELECTOP_OK_OR_NULL (op, "selectop for GB_selector", GB0) ;
     ASSERT_SCALAR_OK_OR_NULL (Thunk, "Thunk for GB_selector", GB0) ;
-    ASSERT (opcode >= 0 && opcode <= GB_USER_SELECT_R_opcode) ;
+    ASSERT (opcode >= 0 && opcode <= GB_USER_SELECT_opcode) ;
 
     GrB_Info info ;
     if (Chandle != NULL)
@@ -64,6 +64,11 @@ GrB_Info GB_selector
         (*Chandle) = NULL ;
     }
 
+    int64_t *GB_RESTRICT Zp = NULL ;
+    int64_t *GB_RESTRICT Wfirst = NULL ;
+    int64_t *GB_RESTRICT Wlast = NULL ;
+    int64_t *GB_RESTRICT C_pstart_slice = NULL ;
+
     //--------------------------------------------------------------------------
     // determine the number of threads and tasks to use
     //--------------------------------------------------------------------------
@@ -115,7 +120,7 @@ GrB_Info GB_selector
         xthunk = Thunk->x ;
         GB_Type_code tcode = Thunk->type->code ;
         ithunk = 0 ;
-        if (tcode <= GB_FP64_code && opcode < GB_USER_SELECT_C_opcode)
+        if (tcode <= GB_FP64_code && opcode < GB_USER_SELECT_opcode)
         { 
             // ithunk = (int64_t) Thunk (0)
             GB_cast_array ((GB_void *GB_RESTRICT) &ithunk,
@@ -132,17 +137,12 @@ GrB_Info GB_selector
     //--------------------------------------------------------------------------
 
     GxB_select_function user_select = NULL ;
-    if (op != NULL && opcode >= GB_USER_SELECT_C_opcode)
+    if (op != NULL && opcode >= GB_USER_SELECT_opcode)
     { 
+        GB_BURBLE_MATRIX (A, "generic ") ;
         user_select = (GxB_select_function) (op->function) ;
     }
 
-    //--------------------------------------------------------------------------
-    // workspace for tril, triu, diag, offdiage, and resize
-    //--------------------------------------------------------------------------
-
-    int64_t *GB_RESTRICT Zp = NULL ;
-
     //--------------------------------------------------------------------------
     // allocate the new vector pointers of C
     //--------------------------------------------------------------------------
@@ -162,35 +162,31 @@ GrB_Info GB_selector
     Cp [anvec] = 0 ;
 
     //--------------------------------------------------------------------------
-    // allocate workspace for each task
+    // slice the entries for each task
     //--------------------------------------------------------------------------
 
-    int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
-    int64_t *GB_RESTRICT Wfirst = NULL ;
-    int64_t *GB_RESTRICT Wlast = NULL ;
-    int64_t *GB_RESTRICT C_pstart_slice = NULL ;
-
-    GB_CALLOC_MEMORY (Wfirst, ntasks, sizeof (int64_t)) ;
-    GB_CALLOC_MEMORY (Wlast, ntasks, sizeof (int64_t)) ;
-    GB_CALLOC_MEMORY (C_pstart_slice, ntasks, sizeof (int64_t)) ;
+    // Task tid does entries pstart_slice [tid] to pstart_slice [tid+1]-1 and
+    // vectors kfirst_slice [tid] to klast_slice [tid].  The first and last
+    // vectors may be shared with prior slices and subsequent slices.
 
-    if (Wfirst == NULL || Wlast  == NULL || C_pstart_slice == NULL)
-    {
+    int64_t *pstart_slice = NULL, *kfirst_slice = NULL, *klast_slice = NULL ;
+    if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, A, ntasks))
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
     }
 
     //--------------------------------------------------------------------------
-    // slice the entries for each task
+    // allocate workspace for each task
     //--------------------------------------------------------------------------
 
-    // Task tid does entries pstart_slice [tid] to pstart_slice [tid+1]-1 and
-    // vectors kfirst_slice [tid] to klast_slice [tid].  The first and last
-    // vectors may be shared with prior slices and subsequent slices.
+    GB_CALLOC_MEMORY (Wfirst, ntasks, sizeof (int64_t)) ;
+    GB_CALLOC_MEMORY (Wlast, ntasks, sizeof (int64_t)) ;
+    GB_CALLOC_MEMORY (C_pstart_slice, ntasks, sizeof (int64_t)) ;
 
-    if (!GB_ek_slice (&pstart_slice, &kfirst_slice, &klast_slice, A, ntasks))
-    {
+    if (Wfirst == NULL || Wlast  == NULL || C_pstart_slice == NULL)
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
@@ -336,13 +332,11 @@ GrB_Info GB_selector
         if (A->is_hyper && C_nvec_nonempty < anvec)
         {
             // prune empty vectors from Ah and Ap
-            // printf ("prune empties\n") ;
             int64_t cnvec = 0 ;
             for (int64_t k = 0 ; k < anvec ; k++)
             {
                 if (Cp [k] < Cp [k+1])
                 { 
-                    //printf ("keep k "GBd" j "GBd"\n", k, Ah [k]) ;
                     Ah [cnvec] = Ah [k] ;
                     Ap [cnvec] = Cp [k] ;
                     cnvec++ ;
@@ -406,7 +400,6 @@ GrB_Info GB_selector
             {
                 if (Cp [k] < Cp [k+1])
                 { 
-                    // printf ("keep k "GBd" j "GBd"\n", k, Ah [k]) ;
                     Ch [cnvec] = Ah [k] ;
                     Cp [cnvec] = Cp [k] ;
                     cnvec++ ;
diff --git a/Source/GB_setElement.c b/Source/GB_setElement.c
index a7da68e8f7..a1e17d2895 100644
--- a/Source/GB_setElement.c
+++ b/Source/GB_setElement.c
@@ -2,7 +2,7 @@
 // GB_setElement: C(row,col) = scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -30,7 +30,7 @@
 GrB_Info GB_setElement              // set a single entry, C(row,col) = scalar
 (
     GrB_Matrix C,                   // matrix to modify
-    const void *scalar,             // scalar to set
+    void *scalar,                   // scalar to set
     const GrB_Index row,            // row index
     const GrB_Index col,            // column index
     const GB_Type_code scalar_code, // type of the scalar
@@ -75,6 +75,13 @@ GrB_Info GB_setElement              // set a single entry, C(row,col) = scalar
     // pending tuples and zombies are expected
     ASSERT (GB_PENDING_OK (C)) ; ASSERT (GB_ZOMBIES_OK (C)) ;
 
+    #if GB_BURBLE
+    bool burble = GB_Global_burble_get ( ) ;
+    double t_burble = 0 ;
+    // do not burble when waiting on scalars or empty matrices
+    burble = burble && ((C->vlen > 1) || (C->vdim > 1)) ;
+    #endif
+
     //--------------------------------------------------------------------------
     // handle the CSR/CSC format
     //--------------------------------------------------------------------------
@@ -114,7 +121,8 @@ GrB_Info GB_setElement              // set a single entry, C(row,col) = scalar
 
         // Time taken for this step is at most O(log(nnz(C(:,j))).
         const int64_t *Ci = C->i ;
-        GB_BINARY_ZOMBIE (i, Ci, pleft, pright, found, C->nzombies, is_zombie) ;
+        GB_BINARY_SEARCH_ZOMBIE (i, Ci, pleft, pright, found, C->nzombies,
+            is_zombie) ;
     }
 
     //--------------------------------------------------------------------------
@@ -134,7 +142,7 @@ GrB_Info GB_setElement              // set a single entry, C(row,col) = scalar
         // found C (i,j), assign its value
         size_t csize = ctype->size ;
         GB_void *Cx = C->x ;
-        if (scalar_code >= GB_UCT_code || scalar_code == ccode)
+        if (scalar_code >= GB_UDT_code || scalar_code == ccode)
         { 
             // copy the values without typecasting
             memcpy (Cx +(pleft*csize), scalar, csize) ;
@@ -214,8 +222,26 @@ GrB_Info GB_setElement              // set a single entry, C(row,col) = scalar
             // new tuple requires both conditions to hold.  All prior tuples
             // must be assembled before this new one can be added.
 
+            #if GB_BURBLE
+            if (burble)
+            {
+                GBBURBLE (" [ *_setElement ") ;
+                #if defined ( _OPENMP )
+                t_burble = GB_OPENMP_GET_WTIME ;
+                #endif
+            }
+            #endif
+
             // delete any lingering zombies and assemble the pending tuples
             GB_WAIT (C) ;
+
+            #if GB_BURBLE
+            if (burble)
+            {
+                GB_BURBLE_END ;
+            }
+            #endif
+
             ASSERT (C->Pending == NULL) ;
 
             // repeat the search since the C(i,j) entry may have been in
@@ -258,7 +284,29 @@ GrB_Info GB_setElement              // set a single entry, C(row,col) = scalar
 
         // this assert is fine, just costly even when debugging
         // ASSERT_MATRIX_OK (C, "did C for setElement (not found)", GB0) ;
-        return (GB_block (C, Context)) ;
+
+        #if GB_BURBLE
+        // only burble if GB_wait will be called
+        burble = (burble && GB_shall_block (C)) ;
+        if (burble)
+        {
+            GBBURBLE (" [ *_setElement ") ;
+            #if defined ( _OPENMP )
+            t_burble = GB_OPENMP_GET_WTIME ;
+            #endif
+        }
+        #endif
+
+        GrB_Info info = GB_block (C, Context) ;
+
+        #if GB_BURBLE
+        if (burble)
+        {
+            GB_BURBLE_END ;
+        }
+        #endif
+
+        return (info) ;
     }
 }
 
diff --git a/Source/GB_shallow_copy.c b/Source/GB_shallow_copy.c
index a72f3595c8..8ef7e0bf3e 100644
--- a/Source/GB_shallow_copy.c
+++ b/Source/GB_shallow_copy.c
@@ -2,7 +2,7 @@
 // GB_shallow_copy: create a shallow copy of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -27,7 +27,7 @@ GrB_Info GB_shallow_copy    // create a purely shallow matrix
     const GrB_Matrix A,     // input matrix
     GB_Context Context
 )
-{ 
+{
 
     //--------------------------------------------------------------------------
     // check inputs
diff --git a/Source/GB_shallow_op.c b/Source/GB_shallow_op.c
index 2cbdd4e7b3..b039a7e5da 100644
--- a/Source/GB_shallow_op.c
+++ b/Source/GB_shallow_op.c
@@ -2,7 +2,7 @@
 // GB_shallow_op:  create a shallow copy and apply a unary operator to a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_size_t_multiply.c b/Source/GB_size_t_multiply.c
index 10916ec62b..3303e073c5 100644
--- a/Source/GB_size_t_multiply.c
+++ b/Source/GB_size_t_multiply.c
@@ -2,7 +2,7 @@
 // GB_size_t_multiply:  multiply two size_t and guard against overflow
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_slice.c b/Source/GB_slice.c
index 16dc95f07d..b92c368ba5 100644
--- a/Source/GB_slice.c
+++ b/Source/GB_slice.c
@@ -2,7 +2,7 @@
 // GB_slice: create hypersparse shallow slices of a matrix B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -68,16 +68,11 @@ GrB_Info GB_slice       // slice B into nslices slices or hyperslices
 
     for (int s = 0 ; s < nslices ; s++)
     {
-        // printf ("\n================== slice %d\n", s) ;
-
         // Bslice [s] = B (:, bcol_first:bcol_last)
         int64_t bvec_first  = Slice [s] ;
         int64_t bvec_last   = Slice [s+1] - 1 ;
         int64_t bslice_nvec = bvec_last - bvec_first + 1 ;
 
-        // printf ("first "GBd" last "GBd" nvec "GBd"\n", 
-        // bvec_first, bvec_last, bslice_nvec) ;
-
         // allocate just the header for Bslice [s]; all content is shallow
         Bslice [s] = NULL ;
         GB_NEW (&(Bslice [s]), B->type, B->vlen, B->vdim, GB_Ap_null,
diff --git a/Source/GB_slice_vector.c b/Source/GB_slice_vector.c
index 3774f8d92a..cc70ac8ef8 100644
--- a/Source/GB_slice_vector.c
+++ b/Source/GB_slice_vector.c
@@ -2,7 +2,7 @@
 // GB_slice_vector:  slice a vector for GB_add, GB_emult, and GB_mask
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -87,7 +87,6 @@ void GB_slice_vector
     bool a_empty = (aknz == 0) ;
     bool b_empty = (bknz == 0) ;
     bool m_empty = (mknz == 0) ;
-    // printf ("empty a %d b %d m %d\n", a_empty, b_empty, m_empty) ;
 
     int64_t pM = (m_empty) ? -1 : pM_start ;
     int64_t pA = (a_empty) ? -1 : pA_start ;
@@ -104,8 +103,6 @@ void GB_slice_vector
         //----------------------------------------------------------------------
 
         i = (ileft + iright) / 2 ;
-        // printf ("   slice vector i "GBd" in ["GBd" to "GBd"]\n", i, ileft,
-        //     iright) ;
 
         //----------------------------------------------------------------------
         // find where i appears in A(:,kA)
@@ -151,7 +148,7 @@ void GB_slice_vector
             pA = pA_start ;
             bool afound ;
             int64_t apright = pA_end - 1 ;
-            GB_BINARY_SPLIT_SEARCH (i, Ai, pA, apright, afound) ;
+            GB_SPLIT_BINARY_SEARCH (i, Ai, pA, apright, afound) ;
             ASSERT (GB_IMPLIES (afound, GB_Ai (pA) == i)) ;
             ASSERT (pA_start <= pA && pA <= pA_end) ;
         }
@@ -189,14 +186,12 @@ void GB_slice_vector
         else
         { 
             // B(:,kB) is sparse, and not empty
-            // printf ("i is "GBd" bknz "GBd"\n", i, bknz) ;
             ASSERT (bknz > 0) ;
             pB = pB_start ;
             bool bfound ;
             int64_t bpright = pB_end - 1 ;
-            GB_BINARY_SPLIT_SEARCH (i, Bi, pB, bpright, bfound) ;
+            GB_SPLIT_BINARY_SEARCH (i, Bi, pB, bpright, bfound) ;
             ASSERT (pB_start <= pB && pB <= pB_end) ;
-            // printf ("pB "GBd" bfound %d\n", pB, bfound) ;
         }
         ASSERT (GB_IMPLIES (pB >  pB_start && pB < pB_end, (Bi [pB-1] < i))) ;
         ASSERT (GB_IMPLIES (pB >= pB_start && pB < pB_end, (Bi [pB] >= i ))) ;
@@ -219,7 +214,6 @@ void GB_slice_vector
 
         double work = (a_empty ? 0 : (pA_end - pA))
                     + (b_empty ? 0 : (pB_end - pB)) ;
-        // printf ("    work %g target %g\n", work, target_work) ;
 
         if (work < 0.9999 * target_work)
         { 
@@ -266,8 +260,6 @@ void GB_slice_vector
     // find where i appears in M(:,kM)
     //--------------------------------------------------------------------------
 
-    // printf ("sliced at i "GBd" pA "GBd" pB "GBd"\n", i, pA, pB) ;
-
     if (m_empty)
     { 
         pM = -1 ;
@@ -285,9 +277,8 @@ void GB_slice_vector
         pM = pM_start ;
         bool mfound ;
         int64_t mpright = pM_end - 1 ;
-        GB_BINARY_SPLIT_SEARCH (i, Mi, pM, mpright, mfound) ;
+        GB_SPLIT_BINARY_SEARCH (i, Mi, pM, mpright, mfound) ;
     }
-    // printf ("pM "GBd"\n", pM) ;
 
     //--------------------------------------------------------------------------
     // return result
@@ -303,9 +294,6 @@ void GB_slice_vector
     ASSERT (GB_IMPLIES ((pB >  pB_start && pB < pB_end), Bi [pB-1] <  i)) ;
     ASSERT (GB_IMPLIES ((pB >= pB_start && pB < pB_end), Bi [pB  ] >= i)) ;
 
-    // printf ("sliced vector: i "GBd" pA "GBd" pB "GBd" pM "GBd"\n",
-    //     i, pA, pM, pB) ;
-
     if (p_i != NULL)
     { 
         (*p_i)  = i ;
diff --git a/Source/GB_sort.h b/Source/GB_sort.h
index 8446eb7520..4d6d69eaa2 100644
--- a/Source/GB_sort.h
+++ b/Source/GB_sort.h
@@ -2,7 +2,7 @@
 // GB_sort.h: definitions for sorting functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -47,6 +47,14 @@ void GB_qsort_3     // sort array A of size 3-by-n, using 3 keys (A [0:2][])
     const int64_t n
 ) ;
 
+void GB_msort_1     // sort array A of size n
+(
+    int64_t *GB_RESTRICT A_0,      // size n array
+    int64_t *GB_RESTRICT W_0,      // size n array, workspace
+    const int64_t n,
+    const int nthreads          // # of threads to use
+) ;
+
 void GB_msort_2     // sort array A of size 2-by-n, using 2 keys (A [0:1][])
 (
     int64_t *GB_RESTRICT A_0,      // size n array
diff --git a/Source/GB_status_code.c b/Source/GB_status_code.c
index 806b9858ba..f8bc975580 100644
--- a/Source/GB_status_code.c
+++ b/Source/GB_status_code.c
@@ -2,7 +2,7 @@
 // GB_status_code: return an error string describing the last error
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subassign.c b/Source/GB_subassign.c
index e3bffa33bb..5beab89514 100644
--- a/Source/GB_subassign.c
+++ b/Source/GB_subassign.c
@@ -2,7 +2,7 @@
 // GB_subassign: C(Rows,Cols)<M> = accum (C(Rows,Cols),A) or A'
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,9 +33,10 @@
 GrB_Info GB_subassign               // C(Rows,Cols)<M> += A or A'
 (
     GrB_Matrix C,                   // input/output matrix for results
-    const bool C_replace,           // descriptor for C
+    bool C_replace,                 // descriptor for C
     const GrB_Matrix M_in,          // optional mask for C(Rows,Cols)
     const bool Mask_comp,           // true if mask is complemented
+    const bool Mask_struct,         // if true, use only the structure of M
     bool M_transpose,               // true if the mask should be transposed
     const GrB_BinaryOp accum,       // optional accum for accum(C,T)
     const GrB_Matrix A_in,          // input matrix
@@ -90,6 +91,8 @@ GrB_Info GB_subassign               // C(Rows,Cols)<M> += A or A'
     GB_ijlength (Rows, nRows_in, GB_NROWS (C), &nRows, &RowsKind, RowColon) ;
     GB_ijlength (Cols, nCols_in, GB_NCOLS (C), &nCols, &ColsKind, ColColon) ;
 
+    bool whole_C_matrix = (RowsKind == GB_ALL && ColsKind == GB_ALL) ;
+
     GrB_Matrix AT = NULL ;
     GrB_Matrix MT = NULL ;
     GrB_Matrix Z2 = NULL ;
@@ -224,6 +227,7 @@ GrB_Info GB_subassign               // C(Rows,Cols)<M> += A or A'
     { 
         // AT = A', with no typecasting
         // transpose: no typecast, no op, not in place
+        GBBURBLE ("(A transpose) ") ;
         GB_OK (GB_transpose (&AT, NULL, C_is_csc, A, NULL, Context)) ;
         A = AT ;
     }
@@ -251,6 +255,7 @@ GrB_Info GB_subassign               // C(Rows,Cols)<M> += A or A'
             // MT = M' to conform M to the same CSR/CSC format as C.
             // typecast to boolean, if a full matrix transpose is done.
             // transpose: no typecast, no op, not in place
+            GBBURBLE ("(M transpose) ") ;
             GB_OK (GB_transpose (&MT, GrB_BOOL, C_is_csc, M, NULL, Context)) ;
             M = MT ;
         }
@@ -270,16 +275,46 @@ GrB_Info GB_subassign               // C(Rows,Cols)<M> += A or A'
 
     if (C_aliased)
     { 
-        // Z2 = duplicate of C, which must be freed when done
+        // If C is aliased, it no longer has any pending work, since A and M
+        // have been finished above.  This also ensures GB_dup does not need
+        // to finish any pending work in C.
+        GBBURBLE ("(C aliased) ") ;
         ASSERT (!GB_ZOMBIES (C)) ;
         ASSERT (!GB_PENDING (C)) ;
-        GB_OK (GB_dup (&Z2, C, true, NULL, Context)) ;
+        if (whole_C_matrix && C_replace && accum == NULL)
+        { 
+            // C(:,:)<any mask, replace> = A or x, with C aliased to M or A.  C
+            // is about to be cleared in GB_subassigner anyway, but a duplicate
+            // is needed.  Instead of duplicating it, create an empty matrix Z2.
+            // This also prevents the C_replace_phase from being needed.
+            GB_NEW (&Z2, C->type, C->vlen, C->vdim, GB_Ap_calloc,
+                C->is_csc, GB_SAME_HYPER_AS (C->is_hyper), C->hyper_ratio,
+                1, Context) ;
+            GB_OK (info)  ;
+            GBBURBLE ("(C alias cleared; C_replace early) ") ;
+            C_replace = false ;
+        }
+        else
+        { 
+            // Z2 = duplicate of C, which must be freed when done
+            GB_OK (GB_dup (&Z2, C, true, NULL, Context)) ;
+        }
         Z = Z2 ;
     }
     else
     { 
-        // GB_subassigner can safely operate on C in place and so can the
-        // C_replace_phase below.
+        // GB_subassigner can safely operate on C in place.
+        // FUTURE:  if C is dense and will remain so,
+        // it would be faster to delay the clearing of C.
+        if (whole_C_matrix && C_replace && accum == NULL)
+        { 
+            // C(:,:)<any mask, replace> = A or x, with C not aliased to M or
+            // A.  C is about to be cleared in GB_subassigner anyway, so clear
+            // it now.
+            GB_OK (GB_clear (C, Context)) ;
+            GBBURBLE ("(C(:,:)<any mask>: C_replace early) ") ;
+            C_replace = false ;
+        }
         Z = C ;
     }
 
@@ -289,7 +324,7 @@ GrB_Info GB_subassign               // C(Rows,Cols)<M> += A or A'
 
     GB_OK (GB_subassigner (
         Z,          C_replace,      // Z matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         accum,                      // for accum (C(I,J),A)
         A,                          // A matrix, NULL for scalar expansion
         I, ni,                      // indices
diff --git a/Source/GB_subassign.h b/Source/GB_subassign.h
index 864db3be2d..63a5337497 100644
--- a/Source/GB_subassign.h
+++ b/Source/GB_subassign.h
@@ -2,7 +2,7 @@
 // GB_subassign.h: definitions for GB_subassign
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,9 +15,10 @@
 GrB_Info GB_subassign               // C(Rows,Cols)<M> += A or A'
 (
     GrB_Matrix C,                   // input/output matrix for results
-    const bool C_replace,           // descriptor for C
+    bool C_replace,                 // descriptor for C
     const GrB_Matrix M_in,          // optional mask for C(Rows,Cols)
     const bool Mask_comp,           // true if mask is complemented
+    const bool Mask_struct,         // if true, use only the structure of M
     bool M_transpose,               // true if the mask should be transposed
     const GrB_BinaryOp accum,       // optional accum for accum(C,T)
     const GrB_Matrix A_in,          // input matrix
@@ -53,6 +54,7 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     bool C_replace,                 // C matrix descriptor
     const GrB_Matrix M_input,       // optional mask for C(I,J), unused if NULL
     const bool Mask_comp,           // mask descriptor
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C(I,J),A)
     const GrB_Matrix A_input,       // input matrix (NULL for scalar expansion)
     const GrB_Index *I_input,       // list of indices
diff --git a/Source/GB_subassign_00.c b/Source/GB_subassign_00.c
index 75f2258640..add14b8bbd 100644
--- a/Source/GB_subassign_00.c
+++ b/Source/GB_subassign_00.c
@@ -2,7 +2,7 @@
 // GB_subassign_00: C(I,J)<!,repl> = empty ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subassign_01.c b/Source/GB_subassign_01.c
index 286e8e051d..d5c32b03cc 100644
--- a/Source/GB_subassign_01.c
+++ b/Source/GB_subassign_01.c
@@ -2,7 +2,7 @@
 // GB_subassign_01: C(I,J) = scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subassign_02.c b/Source/GB_subassign_02.c
index 1c4f936150..e48e5708ad 100644
--- a/Source/GB_subassign_02.c
+++ b/Source/GB_subassign_02.c
@@ -2,7 +2,7 @@
 // GB_subassign_02: C(I,J) = A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subassign_03.c b/Source/GB_subassign_03.c
index e0bfa456d4..89399af66b 100644
--- a/Source/GB_subassign_03.c
+++ b/Source/GB_subassign_03.c
@@ -2,7 +2,7 @@
 // GB_subassign_03: C(I,J) += scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subassign_04.c b/Source/GB_subassign_04.c
index b9f843b3dc..17b1a64ca6 100644
--- a/Source/GB_subassign_04.c
+++ b/Source/GB_subassign_04.c
@@ -2,7 +2,7 @@
 // GB_subassign_04: C(I,J) += A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subassign_05.c b/Source/GB_subassign_05.c
index feaa9a7930..8134a4ac05 100644
--- a/Source/GB_subassign_05.c
+++ b/Source/GB_subassign_05.c
@@ -2,7 +2,7 @@
 // GB_subassign_05: C(I,J)<M> = scalar ; no S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,6 +31,7 @@ GrB_Info GB_subassign_05
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const void *scalar,
     const GrB_Type atype,
     GB_Context Context
@@ -128,18 +129,11 @@ GrB_Info GB_subassign_05
                 for ( ; pM < pM_end ; pM++)
                 {
 
-                    //----------------------------------------------------------
-                    // consider the entry M(iA,j)
-                    //----------------------------------------------------------
-
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         int64_t iA = Mi [pM] ;
                         GB_iC_DENSE_LOOKUP ;
@@ -162,20 +156,15 @@ GrB_Info GB_subassign_05
                 for ( ; pM < pM_end ; pM++)
                 {
 
-                    //----------------------------------------------------------
-                    // consider the entry M(iA,j)
-                    //----------------------------------------------------------
-
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     {
                         int64_t iA = Mi [pM] ;
+
+                        // find C(iC,jC) in C(:,jC)
                         GB_iC_BINARY_SEARCH ;
                         if (cij_found)
                         { 
@@ -253,20 +242,15 @@ GrB_Info GB_subassign_05
                 for ( ; pM < pM_end ; pM++)
                 {
 
-                    //----------------------------------------------------------
-                    // consider the entry M(iA,j)
-                    //----------------------------------------------------------
-
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     {
                         int64_t iA = Mi [pM] ;
+
+                        // find C(iC,jC) in C(:,jC)
                         GB_iC_BINARY_SEARCH ;
                         if (!cij_found)
                         { 
diff --git a/Source/GB_subassign_05e.c b/Source/GB_subassign_05e.c
new file mode 100644
index 0000000000..6f857752e0
--- /dev/null
+++ b/Source/GB_subassign_05e.c
@@ -0,0 +1,137 @@
+//------------------------------------------------------------------------------
+// GB_subassign_05e: C(:,:)<M,struct> = scalar ; no S, C empty, M structural
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Method 05e: C(:,:)<M,struct> = scalar ; no S
+// compare with Methods 21, 25, and 05d
+
+// M:           present
+// Mask_comp:   false
+// Mask_struct: true
+// C_replace:   false
+// accum:       NULL
+// A:           scalar
+// S:           none
+
+#include "GB_subassign_methods.h"
+
+#undef  GB_FREE_ALL
+#define GB_FREE_ALL
+
+GrB_Info GB_subassign_05e
+(
+    GrB_Matrix C,                   // in/out: asserted to have no entries
+    // input:
+    const GrB_Matrix M,             // mask; C is rebuilt from M via GB_dup2
+    const void *scalar,             // scalar x assigned to every entry of C
+    const GrB_Type atype,           // presumably the type of scalar; confirm
+    GB_Context Context
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get inputs
+    //--------------------------------------------------------------------------
+
+    GrB_Info info ;
+    ASSERT_MATRIX_OK (C, "C for subassign method_05e", GB0) ;
+    ASSERT_MATRIX_OK (M, "M for subassign method_05e", GB0) ;
+    ASSERT (GB_NNZ (C) == 0) ;
+    ASSERT (!GB_PENDING (C)) ; ASSERT (!GB_ZOMBIES (C)) ;
+    ASSERT (!GB_PENDING (M)) ; ASSERT (!GB_ZOMBIES (M)) ;
+    const GB_Type_code ccode = C->type->code ;  // NOTE(review): unused below?
+    const size_t csize = C->type->size ;    // bytes per entry of C
+    GB_GET_SCALAR ;     // presumably defines cwork from scalar/atype; confirm
+
+    int64_t mnz = GB_NNZ (M) ;      // # entries in M; trip count of fill loops
+
+    //--------------------------------------------------------------------------
+    // Method 05e: C(:,:)<M> = x ; C is empty, x is a scalar, M is structural
+    //--------------------------------------------------------------------------
+
+    // Time: Optimal:  the method must iterate over all entries in M,
+    // and the time is O(nnz(M)).  This is also the size of C.
+
+    //--------------------------------------------------------------------------
+    // determine the number of threads to use
+    //--------------------------------------------------------------------------
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+    int nthreads = GB_nthreads (mnz, chunk, nthreads_max) ;
+
+    //--------------------------------------------------------------------------
+    // allocate C and create its pattern
+    //--------------------------------------------------------------------------
+
+    // clear prior content and then create a copy of the pattern of M.  Keep
+    // the same type and CSR/CSC for C.  Allocate the values of C but do not
+    // initialize them.
+
+    bool C_is_csc = C->is_csc ;     // saved here, restored after GB_dup2
+    GB_PHIX_FREE (C) ;
+    GB_OK (GB_dup2 (&C, M, false, C->type, Context)) ;
+    C->is_csc = C_is_csc ;
+    int64_t pC ;                    // loop index used by the workers below
+
+    //--------------------------------------------------------------------------
+    // define the worker for the switch factory
+    //--------------------------------------------------------------------------
+
+    // worker for built-in types: typed store of x into Cx [0..mnz-1]
+    #define GB_WORKER(ctype)                                                \
+    {                                                                       \
+        ctype *GB_RESTRICT Cx = C->x ;                                      \
+        ctype x = (*(ctype *) cwork) ;                                      \
+        GB_PRAGMA (omp parallel for num_threads(nthreads) schedule(static)) \
+        for (pC = 0 ; pC < mnz ; pC++)                                      \
+        {                                                                   \
+            Cx [pC] = x ;                                                   \
+        }                                                                   \
+    }                                                                       \
+    break ;
+
+    //--------------------------------------------------------------------------
+    // launch the switch factory
+    //--------------------------------------------------------------------------
+
+    switch (C->type->code)
+    {
+        case GB_BOOL_code   : GB_WORKER (bool) ;
+        case GB_INT8_code   : GB_WORKER (int8_t) ;
+        case GB_INT16_code  : GB_WORKER (int16_t) ;
+        case GB_INT32_code  : GB_WORKER (int32_t) ;
+        case GB_INT64_code  : GB_WORKER (int64_t) ;
+        case GB_UINT8_code  : GB_WORKER (uint8_t) ;
+        case GB_UINT16_code : GB_WORKER (uint16_t) ;
+        case GB_UINT32_code : GB_WORKER (uint32_t) ;
+        case GB_UINT64_code : GB_WORKER (uint64_t) ;
+        case GB_FP32_code   : GB_WORKER (float) ;
+        case GB_FP64_code   : GB_WORKER (double) ;
+        default:
+            {
+                // worker for all other types: copy csize bytes of cwork per entry
+                GB_BURBLE_N (mnz, "generic ") ;
+                GB_void *GB_RESTRICT Cx = C->x ;
+                #pragma omp parallel for num_threads(nthreads) schedule(static)
+                for (pC = 0 ; pC < mnz ; pC++)
+                { 
+                    memcpy (Cx +((pC)*csize), cwork, csize) ;
+                }
+            }
+            break ;
+    }
+
+    //--------------------------------------------------------------------------
+    // free workspace and return result
+    //--------------------------------------------------------------------------
+
+    GB_FREE_WORK ;      // NOTE(review): not defined in this file; confirm source
+    ASSERT_MATRIX_OK (C, "C output for subassign method_05e", GB0) ;
+    return (GrB_SUCCESS) ;
+}
+
diff --git a/Source/GB_subassign_06n.c b/Source/GB_subassign_06n.c
index be938e799d..8d35bc209d 100644
--- a/Source/GB_subassign_06n.c
+++ b/Source/GB_subassign_06n.c
@@ -2,7 +2,7 @@
 // GB_subassign_06n: C(I,J)<M> = A ; no S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,6 +31,7 @@ GrB_Info GB_subassign_06n
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     GB_Context Context
 )
@@ -52,6 +53,7 @@ GrB_Info GB_subassign_06n
     const int64_t *GB_RESTRICT Ah = A->h ;
     const int64_t Anvec = A->nvec ;
     const bool A_is_hyper = A->is_hyper ;
+    const int64_t avlen = A->vlen ;
     GrB_BinaryOp accum = NULL ;
 
     //--------------------------------------------------------------------------
@@ -113,6 +115,8 @@ GrB_Info GB_subassign_06n
             int64_t pA, pA_end ;
             GB_VECTOR_LOOKUP (pA, pA_end, A, j) ;
             int64_t ajnz = pA_end - pA ;
+            bool ajdense = (ajnz == avlen) ;
+            int64_t pA_start = pA ;
 
             //------------------------------------------------------------------
             // get jC, the corresponding vector of C
@@ -127,35 +131,59 @@ GrB_Info GB_subassign_06n
             // C(I,jC)<M(:,j)> = A(:,j) ; no S
             //------------------------------------------------------------------
 
-            if (cjdense)
+            if (cjdense && ajdense)
             {
 
                 //--------------------------------------------------------------
-                // C(:,jC) is dense so the binary search of C is not needed
+                // C(:,jC) and A(:,j) are both dense
                 //--------------------------------------------------------------
 
                 for ( ; pM < pM_end ; pM++)
                 {
 
                     //----------------------------------------------------------
-                    // consider the entry M(iA,j)
+                    // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    if (GB_mcast (Mx, pM, msize))
+                    { 
+                        int64_t iA = Mi [pM] ;
+                        GB_iC_DENSE_LOOKUP ;
+
+                        // find iA in A(:,j)
+                        // A(:,j) is dense; no need for binary search
+                        pA = pA_start + iA ;
+                        ASSERT (Ai [pA] == iA) ;
+                        // ----[C A 1] or [X A 1]-----------------------
+                        // [C A 1]: action: ( =A ): copy A to C, no acc
+                        // [X A 1]: action: ( undelete ): zombie lives
+                        GB_noaccum_C_A_1_matrix ;
+                    }
+                }
+
+            }
+            else if (cjdense)
+            {
+
+                //--------------------------------------------------------------
+                // C(:,jC) is dense, A(:,j) is sparse
+                //--------------------------------------------------------------
+
+                for ( ; pM < pM_end ; pM++)
+                {
 
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         int64_t iA = Mi [pM] ;
                         GB_iC_DENSE_LOOKUP ;
 
                         // find iA in A(:,j)
-                        int64_t apright = pA_end - 1 ;
                         bool aij_found ;
+                        int64_t apright = pA_end - 1 ;
                         GB_BINARY_SEARCH (iA, Ai, pA, apright, aij_found) ;
 
                         if (!aij_found)
@@ -169,7 +197,7 @@ GrB_Info GB_subassign_06n
                         else
                         { 
                             // ----[C A 1] or [X A 1]---------------------------
-                            // [C A 1]: action: ( =A ): copy A into C, no accum
+                            // [C A 1]: action: ( =A ): copy A to C, no accum
                             // [X A 1]: action: ( undelete ): zombie lives
                             GB_noaccum_C_A_1_matrix ;
                         }
@@ -177,35 +205,73 @@ GrB_Info GB_subassign_06n
                 }
 
             }
-            else
+            else if (ajdense)
             {
 
                 //--------------------------------------------------------------
-                // C(:,jC) is sparse; use binary search for C
+                // C(:,jC) is sparse, A(:,j) is dense
                 //--------------------------------------------------------------
 
                 for ( ; pM < pM_end ; pM++)
                 {
 
                     //----------------------------------------------------------
-                    // consider the entry M(iA,j)
+                    // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    if (GB_mcast (Mx, pM, msize))
+                    {
+                        int64_t iA = Mi [pM] ;
+
+                        // find C(iC,jC) in C(:,jC)
+                        GB_iC_BINARY_SEARCH ;
+
+                        // lookup iA in A(:,j)
+                        pA = pA_start + iA ;
+                        ASSERT (Ai [pA] == iA) ;
+
+                        if (cij_found)
+                        { 
+                            // ----[C A 1] or [X A 1]---------------------------
+                            // [C A 1]: action: ( =A ): copy A into C, no accum
+                            // [X A 1]: action: ( undelete ): zombie lives
+                            GB_noaccum_C_A_1_matrix ;
+                        }
+                        else
+                        { 
+                            // C (iC,jC) is not present, A (i,j) is present
+                            // ----[. A 1]--------------------------------------
+                            // [. A 1]: action: ( insert )
+                            task_pending++ ;
+                        }
+                    }
+                }
+
+            }
+            else
+            {
+
+                //--------------------------------------------------------------
+                // C(:,jC) and A(:,j) are both sparse
+                //--------------------------------------------------------------
+
+                for ( ; pM < pM_end ; pM++)
+                {
 
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     {
                         int64_t iA = Mi [pM] ;
+
+                        // find C(iC,jC) in C(:,jC)
                         GB_iC_BINARY_SEARCH ;
 
                         // find iA in A(:,j)
-                        int64_t apright = pA_end - 1 ;
                         bool aij_found ;
+                        int64_t apright = pA_end - 1 ;
                         GB_BINARY_SEARCH (iA, Ai, pA, apright, aij_found) ;
 
                         if (cij_found && aij_found)
@@ -280,6 +346,8 @@ GrB_Info GB_subassign_06n
             GB_VECTOR_LOOKUP (pA, pA_end, A, j) ;
             int64_t ajnz = pA_end - pA ;
             if (ajnz == 0) continue ;
+            bool ajdense = (ajnz == avlen) ;
+            int64_t pA_start = pA ;
 
             //------------------------------------------------------------------
             // get jC, the corresponding vector of C
@@ -302,27 +370,31 @@ GrB_Info GB_subassign_06n
                 for ( ; pM < pM_end ; pM++)
                 {
 
-                    //----------------------------------------------------------
-                    // consider the entry M(iA,j)
-                    //----------------------------------------------------------
-
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     {
                         int64_t iA = Mi [pM] ;
 
                         // find iA in A(:,j)
-                        int64_t apright = pA_end - 1 ;
-                        bool aij_found ;
-                        GB_BINARY_SEARCH (iA, Ai, pA, apright, aij_found) ;
-                        if (!aij_found) continue ;
+                        if (ajdense)
+                        {
+                            // A(:,j) is dense; no need for binary search
+                            pA = pA_start + iA ;
+                            ASSERT (Ai [pA] == iA) ;
+                        }
+                        else
+                        {
+                            // A(:,j) is sparse; use binary search
+                            int64_t apright = pA_end - 1 ;
+                            bool aij_found ;
+                            GB_BINARY_SEARCH (iA, Ai, pA, apright, aij_found) ;
+                            if (!aij_found) continue ;
+                        }
 
+                        // find C(iC,jC) in C(:,jC)
                         GB_iC_BINARY_SEARCH ;
                         if (!cij_found)
                         { 
diff --git a/Source/GB_subassign_06s.c b/Source/GB_subassign_06s.c
index cfd60f1871..233a264708 100644
--- a/Source/GB_subassign_06s.c
+++ b/Source/GB_subassign_06s.c
@@ -2,7 +2,7 @@
 // GB_subassign_06s: C(I,J)<M> = A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_06s
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -47,6 +48,7 @@ GrB_Info GB_subassign_06s
     GB_GET_MASK ;
     const bool M_is_hyper = M->is_hyper ;
     const int64_t Mnvec = M->nvec ;
+    const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     GB_GET_S ;
     GrB_BinaryOp accum = NULL ;
@@ -106,6 +108,7 @@ GrB_Info GB_subassign_06s
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -123,7 +126,7 @@ GrB_Info GB_subassign_06s
                 if (iS < iA)
                 {
                     // S (i,j) is present but A (i,j) is not
-                    GB_MIJ_BINARY_SEARCH (iS) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                     if (mij)
                     { 
                         // ----[C . 1] or [X . 1]-------------------------------
@@ -137,7 +140,7 @@ GrB_Info GB_subassign_06s
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij)
                     { 
                         // ----[. A 1]------------------------------------------
@@ -149,7 +152,7 @@ GrB_Info GB_subassign_06s
                 else
                 {
                     // both S (i,j) and A (i,j) present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij)
                     { 
                         // ----[C A 1] or [X A 1]-------------------------------
@@ -168,7 +171,7 @@ GrB_Info GB_subassign_06s
             {
                 // S (i,j) is present but A (i,j) is not
                 int64_t iS = Si [pS] ;
-                GB_MIJ_BINARY_SEARCH (iS) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                 if (mij)
                 { 
                     // ----[C . 1] or [X . 1]-----------------------------------
@@ -185,7 +188,7 @@ GrB_Info GB_subassign_06s
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 if (mij)
                 { 
                     // ----[. A 1]----------------------------------------------
@@ -237,6 +240,7 @@ GrB_Info GB_subassign_06s
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -259,7 +263,7 @@ GrB_Info GB_subassign_06s
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij)
                     { 
                         // ----[. A 1]------------------------------------------
@@ -282,7 +286,7 @@ GrB_Info GB_subassign_06s
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 if (mij)
                 { 
                     // ----[. A 1]----------------------------------------------
diff --git a/Source/GB_subassign_07.c b/Source/GB_subassign_07.c
index c5acbcf72e..a0afc54676 100644
--- a/Source/GB_subassign_07.c
+++ b/Source/GB_subassign_07.c
@@ -2,7 +2,7 @@
 // GB_subassign_07: C(I,J)<M> += scalar ; no S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,6 +31,7 @@ GrB_Info GB_subassign_07
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -121,18 +122,11 @@ GrB_Info GB_subassign_07
                 for ( ; pM < pM_end ; pM++)
                 {
 
-                    //----------------------------------------------------------
-                    // consider the entry M(iA,j)
-                    //----------------------------------------------------------
-
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         int64_t iA = Mi [pM] ;
                         GB_iC_DENSE_LOOKUP ;
@@ -155,20 +149,15 @@ GrB_Info GB_subassign_07
                 for ( ; pM < pM_end ; pM++)
                 {
 
-                    //----------------------------------------------------------
-                    // consider the entry M(iA,j)
-                    //----------------------------------------------------------
-
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     {
                         int64_t iA = Mi [pM] ;
+
+                        // find C(iC,jC) in C(:,jC)
                         GB_iC_BINARY_SEARCH ;
                         if (cij_found)
                         { 
@@ -246,20 +235,15 @@ GrB_Info GB_subassign_07
                 for ( ; pM < pM_end ; pM++)
                 {
 
-                    //----------------------------------------------------------
-                    // consider the entry M(iA,j)
-                    //----------------------------------------------------------
-
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-
                     //----------------------------------------------------------
                     // update C(iC,jC), but only if M(iA,j) allows it
                     //----------------------------------------------------------
 
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     {
                         int64_t iA = Mi [pM] ;
+
+                        // find C(iC,jC) in C(:,jC)
                         GB_iC_BINARY_SEARCH ;
                         if (!cij_found)
                         { 
diff --git a/Source/GB_subassign_08.c b/Source/GB_subassign_08.c
index da228af67f..5a5b6c043d 100644
--- a/Source/GB_subassign_08.c
+++ b/Source/GB_subassign_08.c
@@ -2,7 +2,7 @@
 // GB_subassign_08: C(I,J)<M> += A ; no S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -93,6 +93,7 @@ GrB_Info GB_subassign_08
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     GB_Context Context
@@ -111,6 +112,7 @@ GrB_Info GB_subassign_08
     const int64_t *GB_RESTRICT Ch = C->h ;
     const int64_t *GB_RESTRICT Cp = C->p ;
     GB_GET_MASK ;
+    // const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     const int64_t *GB_RESTRICT Ah = A->h ;
     GB_GET_ACCUM ;
@@ -124,8 +126,8 @@ GrB_Info GB_subassign_08
     // M(:,j) or A(:,j) are very sparse compared to the other, then the shorter
     // is traversed with a linear-time scan and a binary search is used for the
     // other.  If the number of nonzeros is comparable, a linear-time scan is
-    // used for both.  Once a pair of entries M(i,j)=1 and A(i,j), is found,
-    // the entry A(i,j) is accumulated or inserted into C.
+    // used for both.  Once two entries M(i,j)=1 and A(i,j) are found with the
+    // same index i, the entry A(i,j) is accumulated or inserted into C.
 
     // The algorithm is very much like the eWise multiplication of A.*M, so the
     // parallel scheduling relies on GB_emult_phase0(AA and GB_ewise_slice.
@@ -201,14 +203,13 @@ GrB_Info GB_subassign_08
 
                 for ( ; pM < pM_end ; pM++)
                 {
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         int64_t iA = Mi [pM] ;
                         // find iA in A(:,j)
                         int64_t pright = pA_end - 1 ;
                         bool found ;
+                        // FUTURE::: exploit dense A(:,j)
                         GB_BINARY_SEARCH (iA, Ai, pA, pright, found) ;
                         if (found) GB_PHASE1_ACTION ;
                     }
@@ -222,10 +223,13 @@ GrB_Info GB_subassign_08
                 // M(:,j) is much denser than A(:,j)
                 //--------------------------------------------------------------
 
+                // FUTURE::: exploit dense mask
+                bool mjdense = false ;
+
                 for ( ; pA < pA_end ; pA++)
                 { 
                     int64_t iA = Ai [pA] ;
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij) GB_PHASE1_ACTION ;
                 }
 
@@ -256,9 +260,7 @@ GrB_Info GB_subassign_08
                     else
                     { 
                         // both A(i,j) and M(i,j) exist
-                        bool mij ;
-                        cast_M (&mij, Mx +(pM*msize), 0) ;
-                        if (mij) GB_PHASE1_ACTION ;
+                        if (GB_mcast (Mx, pM, msize)) GB_PHASE1_ACTION ;
                         GB_NEXT (A) ;
                         GB_NEXT (M) ;
                     }
@@ -337,14 +339,13 @@ GrB_Info GB_subassign_08
 
                 for ( ; pM < pM_end ; pM++)
                 {
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         int64_t iA = Mi [pM] ;
                         // find iA in A(:,j)
                         int64_t pright = pA_end - 1 ;
                         bool found ;
+                        // FUTURE::: exploit dense A(:,j)
                         GB_BINARY_SEARCH (iA, Ai, pA, pright, found) ;
                         if (found) GB_PHASE2_ACTION ;
                     }
@@ -358,10 +359,13 @@ GrB_Info GB_subassign_08
                 // M(:,j) is much denser than A(:,j)
                 //--------------------------------------------------------------
 
+                // FUTURE::: exploit dense mask
+                bool mjdense = false ;
+
                 for ( ; pA < pA_end ; pA++)
                 { 
                     int64_t iA = Ai [pA] ;
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij) GB_PHASE2_ACTION ;
                 }
 
@@ -392,9 +396,7 @@ GrB_Info GB_subassign_08
                     else
                     { 
                         // both A(i,j) and M(i,j) exist
-                        bool mij ;
-                        cast_M (&mij, Mx +(pM*msize), 0) ;
-                        if (mij) GB_PHASE2_ACTION ;
+                        if (GB_mcast (Mx, pM, msize)) GB_PHASE2_ACTION ;
                         GB_NEXT (A) ;
                         GB_NEXT (M) ;
                     }
diff --git a/Source/GB_subassign_09.c b/Source/GB_subassign_09.c
index 53ffa0b7d7..49c0419e4e 100644
--- a/Source/GB_subassign_09.c
+++ b/Source/GB_subassign_09.c
@@ -2,7 +2,7 @@
 // GB_subassign_09: C(I,J)<M,repl> = scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_09
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const void *scalar,
     const GrB_Type atype,
     const GrB_Matrix S,
@@ -49,9 +50,8 @@ GrB_Info GB_subassign_09
     const int64_t *GB_RESTRICT Mp = M->p ;
 //  const int64_t *GB_RESTRICT Mh = M->h ;
     const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
     const size_t msize = M->type->size ;
-    GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
     GB_GET_SCALAR ;
     GB_GET_S ;
     GrB_BinaryOp accum = NULL ;
@@ -134,9 +134,7 @@ GrB_Info GB_subassign_09
                 else if (iM < iS)
                 {
                     // S (i,j) is not present, M (i,j) is present
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         // ----[. A 1]------------------------------------------
                         // [. A 1]: action: ( insert )
@@ -147,10 +145,8 @@ GrB_Info GB_subassign_09
                 else
                 {
                     // both S (i,j) and M (i,j) present
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
                     GB_C_S_LOOKUP ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         // ----[C A 1] or [X A 1]-------------------------------
                         // [C A 1]: action: ( =A ): copy A, no accum
@@ -185,10 +181,7 @@ GrB_Info GB_subassign_09
             while (pM < pM_end)
             {
                 // S (i,j) is not present, M (i,j) is present
-                // mij = (bool) M [pM]
-                bool mij ;
-                cast_M (&mij, Mx +(pM*msize), 0) ;
-                if (mij)
+                if (GB_mcast (Mx, pM, msize))
                 { 
                     // ----[. A 1]------------------------------------------
                     // [. A 1]: action: ( insert )
@@ -254,9 +247,7 @@ GrB_Info GB_subassign_09
                 else if (iM < iS)
                 {
                     // S (i,j) is not present, M (i,j) is present
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         // ----[. A 1]------------------------------------------
                         // [. A 1]: action: ( insert )
@@ -277,10 +268,7 @@ GrB_Info GB_subassign_09
             while (pM < pM_end)
             {
                 // S (i,j) is not present, M (i,j) is present
-                // mij = (bool) M [pM]
-                bool mij ;
-                cast_M (&mij, Mx +(pM*msize), 0) ;
-                if (mij)
+                if (GB_mcast (Mx, pM, msize))
                 { 
                     // ----[. A 1]------------------------------------------
                     // [. A 1]: action: ( insert )
diff --git a/Source/GB_subassign_10.c b/Source/GB_subassign_10.c
index cb6c6f314b..947603d5bf 100644
--- a/Source/GB_subassign_10.c
+++ b/Source/GB_subassign_10.c
@@ -2,7 +2,7 @@
 // GB_subassign_10: C(I,J)<M,repl> = A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_10
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -47,6 +48,7 @@ GrB_Info GB_subassign_10
     GB_GET_MASK ;
     const bool M_is_hyper = M->is_hyper ;
     const int64_t Mnvec = M->nvec ;
+    const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     GB_GET_S ;
     GrB_BinaryOp accum = NULL ;
@@ -104,6 +106,7 @@ GrB_Info GB_subassign_10
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -134,7 +137,7 @@ GrB_Info GB_subassign_10
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij)
                     { 
                         // ----[. A 1]------------------------------------------
@@ -146,7 +149,7 @@ GrB_Info GB_subassign_10
                 else
                 {
                     // both S (i,j) and A (i,j) present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     GB_C_S_LOOKUP ;
                     if (mij)
                     { 
@@ -184,7 +187,7 @@ GrB_Info GB_subassign_10
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 if (mij)
                 { 
                     // ----[. A 1]----------------------------------------------
@@ -236,6 +239,7 @@ GrB_Info GB_subassign_10
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -258,7 +262,7 @@ GrB_Info GB_subassign_10
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij)
                     { 
                         // ----[. A 1]------------------------------------------
@@ -281,7 +285,7 @@ GrB_Info GB_subassign_10
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 if (mij)
                 { 
                     // ----[. A 1]----------------------------------------------
diff --git a/Source/GB_subassign_11.c b/Source/GB_subassign_11.c
index 9e774a66bd..6ef2cd4520 100644
--- a/Source/GB_subassign_11.c
+++ b/Source/GB_subassign_11.c
@@ -2,7 +2,7 @@
 // GB_subassign_11: C(I,J)<M,repl> += scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_11
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -50,9 +51,8 @@ GrB_Info GB_subassign_11
     const int64_t *GB_RESTRICT Mp = M->p ;
 //  const int64_t *GB_RESTRICT Mh = M->h ;
     const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
     const size_t msize = M->type->size ;
-    GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
     GB_GET_ACCUM_SCALAR ;
     GB_GET_S ;
 
@@ -134,9 +134,7 @@ GrB_Info GB_subassign_11
                 else if (iM < iS)
                 {
                     // S (i,j) is not present, M (i,j) is present
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         // ----[. A 1]------------------------------------------
                         // [. A 1]: action: ( insert )
@@ -147,10 +145,8 @@ GrB_Info GB_subassign_11
                 else
                 {
                     // both S (i,j) and M (i,j) present
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
                     GB_C_S_LOOKUP ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         // ----[C A 1] or [X A 1]-------------------------------
                         // [C A 1]: action: ( =C+A ): apply accum
@@ -185,10 +181,7 @@ GrB_Info GB_subassign_11
             while (pM < pM_end)
             {
                 // S (i,j) is not present, M (i,j) is present
-                // mij = (bool) M [pM]
-                bool mij ;
-                cast_M (&mij, Mx +(pM*msize), 0) ;
-                if (mij)
+                if (GB_mcast (Mx, pM, msize))
                 { 
                     // ----[. A 1]------------------------------------------
                     // [. A 1]: action: ( insert )
@@ -254,9 +247,7 @@ GrB_Info GB_subassign_11
                 else if (iM < iS)
                 {
                     // S (i,j) is not present, M (i,j) is present
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pM, msize))
                     { 
                         // ----[. A 1]------------------------------------------
                         // [. A 1]: action: ( insert )
@@ -277,10 +268,7 @@ GrB_Info GB_subassign_11
             while (pM < pM_end)
             {
                 // S (i,j) is not present, M (i,j) is present
-                // mij = (bool) M [pM]
-                bool mij ;
-                cast_M (&mij, Mx +(pM*msize), 0) ;
-                if (mij)
+                if (GB_mcast (Mx, pM, msize))
                 { 
                     // ----[. A 1]------------------------------------------
                     // [. A 1]: action: ( insert )
diff --git a/Source/GB_subassign_12.c b/Source/GB_subassign_12.c
index a2ddc3c825..05ad2e2f69 100644
--- a/Source/GB_subassign_12.c
+++ b/Source/GB_subassign_12.c
@@ -2,7 +2,7 @@
 // GB_subassign_12: C(I,J)<M,repl> += A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_12
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     const GrB_Matrix S,
@@ -48,6 +49,7 @@ GrB_Info GB_subassign_12
     GB_GET_MASK ;
     const bool M_is_hyper = M->is_hyper ;
     const int64_t Mnvec = M->nvec ;
+    const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     GB_GET_S ;
     GB_GET_ACCUM ;
@@ -105,6 +107,7 @@ GrB_Info GB_subassign_12
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -122,7 +125,7 @@ GrB_Info GB_subassign_12
                 if (iS < iA)
                 {
                     // S (i,j) is present but A (i,j) is not
-                    GB_MIJ_BINARY_SEARCH (iS) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                     if (!mij)
                     { 
                         // ----[C . 0] or [X . 0]-------------------------------
@@ -136,7 +139,7 @@ GrB_Info GB_subassign_12
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij)
                     { 
                         // ----[. A 1]------------------------------------------
@@ -148,7 +151,7 @@ GrB_Info GB_subassign_12
                 else
                 {
                     // both S (i,j) and A (i,j) present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     GB_C_S_LOOKUP ;
                     if (mij)
                     { 
@@ -174,7 +177,7 @@ GrB_Info GB_subassign_12
             while (pS < pS_end)
             {
                 int64_t iS = Si [pS] ;
-                GB_MIJ_BINARY_SEARCH (iS) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                 if (!mij)
                 { 
                     // ----[C . 0] or [X . 0]-----------------------------------
@@ -191,7 +194,7 @@ GrB_Info GB_subassign_12
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 if (mij)
                 { 
                     // ----[. A 1]----------------------------------------------
@@ -243,6 +246,7 @@ GrB_Info GB_subassign_12
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -265,7 +269,7 @@ GrB_Info GB_subassign_12
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     if (mij)
                     { 
                         // ----[. A 1]------------------------------------------
@@ -288,7 +292,7 @@ GrB_Info GB_subassign_12
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 if (mij)
                 { 
                     // ----[. A 1]----------------------------------------------
diff --git a/Source/GB_subassign_13.c b/Source/GB_subassign_13.c
index 2143b467d9..96e5ac5a96 100644
--- a/Source/GB_subassign_13.c
+++ b/Source/GB_subassign_13.c
@@ -2,7 +2,7 @@
 // GB_subassign_13: C(I,J)<!M> = scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,6 +31,7 @@ GrB_Info GB_subassign_13
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const void *scalar,
     const GrB_Type atype,
     const GrB_Matrix S,
@@ -136,7 +137,7 @@ GrB_Info GB_subassign_13
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
@@ -253,7 +254,7 @@ GrB_Info GB_subassign_13
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
diff --git a/Source/GB_subassign_14.c b/Source/GB_subassign_14.c
index 827c26b749..2d8c20b6fb 100644
--- a/Source/GB_subassign_14.c
+++ b/Source/GB_subassign_14.c
@@ -2,7 +2,7 @@
 // GB_subassign_14: C(I,J)<!M> = A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_14
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -47,6 +48,7 @@ GrB_Info GB_subassign_14
     GB_GET_MASK ;
     const bool M_is_hyper = M->is_hyper ;
     const int64_t Mnvec = M->nvec ;
+    const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     GB_GET_S ;
     GrB_BinaryOp accum = NULL ;
@@ -104,6 +106,7 @@ GrB_Info GB_subassign_14
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -121,7 +124,7 @@ GrB_Info GB_subassign_14
                 if (iS < iA)
                 {
                     // S (i,j) is present but A (i,j) is not
-                    GB_MIJ_BINARY_SEARCH (iS) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -136,7 +139,7 @@ GrB_Info GB_subassign_14
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -149,7 +152,7 @@ GrB_Info GB_subassign_14
                 else
                 {
                     // both S (i,j) and A (i,j) present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -169,7 +172,7 @@ GrB_Info GB_subassign_14
             {
                 // S (i,j) is present but A (i,j) is not
                 int64_t iS = Si [pS] ;
-                GB_MIJ_BINARY_SEARCH (iS) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                 mij = !mij ;
                 if (mij)
                 { 
@@ -187,7 +190,7 @@ GrB_Info GB_subassign_14
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
@@ -240,6 +243,7 @@ GrB_Info GB_subassign_14
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -262,7 +266,7 @@ GrB_Info GB_subassign_14
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -286,7 +290,7 @@ GrB_Info GB_subassign_14
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
diff --git a/Source/GB_subassign_15.c b/Source/GB_subassign_15.c
index ed4453b8bd..a957bd4570 100644
--- a/Source/GB_subassign_15.c
+++ b/Source/GB_subassign_15.c
@@ -2,7 +2,7 @@
 // GB_subassign_15: C(I,J)<!M> += scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,6 +31,7 @@ GrB_Info GB_subassign_15
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -136,7 +137,7 @@ GrB_Info GB_subassign_15
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
@@ -253,7 +254,7 @@ GrB_Info GB_subassign_15
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
diff --git a/Source/GB_subassign_16.c b/Source/GB_subassign_16.c
index a463fc710b..a9975fc4d1 100644
--- a/Source/GB_subassign_16.c
+++ b/Source/GB_subassign_16.c
@@ -2,7 +2,7 @@
 // GB_subassign_16: C(I,J)<!M> += A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_16
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     const GrB_Matrix S,
@@ -48,6 +49,7 @@ GrB_Info GB_subassign_16
     GB_GET_MASK ;
     const bool M_is_hyper = M->is_hyper ;
     const int64_t Mnvec = M->nvec ;
+    const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     GB_GET_S ;
     GB_GET_ACCUM ;
@@ -103,6 +105,7 @@ GrB_Info GB_subassign_16
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -131,7 +134,7 @@ GrB_Info GB_subassign_16
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -144,7 +147,7 @@ GrB_Info GB_subassign_16
                 else
                 {
                     // both S (i,j) and A (i,j) present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -167,7 +170,7 @@ GrB_Info GB_subassign_16
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
@@ -220,6 +223,7 @@ GrB_Info GB_subassign_16
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -242,7 +246,7 @@ GrB_Info GB_subassign_16
                 else if (iA < iS)
                 { 
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -266,7 +270,7 @@ GrB_Info GB_subassign_16
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
diff --git a/Source/GB_subassign_17.c b/Source/GB_subassign_17.c
index 369d8d84dd..dc52af7ad3 100644
--- a/Source/GB_subassign_17.c
+++ b/Source/GB_subassign_17.c
@@ -2,7 +2,7 @@
 // GB_subassign_17: C(I,J)<!M,repl> = scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,6 +31,7 @@ GrB_Info GB_subassign_17
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const void *scalar,
     const GrB_Type atype,
     const GrB_Matrix S,
@@ -136,7 +137,7 @@ GrB_Info GB_subassign_17
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
@@ -260,7 +261,7 @@ GrB_Info GB_subassign_17
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
diff --git a/Source/GB_subassign_18.c b/Source/GB_subassign_18.c
index 6aea135ff8..32137e6524 100644
--- a/Source/GB_subassign_18.c
+++ b/Source/GB_subassign_18.c
@@ -2,7 +2,7 @@
 // GB_subassign_18: C(I,J)<!M,repl> = A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_18
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -47,6 +48,7 @@ GrB_Info GB_subassign_18
     GB_GET_MASK ;
     const bool M_is_hyper = M->is_hyper ;
     const int64_t Mnvec = M->nvec ;
+    const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     GB_GET_S ;
     GrB_BinaryOp accum = NULL ;
@@ -105,6 +107,7 @@ GrB_Info GB_subassign_18
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -135,7 +138,7 @@ GrB_Info GB_subassign_18
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -148,7 +151,7 @@ GrB_Info GB_subassign_18
                 else
                 {
                     // both S (i,j) and A (i,j) present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     GB_C_S_LOOKUP ;
                     if (mij)
@@ -187,7 +190,7 @@ GrB_Info GB_subassign_18
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
@@ -240,6 +243,7 @@ GrB_Info GB_subassign_18
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -262,7 +266,7 @@ GrB_Info GB_subassign_18
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -286,7 +290,7 @@ GrB_Info GB_subassign_18
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
diff --git a/Source/GB_subassign_19.c b/Source/GB_subassign_19.c
index 63c2d55594..1cfafd2842 100644
--- a/Source/GB_subassign_19.c
+++ b/Source/GB_subassign_19.c
@@ -2,7 +2,7 @@
 // GB_subassign_19: C(I,J)<!M,repl> += scalar ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,6 +31,7 @@ GrB_Info GB_subassign_19
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -136,7 +137,7 @@ GrB_Info GB_subassign_19
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
@@ -260,7 +261,7 @@ GrB_Info GB_subassign_19
                 if (i == iM)
                 { 
                     // mij = (bool) M [pM]
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                     GB_NEXT (M) ;
                 }
                 else
diff --git a/Source/GB_subassign_20.c b/Source/GB_subassign_20.c
index d9964baa24..7bb2b4f773 100644
--- a/Source/GB_subassign_20.c
+++ b/Source/GB_subassign_20.c
@@ -2,7 +2,7 @@
 // GB_subassign_20: C(I,J)<!M,repl> += A ; using S
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GB_subassign_20
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     const GrB_Matrix S,
@@ -48,6 +49,7 @@ GrB_Info GB_subassign_20
     GB_GET_MASK ;
     const bool M_is_hyper = M->is_hyper ;
     const int64_t Mnvec = M->nvec ;
+    const int64_t mvlen = M->vlen ;
     GB_GET_A ;
     GB_GET_S ;
     GB_GET_ACCUM ;
@@ -105,6 +107,7 @@ GrB_Info GB_subassign_20
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -122,7 +125,7 @@ GrB_Info GB_subassign_20
                 if (iS < iA)
                 {
                     // S (i,j) is present but A (i,j) is not
-                    GB_MIJ_BINARY_SEARCH (iS) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                     mij = !mij ;
                     if (!mij)
                     { 
@@ -137,7 +140,7 @@ GrB_Info GB_subassign_20
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -150,7 +153,7 @@ GrB_Info GB_subassign_20
                 else
                 {
                     // both S (i,j) and A (i,j) present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     GB_C_S_LOOKUP ;
                     if (mij)
@@ -177,7 +180,7 @@ GrB_Info GB_subassign_20
             while (pS < pS_end)
             {
                 int64_t iS = Si [pS] ;
-                GB_MIJ_BINARY_SEARCH (iS) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
                 mij = !mij ;
                 if (!mij)
                 { 
@@ -195,7 +198,7 @@ GrB_Info GB_subassign_20
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
@@ -248,6 +251,7 @@ GrB_Info GB_subassign_20
 
             int64_t pM_start, pM_end ;
             GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
+            bool mjdense = (pM_end - pM_start) == mvlen ;
 
             //------------------------------------------------------------------
             // do a 2-way merge of S(:,j) and A(:,j)
@@ -270,7 +274,7 @@ GrB_Info GB_subassign_20
                 else if (iA < iS)
                 {
                     // S (i,j) is not present, A (i,j) is present
-                    GB_MIJ_BINARY_SEARCH (iA) ;
+                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                     mij = !mij ;
                     if (mij)
                     { 
@@ -294,7 +298,7 @@ GrB_Info GB_subassign_20
             {
                 // S (i,j) is not present, A (i,j) is present
                 int64_t iA = Ai [pA] ;
-                GB_MIJ_BINARY_SEARCH (iA) ;
+                GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                 mij = !mij ;
                 if (mij)
                 { 
diff --git a/Source/GB_subassign_IxJ_slice.c b/Source/GB_subassign_IxJ_slice.c
index ab80740ae4..16ac1f6a62 100644
--- a/Source/GB_subassign_IxJ_slice.c
+++ b/Source/GB_subassign_IxJ_slice.c
@@ -2,7 +2,7 @@
 // GB_subassign_IxJ_slice: slice IxJ for subassign
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -91,22 +91,6 @@ GrB_Info GB_subassign_IxJ_slice
     int ntasks, max_ntasks = 0, nthreads ;
     GB_task_struct *TaskList = NULL ;
 
-    // printf ("nI "GBd" Ikind "GBd" Icolon "GBd" "GBd" "GBd"\n", nI, Ikind,
-    //     Icolon [0], Icolon [1], Icolon [2]) ;
-    // for (int64_t iA = 0 ; iA < nI ; iA++)
-    // {
-    //     int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
-    //     printf ("   iA "GBd" iC "GBd"\n", iA, iC) ;
-    // }
-
-    // printf ("nJ "GBd" Jkind "GBd" Jcolon "GBd" "GBd" "GBd"\n", nJ, Jkind,
-    //     Jcolon [0], Jcolon [1], Jcolon [2]) ;
-    // for (int64_t jA = 0 ; jA < nJ ; jA++)
-    // {
-    //     int64_t jC = GB_ijlist (J, jA, Jkind, Jcolon) ;
-    //     printf ("   jA "GBd" jC "GBd"\n", jA, jC) ;
-    // }
-
     //--------------------------------------------------------------------------
     // determine # of threads to use
     //--------------------------------------------------------------------------
diff --git a/Source/GB_subassign_emult_slice.c b/Source/GB_subassign_emult_slice.c
index 83bf128b69..71b445bbba 100644
--- a/Source/GB_subassign_emult_slice.c
+++ b/Source/GB_subassign_emult_slice.c
@@ -2,7 +2,7 @@
 // GB_subassign_emult_slice: slice the entries and vectors for GB_subassign_08
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -22,6 +22,8 @@
 
 #include "GB_subassign_methods.h"
 #include "GB_emult.h"
+// Npending is set to NULL by the GB_EMPTY_TASKLIST macro, but unused here.
+#include "GB_unused.h"
 
 #undef  GB_FREE_ALL
 #define GB_FREE_ALL                                                         \
@@ -39,8 +41,8 @@ GrB_Info GB_subassign_emult_slice
     int *p_nthreads,                // # of threads to use
     int64_t *p_Znvec,               // # of vectors to compute in Z
     const int64_t *GB_RESTRICT *Zh_handle,     // Zh is A->h, M->h, or NULL
-    int64_t *GB_RESTRICT *Z_to_A_handle, // Z_to_A: output of size Znvec, or NULL
-    int64_t *GB_RESTRICT *Z_to_M_handle, // Z_to_M: output of size Znvec, or NULL
+    int64_t *GB_RESTRICT *Z_to_A_handle, // Z_to_A: output, size Znvec, or NULL
+    int64_t *GB_RESTRICT *Z_to_M_handle, // Z_to_M: output, size Znvec, or NULL
     // input:
     const GrB_Matrix C,             // output matrix C
     const GrB_Index *I,
@@ -215,23 +217,17 @@ GrB_Info GB_subassign_emult_slice
                 int64_t iC_start = GB_IMIN (iC1, iC2) ;
                 int64_t iC_end   = GB_IMAX (iC1, iC2) ;
 
-                // printf ("\niA_start "GBd"\n", iA_start) ;
-                // printf ("iA_end   "GBd"\n", iA_end) ;
-
-                // printf ("\niC_start "GBd"\n", iC_start) ;
-                // printf ("iC_end   "GBd"\n", iC_end) ;
-
                 // this task works on Ci,Cx [pC:pC_end-1]
                 int64_t pleft = pC_start ;
                 int64_t pright = pC_end - 1 ;
                 bool found, is_zombie ;
-                GB_BINARY_SPLIT_ZOMBIE (iC_start, Ci, pleft, pright,
+                GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_start, Ci, pleft, pright,
                     found, nzombies, is_zombie) ;
                 TaskList [taskid].pC = pleft ;
 
                 pleft = pC_start ;
                 pright = pC_end - 1 ;
-                GB_BINARY_SPLIT_ZOMBIE (iC_end, Ci, pleft, pright,
+                GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_end, Ci, pleft, pright,
                     found, nzombies, is_zombie) ;
                 TaskList [taskid].pC_end = (found) ? (pleft+1) : pleft ;
             }
diff --git a/Source/GB_subassign_methods.h b/Source/GB_subassign_methods.h
index 0be75bfff8..d676669232 100644
--- a/Source/GB_subassign_methods.h
+++ b/Source/GB_subassign_methods.h
@@ -2,7 +2,7 @@
 // GB_subassign_methods.h: definitions for GB_subassign methods
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -56,14 +56,13 @@
 // GB_GET_MASK: get the mask matrix M
 //------------------------------------------------------------------------------
 
-#define GB_GET_MASK                                                         \
-    ASSERT_MATRIX_OK (M, "M for assign", GB0) ;                         \
+#define GB_GET_MASK                                                            \
+    ASSERT_MATRIX_OK (M, "M for assign", GB0) ;                                \
     const int64_t *GB_RESTRICT Mp = M->p ;                                     \
     const int64_t *GB_RESTRICT Mh = M->h ;                                     \
     const int64_t *GB_RESTRICT Mi = M->i ;                                     \
-    const GB_void *GB_RESTRICT Mx = M->x ;                                     \
-    const size_t msize = M->type->size ;                                    \
-    GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;            \
+    const size_t msize = M->type->size ;
 
 //  const bool M_is_hyper = M->is_hyper ;
 //  const int64_t Mnvec = M->nvec ;
@@ -87,13 +86,13 @@
 //------------------------------------------------------------------------------
 
 #define GB_GET_A                                                            \
-    ASSERT_MATRIX_OK (A, "A for assign", GB0) ;                         \
+    ASSERT_MATRIX_OK (A, "A for assign", GB0) ;                             \
     GrB_Type atype = A->type ;                                              \
     size_t asize = atype->size ;                                            \
     GB_Type_code acode = atype->code ;                                      \
-    const int64_t *GB_RESTRICT Ap = A->p ;                                     \
-    const int64_t *GB_RESTRICT Ai = A->i ;                                     \
-    const GB_void *GB_RESTRICT Ax = A->x ;                                     \
+    const int64_t *GB_RESTRICT Ap = A->p ;                                  \
+    const int64_t *GB_RESTRICT Ai = A->i ;                                  \
+    const GB_void *GB_RESTRICT Ax = A->x ;                                  \
     GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;
 
 //  const int64_t *GB_RESTRICT Ah = A->h ;
@@ -105,7 +104,7 @@
 //------------------------------------------------------------------------------
 
 #define GB_GET_SCALAR                                                       \
-    ASSERT_TYPE_OK (atype, "atype for assign", GB0) ;                 \
+    ASSERT_TYPE_OK (atype, "atype for assign", GB0) ;                       \
     size_t asize = atype->size ;                                            \
     GB_Type_code acode = atype->code ;                                      \
     GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;         \
@@ -224,7 +223,8 @@
         int64_t pC = pC_start ;                                             \
         int64_t pright = pC_end - 1 ;                                       \
         bool cij_found, is_zombie ;                                         \
-        GB_BINARY_ZOMBIE (iC, Ci, pC, pright, cij_found, zorig, is_zombie) ;
+        GB_BINARY_SEARCH_ZOMBIE (iC, Ci, pC, pright, cij_found, zorig,      \
+            is_zombie) ;
 
     //--------------------------------------------------------------------------
     // for a 2-way or 3-way merge
@@ -1121,6 +1121,21 @@ GrB_Info GB_subassign_05
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
+    const void *scalar,
+    const GrB_Type atype,
+    GB_Context Context
+) ;
+
+//------------------------------------------------------------------------------
+// GB_subassign_05e: C(:,:)<M,struct> = scalar ; no S, C empty
+//------------------------------------------------------------------------------
+
+GrB_Info GB_subassign_05e
+(
+    GrB_Matrix C,
+    // input:
+    const GrB_Matrix M,
     const void *scalar,
     const GrB_Type atype,
     GB_Context Context
@@ -1143,6 +1158,7 @@ GrB_Info GB_subassign_06n
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     GB_Context Context
 ) ;
@@ -1164,6 +1180,7 @@ GrB_Info GB_subassign_06s
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -1186,6 +1203,7 @@ GrB_Info GB_subassign_07
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -1209,6 +1227,7 @@ GrB_Info GB_subassign_08
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     GB_Context Context
@@ -1231,6 +1250,7 @@ GrB_Info GB_subassign_09
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const void *scalar,
     const GrB_Type atype,
     const GrB_Matrix S,
@@ -1254,6 +1274,7 @@ GrB_Info GB_subassign_10
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -1276,6 +1297,7 @@ GrB_Info GB_subassign_11
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -1300,6 +1322,7 @@ GrB_Info GB_subassign_12
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     const GrB_Matrix S,
@@ -1323,6 +1346,7 @@ GrB_Info GB_subassign_13
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const void *scalar,
     const GrB_Type atype,
     const GrB_Matrix S,
@@ -1346,6 +1370,7 @@ GrB_Info GB_subassign_14
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -1368,6 +1393,7 @@ GrB_Info GB_subassign_15
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -1392,6 +1418,7 @@ GrB_Info GB_subassign_16
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     const GrB_Matrix S,
@@ -1415,6 +1442,7 @@ GrB_Info GB_subassign_17
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const void *scalar,
     const GrB_Type atype,
     const GrB_Matrix S,
@@ -1438,6 +1466,7 @@ GrB_Info GB_subassign_18
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix S,
     GB_Context Context
@@ -1460,6 +1489,7 @@ GrB_Info GB_subassign_19
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const void *scalar,
     const GrB_Type atype,
@@ -1484,6 +1514,7 @@ GrB_Info GB_subassign_20
     const int Jkind,
     const int64_t Jcolon [3],
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_BinaryOp accum,
     const GrB_Matrix A,
     const GrB_Matrix S,
@@ -1564,7 +1595,7 @@ GrB_Info GB_subassign_20
 // GB_SUBASSIGN_EMULT_SLICE: slice A.*M (just Method 08)
 //------------------------------------------------------------------------------
 
-// Method 08 only.  If C is dense, it is sliced for a fine task, so that
+// Method 08 only.  If C(:,jC) is sparse, it is sliced for a fine task, so that
 // it can do a binary search via GB_iC_BINARY_SEARCH.  But if C(:,jC) is dense,
 // C(:,jC) is not sliced, so the fine task must do a direct lookup via
 // GB_iC_DENSE_LOOKUP.  Otherwise a race condition will occur.
@@ -1668,8 +1699,8 @@ GrB_Info GB_subassign_emult_slice
     int *p_nthreads,                // # of threads to use
     int64_t *p_Znvec,               // # of vectors to compute in Z
     const int64_t *GB_RESTRICT *Zh_handle,     // Zh is A->h, M->h, or NULL
-    int64_t *GB_RESTRICT *Z_to_A_handle, // Z_to_A: output of size Znvec, or NULL
-    int64_t *GB_RESTRICT *Z_to_M_handle, // Z_to_M: output of size Znvec, or NULL
+    int64_t *GB_RESTRICT *Z_to_A_handle, // Z_to_A: output, size Znvec, or NULL
+    int64_t *GB_RESTRICT *Z_to_M_handle, // Z_to_M: output, size Znvec, or NULL
     // input:
     const GrB_Matrix C,             // output matrix C
     const GrB_Index *I,
@@ -1803,25 +1834,33 @@ GrB_Info GB_subassign_emult_slice
     {                                                                       \
         int64_t pright = p ## X ## _end - 1 ;                               \
         bool found ;                                                        \
-        GB_BINARY_SPLIT_SEARCH (iA_start, X ## i, p ## X, pright, found) ;  \
+        GB_SPLIT_BINARY_SEARCH (iA_start, X ## i, p ## X, pright, found) ;  \
     }
 
 //------------------------------------------------------------------------------
-// GB_MIJ_BINARY_SEARCH
+// GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP
 //------------------------------------------------------------------------------
 
-// mij = M(iA,j)
+// mij = M(i,j)
 
-#define GB_MIJ_BINARY_SEARCH(i)                                             \
+#define GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP(i)                             \
     bool mij ;                                                              \
+    if (mjdense)                                                            \
+    {                                                                       \
+        /* M(:,j) is dense, no need for binary search */                    \
+        int64_t pM = pM_start + i ;                                         \
+        mij = GB_mcast (Mx, pM, msize) ;                                    \
+    }                                                                       \
+    else                                                                    \
     {                                                                       \
+        /* M(:,j) is sparse, binary search for M(i,j) */                    \
         int64_t pM     = pM_start ;                                         \
         int64_t pright = pM_end - 1 ;                                       \
         bool found ;                                                        \
         GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;                       \
         if (found)                                                          \
         {                                                                   \
-            cast_M (&mij, Mx +(pM*msize), 0) ;                              \
+            mij = GB_mcast (Mx, pM, msize) ;                                \
         }                                                                   \
         else                                                                \
         {                                                                   \
diff --git a/Source/GB_subassign_one_slice.c b/Source/GB_subassign_one_slice.c
index bfd16448f5..51092a9304 100644
--- a/Source/GB_subassign_one_slice.c
+++ b/Source/GB_subassign_one_slice.c
@@ -2,7 +2,7 @@
 // GB_subassign_one_slice: slice the entries and vectors for subassign
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -142,7 +142,7 @@ GrB_Info GB_subassign_one_slice
     //--------------------------------------------------------------------------
 
     if (!GB_pslice (&Coarse, /* A */ A->p, A->nvec, ntasks1))
-    {
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
@@ -184,7 +184,6 @@ GrB_Info GB_subassign_one_slice
             GB_REALLOC_TASK_LIST (TaskList, ntasks + 1, max_ntasks) ;
             TaskList [ntasks].kfirst = k ;
             TaskList [ntasks].klast  = klast ;
-            // printf ("%d: coarse "GBd":"GBd"\n", ntasks, k, klast) ;
             ntasks++ ;
 
         }
@@ -222,8 +221,6 @@ GrB_Info GB_subassign_one_slice
             ASSERT (k >= 0 && k < anvec) ;
             int64_t j = (Ah == NULL) ? k : Ah [k] ;
             ASSERT (j >= 0 && j < nJ) ;
-            // printf ("J %p j "GBd" jkind %d Jcolon "GBd" "GBd" "GBd"\n",
-            //     J, j, Jkind, Jcolon [0], Jcolon [1], Jcolon [2]) ;
             int64_t GB_LOOKUP_jC ;
 
             bool jC_dense = (pC_end - pC_start == cvlen) ;
@@ -252,7 +249,6 @@ GrB_Info GB_subassign_one_slice
 
                 TaskList [ntasks].kfirst = k ;
                 TaskList [ntasks].klast  = k ;
-                // printf ("%d: thin coarse "GBd":"GBd"\n", ntasks, k, k) ;
                 ntasks++ ;
 
             }
@@ -303,33 +299,21 @@ GrB_Info GB_subassign_one_slice
                         int64_t iC_start = GB_IMIN (iC1, iC2) ;
                         int64_t iC_end   = GB_IMAX (iC1, iC2) ;
 
-                        // printf ("\niA_start "GBd"\n", iA_start) ;
-                        // printf ("iA_end   "GBd"\n", iA_end) ;
-
-                        // printf ("\niC_start "GBd"\n", iC_start) ;
-                        // printf ("iC_end   "GBd"\n", iC_end) ;
-
                         // this task works on Ci,Cx [pC:pC_end-1]
                         int64_t pleft = pC_start ;
                         int64_t pright = pC_end - 1 ;
                         bool found, is_zombie ;
-                        GB_BINARY_SPLIT_ZOMBIE (iC_start, Ci, pleft, pright,
-                            found, nzombies, is_zombie) ;
+                        GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_start, Ci,
+                            pleft, pright, found, nzombies, is_zombie) ;
                         TaskList [ntasks].pC = pleft ;
 
                         pleft = pC_start ;
                         pright = pC_end - 1 ;
-                        GB_BINARY_SPLIT_ZOMBIE (iC_end, Ci, pleft, pright,
-                            found, nzombies, is_zombie) ;
+                        GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_end, Ci,
+                            pleft, pright, found, nzombies, is_zombie) ;
                         TaskList [ntasks].pC_end = (found) ? (pleft+1) : pleft ;
                     }
 
-                    // printf ("%d: fine "GBd": pA "GBd" pA_end "GBd
-                    //    " pC "GBd" pC_end "GBd"\n",
-                    //    ntasks, k,
-                    //    TaskList [ntasks].pA, TaskList [ntasks].pA_end,
-                    //    TaskList [ntasks].pC, TaskList [ntasks].pC_end) ;
-
                     ASSERT (TaskList [ntasks].pA <= TaskList [ntasks].pA_end) ;
                     ASSERT (TaskList [ntasks].pC <= TaskList [ntasks].pC_end) ;
                     ntasks++ ;
diff --git a/Source/GB_subassign_scalar.c b/Source/GB_subassign_scalar.c
index 6d55440656..6553110fa1 100644
--- a/Source/GB_subassign_scalar.c
+++ b/Source/GB_subassign_scalar.c
@@ -2,7 +2,7 @@
 // GB_subassign_scalar: C(Rows,Cols)<M> = accum (C(Rows,Cols),x)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -45,7 +45,8 @@ GrB_Info GB_subassign_scalar        // C(Rows,Cols)<M> += x
     ASSERT (scalar_code <= GB_UDT_code) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // C(Rows,Cols)<M> = accum (C(Rows,Cols), scalar)
@@ -53,7 +54,7 @@ GrB_Info GB_subassign_scalar        // C(Rows,Cols)<M> += x
 
     return (GB_subassign (
         C,          C_replace,      // C matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         false,                      // do not transpose the mask
         accum,                      // for accum (C(Rows,Cols),scalar)
         NULL,       false,          // no explicit matrix A
diff --git a/Source/GB_subassigner.c b/Source/GB_subassigner.c
index a349bb8584..f7b9a8709a 100644
--- a/Source/GB_subassigner.c
+++ b/Source/GB_subassigner.c
@@ -2,7 +2,7 @@
 // GB_subassigner: C(I,J)<#M> = accum (C(I,J), A)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -63,6 +63,7 @@
 #include "GB_subassign.h"
 #include "GB_subassign_methods.h"
 #include "GB_subref.h"
+#include "GB_dense.h"
 #ifdef GB_DEBUG
 #include "GB_iterator.h"
 #endif
@@ -80,6 +81,7 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     bool C_replace,                 // C matrix descriptor
     const GrB_Matrix M_input,       // optional mask for C(I,J), unused if NULL
     const bool Mask_comp,           // mask descriptor
+    const bool Mask_struct,         // if true, use only the structure of M
     const GrB_BinaryOp accum,       // optional accum for Z=accum(C(I,J),A)
     const GrB_Matrix A_input,       // input matrix (NULL for scalar expansion)
     const GrB_Index *I_input,       // list of indices
@@ -107,13 +109,6 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     GrB_Index *GB_RESTRICT J2  = NULL ;
     GrB_Index *GB_RESTRICT J2k = NULL ;
 
-    /*
-    GrB_Index *I2  = NULL ;
-    GrB_Index *I2k = NULL ;
-    GrB_Index *J2  = NULL ;
-    GrB_Index *J2k = NULL ;
-    */
-
     GrB_Matrix A = A_input ;
     GrB_Matrix M = M_input ;
     int64_t ni = ni_input ;
@@ -123,6 +118,7 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     #define I ((I_jumbled) ? I2 : I_input)
     #define J ((J_jumbled) ? J2 : J_input)
 
+    // GB_subassigner cannot tolerate C==A and C==M aliasing.  A==M is OK.
     ASSERT (C != NULL) ;
     ASSERT (!GB_aliased (C, M)) ;
     ASSERT (!GB_aliased (C, A)) ;
@@ -131,48 +127,69 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     // delete any lingering zombies and assemble any pending tuples
     //--------------------------------------------------------------------------
 
+    ASSERT_MATRIX_OK (C, "C input for subassigner", GB0) ;
+
     // subassign tolerates both zombies and pending tuples in C, but not M or A
     GB_WAIT (M) ;
     GB_WAIT (A) ;
 
     //--------------------------------------------------------------------------
-    // check empty mask conditions
+    // check mask conditions
     //--------------------------------------------------------------------------
 
+    bool empty_mask = false ;   // true if mask not present and complemented
+    bool no_mask = false ;      // true if mask not present and not complemented
+
     if (M == NULL)
     {
-        // the mask is empty
+        // the mask is not present
         if (Mask_comp)
         {
-            // an empty mask is complemented
+            // empty_mask:  mask is not present, and complemented
+            empty_mask = true ;
             if (!C_replace)
             { 
                 // No work to do.  This the same as the GB_RETURN_IF_QUICK_MASK
                 // case in other GraphBLAS functions, except here only the
-                // sub-case of C_replace=false is handled.  The C_replace=true
-                // sub-case needs to delete all entries in C(I,J), which is
-                // handled below in GB_subassign_00.
+                // sub-case of C_replace == false is handled.  The C_replace ==
+                // true sub-case needs to delete all entries in C(I,J), which
+                // is handled below in GB_subassign_00.  This "quick" case is
+                // checked again if C_replace becomes effectively false, below.
+                GBBURBLE ("quick ") ;
                 return (GrB_SUCCESS) ;
             }
         }
         else
-        { 
-            // The mask is empty and not complemented.  In this case, C_replace
-            // is effectively false.  Disable it, since it can force pending
-            // tuples to be assembled.  In the comments below "C_replace
-            // effectively false" means that either C_replace is false on
-            // input, or the mask is empty and not complemented and thus
-            // C_replace is set to false here.
-            C_replace = false ;
+        {
+            // no_mask:  mask is not present, and not complemented
+            no_mask = true ;
+            if (C_replace)
+            { 
+                // The mask is not present and not complemented.  In this case,
+                // C_replace is effectively false.  Disable it, since it can
+                // force pending tuples to be assembled.  In the comments below
+                // "C_replace effectively false" means that either C_replace is
+                // false on input, or the mask is not present and not
+                // complemented and thus C_replace is set to false here.
+                GBBURBLE ("(no mask: C_replace effectively false) ") ;
+                C_replace = false ;
+            }
         }
     }
 
-    // C_replace now has its effective value: can only be true if true on
-    // input and if the mask is present, or empty and complemented.  C_replace
-    // is false if it is false on input, or if the mask is empty and not
-    // complemented.
+    //--------------------------------------------------------------------------
+    // check if C is empty
+    //--------------------------------------------------------------------------
 
-    ASSERT (GB_IMPLIES (M == NULL && !Mask_comp, C_replace == false)) ;
+    bool C_is_empty = (GB_NNZ (C) == 0 && !GB_PENDING (C) && !GB_ZOMBIES (C)) ;
+    if (C_is_empty)
+    { 
+        // C is completely empty.  C_replace is irrelevant, so set it to false.
+        // The burble for this case occurs below, after GB_wait (C), since C
+        // may become empty if it contains nothing but zombies, or after the
+        // GB_clear (C) below.
+        C_replace = false ;
+    }
 
     //--------------------------------------------------------------------------
     // get the C matrix
@@ -354,13 +371,12 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         ASSERT (scalar != NULL) ;
         anz = mn ;
         A_is_dense = true ;
-        // a run-time or compile-time user-defined scalar is assumed to have
-        // the same type as C->type which is also user-defined (or else it
-        // would not be compatible).  Compatibility has already been checked in
-        // the caller.  The type of scalar for built-in types is determined by
-        // scalar_code, instead, since it can differ from C (in which case it
-        // is typecasted into C->type).  User-defined scalars cannot be
-        // typecasted.
+        // a user-defined scalar is assumed to have the same type as C->type
+        // which is also user-defined (or else it would not be compatible).
+        // Compatibility has already been checked in the caller.  The type of
+        // scalar for built-in types is determined by scalar_code, instead,
+        // since it can differ from C (in which case it is typecasted into
+        // C->type).  User-defined scalars cannot be typecasted.
         atype = GB_code_type (scalar_code, C->type) ;
         ASSERT_TYPE_OK (atype, "atype for scalar expansion", GB0) ;
     }
@@ -390,16 +406,115 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         ASSERT (nI == M->vlen && nJ == M->vdim) ;
     }
 
+    //--------------------------------------------------------------------------
+    // C(:,:) assignment
+    //--------------------------------------------------------------------------
+
+    // whole_C_matrix is true if all of C(:,:) is being assigned to
+    bool whole_C_matrix = (Ikind == GB_ALL) && (Jkind == GB_ALL) ;
+
+    bool C_splat_scalar = false ;   // C(:,:) = x
+    bool C_splat_matrix = false ;   // C(:,:) = A
+
+    if (whole_C_matrix && no_mask && (accum == NULL))
+    {
+
+        //----------------------------------------------------------------------
+        // C(:,:) = x or A:  whole matrix assignment with no mask
+        //----------------------------------------------------------------------
+
+        if (scalar_expansion)
+        { 
+            // Method 21: C(:,:) = x
+            C_splat_scalar = true ;
+        }
+        else
+        { 
+            // Method 24: C(:,:) = A
+            C_splat_matrix = true ;
+        }
+        // C_replace is already effectively false (see no_mask case above)
+        ASSERT (C_replace == false) ;
+
+        // free pending tuples early but do not clear all of C.  If it is
+        // already dense then its pattern can be reused.
+        GB_Pending_free (&(C->Pending)) ;
+
+    }
+    else if (whole_C_matrix && C_replace && (accum == NULL))
+    {
+
+        //----------------------------------------------------------------------
+        // C(:,:)<any mask, replace> = A or x, no accum operator present
+        //----------------------------------------------------------------------
+
+        // If the entire C(:,:) is being assigned to, and if no accum operator
+        // is present, then the matrix can be cleared of all entries now, and
+        // then C_replace can be set false.  This can only be done because C is
+        // not aliased to M or A on input, which the caller ensures is true.
+        // See the assertion above.  Clearing C now speeds up the assignment
+        // since the wait on C can be skipped, below.  It also simplifies the
+        // kernels.  If S is constructed, it is just an empty matrix.
+
+        GB_OK (GB_clear (C, Context)) ;
+        if (C_replace)
+        { 
+            GBBURBLE ("(C cleared early) ") ;
+            C_replace = false ;
+        }
+
+        // By clearing C now and setting C_replace to false, the following
+        // methods are used: 09 becomes 05, 10 becomes 06n or 06s, 17
+        // becomes 13, and 18 becomes 14.  The S matrix for methods 06s,
+        // 13, and 14 is still created, but it is very fast to construct
+        // and traverse since C is empty.  Method 00 can be skipped since
+        // C is already empty (see "quick" case below).
+
+        // prior time             new  time           action
+        // ----- ----             ---  ----           ------
+
+        // 00:  O(S)              nothing, O(1)       C already cleared
+
+        // 09:  O(M+S)            05:  O(M)           C<M> = x, no S
+
+        // 10:  O((A+S)*log(m))   06n: O(M*log(a))    C<M> = A, no S
+        //                        06s: O(A*log(m))    C<M> = A, with S
+
+        // 17:  O(m*n)            13:  O(m*n)         C<!M> = x, with S
+
+        // 18:  O(A*log(m))       14:  O(A*log(m))    C<!M> = A, with S
+
+        //  =====================       ==============
+        //  M   cmp rpl acc A   S       method: action
+        //  =====================       ==============
+
+        //  M   -   -   -   -   -       05:  C(I,J)<M> = x, no S
+        //  M   -   -   -   A   -       06n: C(I,J)<M> = A, no S
+        //  M   -   -   -   A   S       06s: C(I,J)<M> = A, with S
+
+        //  M   -   r   -   -   S       09:  C(I,J)<M,repl> = x, with S
+        //  M   -   r   -   A   S       10:  C(I,J)<M,repl> = A, with S
+
+        //  M   c   -   -   -   S       13:  C(I,J)<!M> = x, with S
+        //  M   c   -   -   A   S       14:  C(I,J)<!M> = A, with S
+
+        //  M   c   r   -   -   S       17:  C(I,J)<!M,repl> = x, with S
+        //  M   c   r   -   A   S       18:  C(I,J)<!M,repl> = A, with S
+
+        // Methods 09, 10, 17, and 18 are now used only if C(I,J) is a
+        // submatrix of C, and not for the whole_C_matrix case.
+    }
+
     //--------------------------------------------------------------------------
     // check compatibilty of prior pending tuples
     //--------------------------------------------------------------------------
 
     // The action: ( delete ), described below, can only delete a live
     // entry in the pattern.  It cannot delete a pending tuple; pending tuples
-    // cannot become zombies.  Thus, if this call to GxB_subassign has the
+    // cannot become zombies.  Thus, if this call to GB_subassigner has the
     // potential for creating zombies, all prior pending tuples must be
     // assembled now.  They thus become live entries in the pattern of C, so
-    // that this GxB_subassign can (potentially) turn them into zombies via
+    // that this GB_subassigner can (potentially) turn them into zombies via
     // action: ( delete ).
 
     // If accum is NULL, the operation is C(I,J) = A, or C(I,J)<M> = A.
@@ -415,7 +530,7 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     // All pending tuples will be assembled sometime later on, using a single
     // pending operator, and thus the current accum operator must match the
     // prior pending operator.  If the operators do not match, then all prior
-    // pending tuples must be assembled now, so that this GxB_subassign can
+    // pending tuples must be assembled now, so that this GB_subassigner can
     // (potentially) insert new pending tuples whose pending operator is accum.
 
     // These tests are conservative because it is possible that this
@@ -530,15 +645,45 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     ASSERT_MATRIX_OK (C, "C before subassign", GB0) ;
     ASSERT_BINARYOP_OK_OR_NULL (accum, "accum for assign", GB0) ;
 
+    //--------------------------------------------------------------------------
+    // check again if C is empty
+    //--------------------------------------------------------------------------
+
+    // GB_clear or GB_wait, above, may have deleted all the zombies in C, so
+    // check again if C is empty.
+    C_is_empty = (GB_NNZ (C) == 0 && !GB_PENDING (C) && !GB_ZOMBIES (C)) ;
+    if (C_is_empty)
+    { 
+        // C is completely empty.  C_replace is irrelevant, so set it to false.
+        GBBURBLE ("(C empty) ") ;
+        C_replace = false ;
+    }
+
+    //--------------------------------------------------------------------------
+    // check "quick" case again
+    //--------------------------------------------------------------------------
+
+    if (empty_mask && !C_replace)
+    { 
+        // The mask is empty (not present, but complemented), and C_replace is
+        // now effectively false.  If C_replace was false on input, then the
+        // "quick" case above has already been triggered.  However, if C is now
+        // empty (either cleared with GB_clear, empty on input, or empty after
+        // GB_wait), then C_replace is now effectively false.  In this case,
+        // the "quick" case can be checked again.  No more work to do.
+        GBBURBLE ("quick ") ;
+        return (GrB_SUCCESS) ;
+    }
+
     //--------------------------------------------------------------------------
     // keep track of the current accum operator
     //--------------------------------------------------------------------------
 
     // If accum is NULL and pending tuples are added, they will be assembled
     // sometime later (not here) using the implied SECOND_Ctype operator.  This
-    // GxB_subassign operation corresponds to C(I,J)=A or C(I,J)<M>=A.
-    // Subsequent calls to GrB_setElement, and subsequent calls to
-    // GxB_subassign with an explict SECOND_Ctype operator, may create
+    // GB_subassigner operation corresponds to C(I,J)=A or C(I,J)<M>=A.
+    // Subsequent calls to GrB_setElement, and subsequent calls to GrB_assign
+    // or GxB_subassign with an explicit SECOND_Ctype operator, may create
     // additional pending tuples and add them to the list without requiring
     // that they be assembled first.
 
@@ -558,12 +703,19 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     // select the method to use
     //--------------------------------------------------------------------------
 
-    // FUTURE: add method 01n for C(:,:) = scalar when nnz (C) == 0.
-
-    bool S_Extraction = true ;
-
-    // empty_mask:  C(I,J)<!> = ... ; empty mask is complemented
-    bool empty_mask =(Mask_comp && M == NULL) ;
+    // check if C is completely dense:  all entries present and no pending work.
+    bool C_is_dense = !GB_PENDING_OR_ZOMBIES (C) && GB_is_dense (C) ;
+    bool C_dense_update = false ;
+    if (C_is_dense)
+    { 
+        GBBURBLE ("(C dense) ") ;
+        if (whole_C_matrix && no_mask && (accum != NULL)
+            && (C->type == accum->ztype) && (C->type == accum->xtype))
+        { 
+            // C(:,:) += x or A, where C is dense, no typecasting of C
+            C_dense_update = true ;
+        }
+    }
 
     // simple_mask: C(I,J)<M> = ... ; or C(I,J)<M> += ...
     bool simple_mask = (!C_replace && M != NULL && !Mask_comp) ;
@@ -574,30 +726,71 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     // C_Mask_matrix:  C(I,J)<M> = A or += A
     bool C_Mask_matrix = (!scalar_expansion && simple_mask) ;
 
+    bool S_Extraction ;
     if (empty_mask)
     { 
-        // use Method 00: C(I,J) = empty
+        // The mask is not present, but complemented.
+        // Method 00: C(I,J)<!,repl> = empty
         S_Extraction = true ;
     }
+    else if (C_splat_scalar)
+    { 
+        // Method 21: C(:,:) = x where x is a scalar; C becomes dense
+        S_Extraction = false ;
+    }
+    else if (C_splat_matrix)
+    { 
+        // Method 24: C(:,:) = A
+        S_Extraction = false ;
+    }
+    else if (C_dense_update)
+    { 
+        // Methods 22 and 23: C(:,:) += x or A where C is dense
+        S_Extraction = false ;
+    }
     else if (C_Mask_scalar)
     { 
-        // use Method 05 or 07: C(I,J)<M> = or += scalar; C_replace false
+        // Method 05*, or 07: C(I,J)<M> = or += scalar; C_replace false
         S_Extraction = false ;
     }
     else if (C_Mask_matrix)
     {
+        // C(I,J)<M> = A or += A
         if (accum != NULL)
         { 
-            // C(I,J)<M> += A always uses method 08.  S is not constructed.
+            // Method 08: C(I,J)<M> += A
             S_Extraction = false ;
         }
         else
         { 
             // C(I,J)<M> = A ;  use 06s (with S) or 06n (without S)
-            // method 06s (with S) is faster when nnz (A) < nnz (M)
-            S_Extraction = (anz < GB_NNZ (M)) ;
+            // method 06s (with S) is faster when nnz (A) < nnz (M).
+            // If M and A are aliased, then nnz (A) == nnz (M), so method
+            // 06n is used.
+            if (C_is_dense && whole_C_matrix && M == A)
+            {
+                // Method 06d: C<A> = A
+                S_Extraction = false ;
+            }
+            else if (C_is_empty && whole_C_matrix && A_is_dense && Mask_struct)
+            {
+                // Method 25: C<M,s> = A, where M is structural, A is
+                // dense, and C starts out empty.  The pattern of C will be the
+                // same as M, and the subassign method is extremely simple.
+                S_Extraction = false ;
+            }
+            else
+            {
+                // Method 06n or Method 06s:
+                S_Extraction = (anz < GB_NNZ (M)) ;
+            }
         }
     }
+    else
+    { 
+        // all other methods require S
+        S_Extraction = true ;
+    }
 
     //--------------------------------------------------------------------------
     // extract the pattern: S = C(I,J) for S_Extraction method, and quick mask
@@ -620,6 +813,9 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         // extract symbolic structure S=C(I,J)
         //----------------------------------------------------------------------
 
+        // FUTURE::: if whole_C_matrix is true, then C(:,:) = ... and S == C,
+        // except that S is zombie-free, read-only; and C collects zombies.
+
         // FUTURE:: the properties of I and J are already known, and thus do
         // not need to be recomputed by GB_subref.
 
@@ -679,10 +875,12 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
     // determined here; it is not a user input).  The first 5 options are
     // determined by the input.  The table below has been pruned to remove
     // combinations that are not used, or equivalent to other entries in the
-    // table.  Only 22 unique combinations of the 64 combinations are needed.
+    // table.  Only 22 unique combinations of the 64 combinations are needed,
+    // with additional special cases when C(:,:) is dense.
 
     //      M           present or NULL
     //      Mask_comp   true or false
+    //      Mask_struct structural or valued mask
     //      C_replace   true or false
     //      accum       present or NULL
     //      A           scalar (x) or matrix (A)
@@ -700,15 +898,24 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         //  M   cmp rpl acc A   S       method: action
         //  =====================       ==============
 
+        //  -   -   x   -   -   -       21:  C = x, no S, C anything
+        //  -   -   x   -   A   -       24:  C = A, no S, C and A anything
+        //  -   -   -   +   -   -       22:  C += x, no S, C dense
+        //  -   -   -   +   A   -       23:  C += A, no S, C dense
+
         //  -   -   -   -   -   S       01:  C(I,J) = x, with S
-        //  -   -   -   -   -   S       01n: C(:,:) = x, no S, nnz(C) == 0
         //  -   -   -   -   A   S       02:  C(I,J) = A, with S
         //  -   -   -   +   -   S       03:  C(I,J) += x, with S
         //  -   -   -   +   A   S       04:  C(I,J) += A, with S
         //  -   -   r                        uses methods 01, 02, 03, 04
-        //  -   c   -                   no work to do
+        //  -   c   -                        no work to do
         //  -   c   r           S       00:  C(I,J)<!,repl> = empty, with S
+
+        //  M   -   -   -   -   -       05d: C<M> = x, no S, C dense
+        //  M   -   -   -   -   -       05e: C<M,s> = x, no S, C empty
         //  M   -   -   -   -   -       05:  C(I,J)<M> = x, no S
+        //  A   -   -   -   A   -       06d: C<A> = A, no S, C dense
+        //  M   -   -   -   A   -       25:  C<M,s> = A, A dense, C empty
         //  M   -   -   -   A   -       06n: C(I,J)<M> = A, no S
         //  M   -   -   -   A   S       06s: C(I,J)<M> = A, with S
         //  M   -   -   +   -   -       07:  C(I,J)<M> += x, no S
@@ -717,6 +924,7 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         //  M   -   r   -   A   S       10:  C(I,J)<M,repl> = A, with S
         //  M   -   r   +   -   S       11:  C(I,J)<M,repl> += x, with S
         //  M   -   r   +   A   S       12:  C(I,J)<M,repl> += A, with S
+
         //  M   c   -   -   -   S       13:  C(I,J)<!M> = x, with S
         //  M   c   -   -   A   S       14:  C(I,J)<!M> = A, with S
         //  M   c   -   +   -   S       15:  C(I,J)<!M> += x, with S
@@ -726,10 +934,16 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         //  M   c   r   +   -   S       19:  C(I,J)<!M,repl> += x, with S
         //  M   c   r   +   A   S       20:  C(I,J)<!M,repl> += A, with S
 
+        //----------------------------------------------------------------------
+        // FUTURE::: C<C,s> = x    C == M, replace all values, C_replace ignored
+        // FUTURE::: C<C,s> += x   C == M, update all values, C_replace ignored
+        // FUTURE::: C<C,s> = A    C == M, A dense, C_replace ignored
+        //----------------------------------------------------------------------
+
     // For the single case C(I,J)<M>=A, two methods can be used: 06n and 06s.
 
-    // FUTURE:: create a set of methods that operate on a dense matrix C.
-    // The matrix S is not needed.
+    #define Istring ((Ikind == GB_ALL) ? ":" : "I")  // burble label for I
+    #define Jstring ((Jkind == GB_ALL) ? ":" : "J")  // burble label for J
 
     if (empty_mask)
     { 
@@ -747,10 +961,96 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         ASSERT (S != NULL) ;
 
         // Method 00: C(I,J) = empty ; using S
+        GBBURBLE ("Method 00: C(%s,%s) = empty ; using S ",
+            Istring, Jstring) ;
         GB_OK (GB_subassign_00 (C,
             I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
             S, Context)) ;
 
+    }
+    else if (C_splat_scalar)
+    { 
+
+        //----------------------------------------------------------------------
+        // C = x where x is a scalar; C becomes dense
+        //----------------------------------------------------------------------
+
+        //  =====================       ==============
+        //  M   cmp rpl acc A   S       method: action
+        //  =====================       ==============
+
+        //  -   -   x   -   -   -       21:  C = x, no S, C anything
+
+        ASSERT (whole_C_matrix) ;           // C(:,:) is modified
+        ASSERT (M == NULL) ;                // no mask present
+        ASSERT (accum == NULL) ;            // accum is not present
+        ASSERT (!C_replace) ;               // C_replace is effectively false
+        ASSERT (S == NULL) ;                // S is not used
+        ASSERT (scalar_expansion) ;         // x is a scalar
+
+        // Method 21: C = x where x is a scalar; C becomes dense
+        GBBURBLE ("Method 21: (C dense) = scalar ") ;
+        GB_OK (GB_dense_subassign_21 (C, scalar, atype, Context)) ;
+
+    }
+    else if (C_splat_matrix)
+    { 
+
+        //----------------------------------------------------------------------
+        // C = A
+        //----------------------------------------------------------------------
+
+        //  =====================       ==============
+        //  M   cmp rpl acc A   S       method: action
+        //  =====================       ==============
+
+        //  -   -   x   -   A   -       24:  C = A, no S, C and A anything
+
+        ASSERT (whole_C_matrix) ;           // C(:,:) is modified
+        ASSERT (M == NULL) ;                // no mask present
+        ASSERT (accum == NULL) ;            // accum is not present
+        ASSERT (!C_replace) ;               // C_replace is effectively false
+        ASSERT (S == NULL) ;                // S is not used
+        ASSERT (!scalar_expansion) ;        // A is a matrix
+
+        // Method 24: C = A
+        GBBURBLE ("Method 24: C = Z ") ;
+        GB_OK (GB_dense_subassign_24 (C, A, Context)) ;
+
+    }
+    else if (C_dense_update)
+    { 
+
+        //----------------------------------------------------------------------
+        // C += A or x where C is dense
+        //----------------------------------------------------------------------
+
+        //  =====================       ==============
+        //  M   cmp rpl acc A   S       method: action
+        //  =====================       ==============
+        //  -   -   -   +   -   -       22:  C += x, no S, C dense
+        //  -   -   -   +   A   -       23:  C += A, no S, C dense
+
+        ASSERT (C_is_dense) ;               // C is dense
+        ASSERT (whole_C_matrix) ;           // C(:,:) is modified
+        ASSERT (M == NULL) ;                // no mask present
+        ASSERT (accum != NULL) ;            // accum is present
+        ASSERT (!C_replace) ;               // C_replace is false
+        ASSERT (S == NULL) ;                // S is not used
+
+        if (scalar_expansion)
+        {
+            // Method 22: C(:,:) += x where C is dense
+            GBBURBLE ("Method 22: (C dense) += scalar ") ;
+            GB_OK (GB_dense_subassign_22 (C, scalar, atype, accum, Context)) ;
+        }
+        else
+        {
+            // Method 23: C(:,:) += A where C is dense
+            GBBURBLE ("Method 23: (C dense) += Z ") ;
+            GB_OK (GB_dense_subassign_23 (C, A, accum, Context)) ;
+        }
+
     }
     else if (C_Mask_scalar)
     {
@@ -762,6 +1062,8 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         //  =====================       ==============
         //  M   cmp rpl acc A   S       method: action
         //  =====================       ==============
+        //  M   -   -   -   -   -       05d: C(:,:)<M> = x, no S, C dense
+        //  M   -   -   -   -   -       05e: C(:,:)<M,s> = x, no S, C empty
         //  M   -   -   -   -   -       05:  C(I,J)<M> = x, no S
         //  M   -   -   +   -   -       07:  C(I,J)<M> += x, no S
 
@@ -771,18 +1073,38 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         ASSERT (S == NULL) ;                // S is not used
 
         if (accum == NULL)
-        { 
-            // Method 05: C(I,J)<M> = scalar ; no S
-            GB_OK (GB_subassign_05 (C,
-                I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                M, scalar, atype, Context)) ;
+        {
+            if (C_is_empty && whole_C_matrix && Mask_struct)
+            { 
+                // Method 05e: C(:,:)<M> = scalar ; no S; C empty, M structural
+                GBBURBLE ("Method 05e: (C empty)<M> = scalar ") ;
+                GB_OK (GB_subassign_05e (C, M, scalar, atype, Context)) ;
+            }
+            else if (C_is_dense && whole_C_matrix)
+            { 
+                // Method 05d: C(:,:)<M> = scalar ; no S; C is dense
+                GBBURBLE ("Method 05d: (C dense)<M> = scalar ") ;
+                GB_OK (GB_dense_subassign_05d (C,
+                    M, Mask_struct, scalar, atype, Context)) ;
+            }
+            else
+            { 
+                // Method 05: C(I,J)<M> = scalar ; no S
+                GBBURBLE ("Method 05: C(%s,%s)<M> = scalar ; no S ",
+                    Istring, Jstring) ;
+                GB_OK (GB_subassign_05 (C,
+                    I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
+                    M, Mask_struct, scalar, atype, Context)) ;
+            }
         }
         else
         { 
             // Method 07: C(I,J)<M> += scalar ; no S
+            GBBURBLE ("Method 07: C(%s,%s)<M> += scalar ; no S",
+                Istring, Jstring) ;
             GB_OK (GB_subassign_07 (C,
                 I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                M, accum, scalar, atype, Context)) ;
+                M, Mask_struct, accum, scalar, atype, Context)) ;
         }
 
     }
@@ -797,6 +1119,8 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         //  M   cmp rpl acc A   S       method: action
         //  =====================       ==============
         //  M   -   -   +   A   -       08:  C(I,J)<M> += A, no S
+        //  A   -   -   -   A   -       06d: C<A> = A, no S, C dense
+        //  M   -   x   -   A   -       25:  C<M,s> = A, A dense, C empty
         //  M   -   -   -   A   -       06n: C(I,J)<M> = A, no S
         //  M   -   -   -   A   S       06s: C(I,J)<M> = A, with S
 
@@ -807,24 +1131,41 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
         if (accum != NULL)
         { 
             // Method 08: C(I,J)<M> += A ; no S
+            GBBURBLE ("Method 08: C(%s,%s)<M> += Z ; no S ",
+                Istring, Jstring) ;
             ASSERT (S == NULL) ;
             GB_OK (GB_subassign_08 (C,
                 I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                M, accum, A, Context)) ;
+                M, Mask_struct, accum, A, Context)) ;
+        }
+        else if (C_is_dense && whole_C_matrix && M == A)
+        { 
+            // Method 06d: C(:,:)<A> = A ; no S, C dense
+            GBBURBLE ("Method 06d: (C dense)<Z> = Z ") ;
+            GB_OK (GB_dense_subassign_06d (C, A, Mask_struct, Context)) ;
+        }
+        else if (C_is_empty && whole_C_matrix && A_is_dense && Mask_struct)
+        { 
+            GBBURBLE ("Method 25: (C empty)<M> = (Z dense) ") ;
+            GB_OK (GB_dense_subassign_25 (C, M, A, Context)) ;
         }
         else if (S == NULL)
         { 
             // Method 06n: C(I,J)<M> = A ; no S
+            GBBURBLE ("Method 06n: C(%s,%s)<M> = Z ; no S ",
+                Istring, Jstring) ;
             GB_OK (GB_subassign_06n (C,
                 I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                M, A, Context)) ;
+                M, Mask_struct, A, Context)) ;
         }
         else
         { 
             // Method 06s: C(I,J)<M> = A ; using S
+            GBBURBLE ("Method 06s: C(%s,%s)<M> = Z ; using S ",
+                Istring, Jstring) ;
             GB_OK (GB_subassign_06s (C,
                 I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                M, A, S, Context)) ;
+                M, Mask_struct, A, S, Context)) ;
         }
 
     }
@@ -852,6 +1193,8 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             if (accum == NULL)
             { 
                 // Method 01: C(I,J) = scalar ; using S
+                GBBURBLE ("Method 01: C(%s,%s) = scalar ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_01 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
                     scalar, atype, S, Context)) ;
@@ -859,6 +1202,8 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             else
             { 
                 // Method 03: C(I,J) += scalar ; using S
+                GBBURBLE ("Method 03: C(%s,%s) += scalar ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_03 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
                     accum, scalar, atype, S, Context)) ;
@@ -869,6 +1214,8 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             if (accum == NULL)
             { 
                 // Method 02: C(I,J) = A ; using S
+                GBBURBLE ("Method 02: C(%s,%s) = Z ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_02 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
                     A, S, Context)) ;
@@ -876,6 +1223,8 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             else
             { 
                 // Method 04: C(I,J) += A ; using S
+                GBBURBLE ("Method 04: C(%s,%s) += Z ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_04 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
                     accum, A, S, Context)) ;
@@ -909,24 +1258,30 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             if (Mask_comp && C_replace)
             { 
                 // Method 17: C(I,J)<!M,repl> = scalar ; using S
+                GBBURBLE ("Method 17: C(%s,%s)<!M,repl> = scalar ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_17 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, scalar, atype, S, Context)) ;
+                    M, Mask_struct, scalar, atype, S, Context)) ;
             }
             else if (Mask_comp)
             { 
                 // Method 13: C(I,J)<!M> = scalar ; using S
+                GBBURBLE ("Method 13: C(%s,%s)<!M> = scalar ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_13 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, scalar, atype, S, Context)) ;
+                    M, Mask_struct, scalar, atype, S, Context)) ;
             }
             else // if (C_replace)
             { 
                 // Method 09: C(I,J)<M,repl> = scalar ; using S
+                GBBURBLE ("Method 09: C(%s,%s)<M,repl> = scalar ; using S ",
+                    Istring, Jstring) ;
                 ASSERT (C_replace) ;
                 GB_OK (GB_subassign_09 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, scalar, atype, S, Context)) ;
+                    M, Mask_struct, scalar, atype, S, Context)) ;
             }
         }
         else
@@ -934,24 +1289,30 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             if (Mask_comp && C_replace)
             { 
                 // Method 19: C(I,J)<!M,repl> += scalar ; using S
+                GBBURBLE ("Method 19: C(%s,%s)<!M,repl> += scalar ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_19 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, accum, scalar, atype, S, Context)) ;
+                    M, Mask_struct, accum, scalar, atype, S, Context)) ;
             }
             else if (Mask_comp)
             { 
                 // Method 15: C(I,J)<!M> += scalar ; using S
+                GBBURBLE ("Method 15: C(%s,%s)<!M> += scalar ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_15 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, accum, scalar, atype, S, Context)) ;
+                    M, Mask_struct, accum, scalar, atype, S, Context)) ;
             }
             else // if (C_replace)
             { 
                 // Method 11: C(I,J)<M,repl> += scalar ; using S
+                GBBURBLE ("Method 11: C(%s,%s)<M,repl> += scalar ; using S ",
+                    Istring, Jstring) ;
                 ASSERT (C_replace) ;
                 GB_OK (GB_subassign_11 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, accum, scalar, atype, S, Context)) ;
+                    M, Mask_struct, accum, scalar, atype, S, Context)) ;
             }
         }
 
@@ -981,24 +1342,30 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             if (Mask_comp && C_replace)
             { 
                 // Method 18: C(I,J)<!M,repl> = A ; using S
+                GBBURBLE ("Method 18: C(%s,%s)<!M,repl> = Z ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_18 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, A, S, Context)) ;
+                    M, Mask_struct, A, S, Context)) ;
             }
             else if (Mask_comp)
             { 
                 // Method 14: C(I,J)<!M> = A ; using S
+                GBBURBLE ("Method 14: C(%s,%s)<!M> = Z ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_14 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, A, S, Context)) ;
+                    M, Mask_struct, A, S, Context)) ;
             }
             else // if (C_replace)
             { 
                 // Method 10: C(I,J)<M,repl> = A ; using S
+                GBBURBLE ("Method 10: C(%s,%s)<M,repl> = Z ; using S ",
+                    Istring, Jstring) ;
                 ASSERT (C_replace) ;
                 GB_OK (GB_subassign_10 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, A, S, Context)) ;
+                    M, Mask_struct, A, S, Context)) ;
             }
         }
         else
@@ -1006,24 +1373,30 @@ GrB_Info GB_subassigner             // C(I,J)<#M> = A or accum (C (I,J), A)
             if (Mask_comp && C_replace)
             { 
                 // Method 20: C(I,J)<!M,repl> += A ; using S
+                GBBURBLE ("Method 20: C(%s,%s)<!M,repl> += Z ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_20 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, accum, A, S, Context)) ;
+                    M, Mask_struct, accum, A, S, Context)) ;
             }
             else if (Mask_comp)
             { 
                 // Method 16: C(I,J)<!M> += A ; using S
+                GBBURBLE ("Method 16: C(%s,%s)<!M> += Z ; using S ",
+                    Istring, Jstring) ;
                 GB_OK (GB_subassign_16 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, accum, A, S, Context)) ;
+                    M, Mask_struct, accum, A, S, Context)) ;
             }
             else // if (C_replace)
             { 
                 // Method 12: C(I,J)<M,repl> += A ; using S
+                GBBURBLE ("Method 12: C(%s,%s)<M,repl> += Z ; using S ",
+                    Istring, Jstring) ;
                 ASSERT (C_replace) ;
                 GB_OK (GB_subassign_12 (C,
                     I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
-                    M, accum, A, S, Context)) ;
+                    M, Mask_struct, accum, A, S, Context)) ;
             }
             // note that C(I,J)<M> += A always uses method 6b, without S.
         }
diff --git a/Source/GB_subref.c b/Source/GB_subref.c
index 5abb44877b..d67ddebe8a 100644
--- a/Source/GB_subref.c
+++ b/Source/GB_subref.c
@@ -2,7 +2,7 @@
 // GB_subref: C = A(I,J)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subref.h b/Source/GB_subref.h
index 4bb8174cf6..4c2a752c84 100644
--- a/Source/GB_subref.h
+++ b/Source/GB_subref.h
@@ -2,7 +2,7 @@
 // GB_subref.h: definitions for GB_subref_* functions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subref_phase0.c b/Source/GB_subref_phase0.c
index 56a450e617..a306f7c04e 100644
--- a/Source/GB_subref_phase0.c
+++ b/Source/GB_subref_phase0.c
@@ -2,7 +2,7 @@
 // GB_subref_phase0: find vectors of C = A(I,J) and determine I,J properties
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -83,8 +83,8 @@ static inline void GB_find_Ap_start_end
         { 
             bool found, is_zombie ;
             int64_t pright = pA_end - 1 ;
-            GB_BINARY_SPLIT_ZOMBIE (imin, Ai, pA, pright, found, nzombies,
-                is_zombie) ;
+            GB_SPLIT_BINARY_SEARCH_ZOMBIE (imin, Ai,
+                pA, pright, found, nzombies, is_zombie) ;
         }
 
         // trim the trailing part of A (:,kA)
@@ -107,8 +107,8 @@ static inline void GB_find_Ap_start_end
             bool found, is_zombie ;
             int64_t pleft = pA ;
             int64_t pright = pA_end - 1 ;
-            GB_BINARY_SPLIT_ZOMBIE (imax, Ai, pleft, pright, found, nzombies,
-                is_zombie) ;
+            GB_SPLIT_BINARY_SEARCH_ZOMBIE (imax, Ai,
+                pleft, pright, found, nzombies, is_zombie) ;
             pA_end = (found) ? (pleft + 1) : pleft ;
         }
 
@@ -278,7 +278,7 @@ GrB_Info GB_subref_phase0
             bool found ;
             int64_t kleft = 0 ;
             int64_t kright = anvec-1 ;
-            GB_BINARY_SPLIT_SEARCH (jmin, Ah, kleft, kright, found) ;
+            GB_SPLIT_BINARY_SEARCH (jmin, Ah, kleft, kright, found) ;
             Ah += kleft ;
             Ap += kleft ;
             anvec -= kleft ;
@@ -293,7 +293,7 @@ GrB_Info GB_subref_phase0
             bool found ;
             int64_t kleft = 0 ;
             int64_t kright = anvec-1 ;
-            GB_BINARY_SPLIT_SEARCH (jmax, Ah, kleft, kright, found) ;
+            GB_SPLIT_BINARY_SEARCH (jmax, Ah, kleft, kright, found) ;
             anvec = (found) ? (kleft + 1) : kleft ;
         }
 
diff --git a/Source/GB_subref_phase1.c b/Source/GB_subref_phase1.c
index 8c21b168a4..a371323944 100644
--- a/Source/GB_subref_phase1.c
+++ b/Source/GB_subref_phase1.c
@@ -2,7 +2,7 @@
 // GB_subref_phase1: find # of entries in C=A(I,J)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_subref_phase2.c b/Source/GB_subref_phase2.c
index 8412b099f1..a6fe616993 100644
--- a/Source/GB_subref_phase2.c
+++ b/Source/GB_subref_phase2.c
@@ -2,7 +2,7 @@
 // GB_subref_phase2: C=A(I,J)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -116,6 +116,16 @@ GrB_Info GB_subref_phase2   // C=A(I,J)
     // remove empty vectors from C, if hypersparse
     //--------------------------------------------------------------------------
 
+    info = GB_hypermatrix_prune (C, Context) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        GB_MATRIX_FREE (&C) ;
+        return (info) ;
+    }
+
+#if 0
+    // see GB_hypermatrix_prune
     if (C_is_hyper && C->nvec_nonempty < Cnvec)
     {
         // create new Cp_new and Ch_new arrays, with no empty vectors
@@ -139,6 +149,7 @@ GrB_Info GB_subref_phase2   // C=A(I,J)
         C->plen = nvec_new ;
         ASSERT (C->nvec == C->nvec_nonempty) ;
     }
+#endif
 
     //--------------------------------------------------------------------------
     // return result
diff --git a/Source/GB_subref_slice.c b/Source/GB_subref_slice.c
index 194c70fff5..805d9ba0a2 100644
--- a/Source/GB_subref_slice.c
+++ b/Source/GB_subref_slice.c
@@ -2,7 +2,7 @@
 // GB_subref_slice: construct coarse/fine tasks for C = A(I,J)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -145,10 +145,6 @@ GrB_Info GB_subref_slice
     bool post_sort = false ;
     int64_t iinc = Icolon [GxB_INC] ;
 
-    // printf ("nI "GBd" avlen "GBd" anz "GBd"\n", nI, avlen, anz) ;
-    // printf ("I_inverse_limit "GBd"\n", I_inverse_limit) ;
-    // printf ("I inverse ok: %d\n", I_inverse_ok) ;
-
     //--------------------------------------------------------------------------
     // allocate workspace
     //--------------------------------------------------------------------------
@@ -167,12 +163,6 @@ GrB_Info GB_subref_slice
 
     int nthreads_for_Cwork = GB_nthreads (Cnvec, chunk, nthreads_max) ;
 
-    #ifdef GB_DEBUG
-    // For debugging only: record the methods used for each vector.
-    int64_t Hist [13] ;
-    for (int method = 0 ; method <= 12 ; method++) Hist [method] = 0 ;
-    #endif
-
     int64_t kC ;
     #pragma omp parallel for num_threads(nthreads_for_Cwork) schedule(static) \
         reduction(||:need_I_inverse)
@@ -194,31 +184,14 @@ GrB_Info GB_subref_slice
         // must be created.  The # of duplicates has no impact on the I inverse
         // decision, and a minor effect on the work (which is ignored).
 
-        #ifdef GB_DEBUG
-        int method =
-        #endif
         GB_subref_method (&work, &this_needs_I_inverse, alen, avlen,
             Ikind, nI, I_inverse_ok, need_qsort, iinc, 0) ;
-        #ifdef GB_DEBUG
-        #pragma omp atomic
-        Hist [method] ++ ;
-        #endif
 
         // log the result
         need_I_inverse = need_I_inverse || this_needs_I_inverse ;
         Cwork [kC] = work ;
     }
 
-    #ifdef GB_DEBUG
-    for (int method = 0 ; method <= 12 ; method++)
-    {
-        if (Hist [method] > 0)
-        {
-            // printf ("method %2d : "GBd"\n", method, Hist [method]) ;
-        }
-    }
-    #endif
-
     //--------------------------------------------------------------------------
     // replace Cwork with its cumulative sum
     //--------------------------------------------------------------------------
diff --git a/Source/GB_task_cumsum.c b/Source/GB_task_cumsum.c
index b3f548edd7..26a104477b 100644
--- a/Source/GB_task_cumsum.c
+++ b/Source/GB_task_cumsum.c
@@ -2,7 +2,7 @@
 // GB_task_cumsum: cumulative sum of Cp and fine tasks in TaskList
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -101,13 +101,11 @@ void GB_task_cumsum
     //--------------------------------------------------------------------------
 
     #ifdef GB_DEBUG
-    // printf ("\nnthreads %d ntasks %d Cnvec "GBd"\n",
     // nthreads, ntasks, Cnvec) ;
     for (int t = 0 ; t < ntasks ; t++)
     {
         int64_t k = TaskList [t].kfirst ;
         int64_t klast = TaskList [t].klast ;
-        // printf ("Task %d: kfirst "GBd" klast "GBd" ", t, k, klast) ;
         if (klast < 0)
         {
             // this is a fine task for vector k
@@ -119,9 +117,6 @@ void GB_task_cumsum
             int64_t pC_end = TaskList [t+1].pC ;
             int64_t pM     = TaskList [t].pM ;
             int64_t pM_end = TaskList [t].pM_end ;
-            // printf ("pA ["GBd":"GBd"-1] pB ["GBd":"GBd"-1] pC ["GBd":"GBd
-            // "-1] pM ["GBd":"GBd"-1] len "GBd"\n", pA, pA_end, pB, pB_end,
-            // pC, pC_end, pM, pM_end, TaskList [t].len) ;
             ASSERT (k >= 0 && k < Cnvec) ;
             // pA:(pA_end-1) must reside inside A(:,j), and pB:(pB_end-1) must
             // reside inside B(:,j), but these cannot be checked here since A
@@ -136,7 +131,6 @@ void GB_task_cumsum
         else
         {
             // this is a coarse task for vectors k:klast, inclusive
-            // printf ("\n") ;
             ASSERT (k >= 0 && k < Cnvec) ;
             ASSERT (klast >= 0 && klast <= Cnvec) ;
             ASSERT (k <= klast) ;
diff --git a/Source/GB_thread_local.c b/Source/GB_thread_local.c
index c116508773..465017a51f 100644
--- a/Source/GB_thread_local.c
+++ b/Source/GB_thread_local.c
@@ -2,7 +2,7 @@
 // GB_thread_local: manage thread-local storage
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_thread_local.h b/Source/GB_thread_local.h
index 409340b0aa..9687c3b3c8 100644
--- a/Source/GB_thread_local.h
+++ b/Source/GB_thread_local.h
@@ -2,7 +2,7 @@
 // GB_thread_local.h: definitions for thread local storage
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_to_hyper.c b/Source/GB_to_hyper.c
index 6142ae2525..b49c24051c 100644
--- a/Source/GB_to_hyper.c
+++ b/Source/GB_to_hyper.c
@@ -2,7 +2,7 @@
 // GB_to_hyper: convert a matrix to hyperspasre
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -68,7 +68,7 @@ GrB_Info GB_to_hyper        // convert a matrix to hypersparse
         int64_t *GB_RESTRICT Count ;
         GB_MALLOC_MEMORY (Count, ntasks+1, sizeof (int64_t)) ;
         if (Count == NULL)
-        {
+        { 
             // out of memory
             GB_PHIX_FREE (A) ;
             return (GB_OUT_OF_MEMORY) ;
diff --git a/Source/GB_to_hyper_conform.c b/Source/GB_to_hyper_conform.c
index f3b738e537..7892a9ae94 100644
--- a/Source/GB_to_hyper_conform.c
+++ b/Source/GB_to_hyper_conform.c
@@ -2,7 +2,7 @@
 // GB_to_hyper_conform: conform a matrix to its desired hypersparse format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_to_hyper_test.c b/Source/GB_to_hyper_test.c
index 768a38ea3b..ec7055d66c 100644
--- a/Source/GB_to_hyper_test.c
+++ b/Source/GB_to_hyper_test.c
@@ -2,7 +2,7 @@
 // GB_to_hyper_test: test if a matrix should convert to hyperspasre
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_to_nonhyper.c b/Source/GB_to_nonhyper.c
index db7ac98533..a1a342963e 100644
--- a/Source/GB_to_nonhyper.c
+++ b/Source/GB_to_nonhyper.c
@@ -2,7 +2,7 @@
 // GB_to_nonhyper: convert a matrix to non-hypersparse form
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -98,7 +98,7 @@ GrB_Info GB_to_nonhyper     // convert a matrix to non-hypersparse
 
             // task tid computes Ap_new [jstart:jend-1] from Ap_old, Ah_old.
 
-            // GB_BINARY_SPLIT_SEARCH of Ah_old [0..nvec-1] for jstart:
+            // GB_SPLIT_BINARY_SEARCH of Ah_old [0..nvec-1] for jstart:
             // If found is true then Ah_old [k] == jstart.
             // If found is false, and nvec > 0 then
             //    Ah_old [0 ... k-1] < jstart <  Ah_old [k ... nvec-1]
@@ -110,7 +110,7 @@ GrB_Info GB_to_nonhyper     // convert a matrix to non-hypersparse
 
             int64_t k = 0, pright = nvec-1 ;
             bool found ;
-            GB_BINARY_SPLIT_SEARCH (jstart, Ah_old, k, pright, found) ;
+            GB_SPLIT_BINARY_SEARCH (jstart, Ah_old, k, pright, found) ;
             ASSERT (k >= 0 && k <= nvec) ;
             ASSERT (GB_IMPLIES (nvec == 0, !found && k == 0)) ;
             ASSERT (GB_IMPLIES (found, jstart == Ah_old [k])) ;
diff --git a/Source/GB_to_nonhyper_test.c b/Source/GB_to_nonhyper_test.c
index 99323a62ef..7352d3a77a 100644
--- a/Source/GB_to_nonhyper_test.c
+++ b/Source/GB_to_nonhyper_test.c
@@ -2,7 +2,7 @@
 // GB_to_nonhyper_test: test if a matrix should convert to non-hyperspasre
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_transplant.c b/Source/GB_transplant.c
index 6fd1fc2332..11b78bd22e 100644
--- a/Source/GB_transplant.c
+++ b/Source/GB_transplant.c
@@ -2,7 +2,7 @@
 // GB_transplant: replace contents of one matrix with another
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -51,6 +51,9 @@ GrB_Info GB_transplant          // transplant one matrix into another
     // the ctype and A->type must be compatible.  C->type is ignored
     ASSERT (GB_Type_compatible (ctype, A->type)) ;
 
+    int64_t avdim = A->vdim ;
+    int64_t avlen = A->vlen ;
+
     //--------------------------------------------------------------------------
     // determine the number of threads to use
     //--------------------------------------------------------------------------
@@ -61,6 +64,50 @@ GrB_Info GB_transplant          // transplant one matrix into another
     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
     int nthreads = GB_nthreads (anz + anvec, chunk, nthreads_max) ;
 
+    //--------------------------------------------------------------------------
+    // save prior pattern of C, if dense
+    //--------------------------------------------------------------------------
+
+    bool A_is_dense = GB_is_dense (A) ;
+
+    bool keep_Cp_and_Ci =               // keep C->p and C->i if:
+        (
+            GB_is_dense (C)             //      both A and C are dense
+            && A_is_dense
+            && !GB_ZOMBIES (C)          //      neither have zombies
+            && !GB_ZOMBIES (A)
+            && !(C->p_shallow)          //      Cp and Ci are not shallow
+            && !(C->i_shallow)
+            && !C->is_hyper             //      both A and C are standard
+            && !A->is_hyper
+            && C->vdim == avdim         //      A and C have the same size
+            && C->vlen == avlen
+            && C->is_csc == A->is_csc   //      A and C have the same format
+            && C->p != NULL         
+            && C->i != NULL             //      Cp and Ci exist
+        ) ;
+
+    int64_t *GB_RESTRICT Cp_keep = NULL ;
+    int64_t *GB_RESTRICT Ci_keep = NULL ;
+    int64_t cplen_keep = 0 ;
+    int64_t cnvec_keep = 0 ;
+    int64_t cnzmax_keep = 0 ;
+
+    if (keep_Cp_and_Ci)
+    { 
+        // Keep C->p and C->i by removing them from C.  They already contain
+        // the right pattern for a dense matrix C.  No need to free them and
+        // recreate the same thing.
+        GBBURBLE ("(remains dense) ") ;
+        Cp_keep = C->p ;
+        Ci_keep = C->i ;
+        cplen_keep = C->plen ;
+        cnvec_keep = C->nvec ;
+        cnzmax_keep = C->nzmax ;
+        C->p = NULL ;
+        C->i = NULL ;
+    }
+
     //--------------------------------------------------------------------------
     // clear C and transplant the type, size, and hypersparsity
     //--------------------------------------------------------------------------
@@ -77,8 +124,8 @@ GrB_Info GB_transplant          // transplant one matrix into another
     C->type_size = ctype->size ;
     C->is_csc = A->is_csc ;
     C->is_hyper = A->is_hyper ;
-    C->vlen = A->vlen ;
-    C->vdim = A->vdim ;
+    C->vlen = avlen ;
+    C->vdim = avdim ;
     ASSERT (A->nvec_nonempty == -1 ||   // can be postponed
             A->nvec_nonempty == GB_nvec_nonempty (A, Context)) ;
     C->nvec_nonempty = A->nvec_nonempty ;
@@ -93,13 +140,32 @@ GrB_Info GB_transplant          // transplant one matrix into another
     // transplant A->p vector pointers and A->h hyperlist
     //--------------------------------------------------------------------------
 
-    if (A->p_shallow || A->h_shallow)
+    if (keep_Cp_and_Ci)
+    { 
+
+        //----------------------------------------------------------------------
+        // keep existing C->p
+        //----------------------------------------------------------------------
+
+        C->p = Cp_keep ;
+        Cp_keep = NULL ;
+        C->h = NULL ;
+        C->plen = cplen_keep ;
+        C->nvec = cnvec_keep ;
+
+        // free any non-shallow A->p and A->h content of A
+        GB_ph_free (A) ;
+
+    }
+    else if (A->p_shallow || A->h_shallow)
     {
 
         //----------------------------------------------------------------------
         // A->p or A->h are shallow copies another matrix; make a deep copy
         //----------------------------------------------------------------------
 
+        int nth = GB_nthreads (anvec, chunk, nthreads_max) ;
+
         if (A->is_hyper)
         {
             // A is hypersparse, create new C->p and C->h
@@ -116,14 +182,14 @@ GrB_Info GB_transplant          // transplant one matrix into another
             }
 
             // copy A->p and A->h into the newly created C->p and C->h
-            GB_memcpy (C->p, A->p, (anvec+1) * sizeof (int64_t), nthreads) ;
-            GB_memcpy (C->h, A->h,  anvec    * sizeof (int64_t), nthreads) ;
+            GB_memcpy (C->p, A->p, (anvec+1) * sizeof (int64_t), nth) ;
+            GB_memcpy (C->h, A->h,  anvec    * sizeof (int64_t), nth) ;
         }
         else
         {
             // A is non-hypersparse, create new C->p
-            C->plen = A->vdim ;
-            C->nvec = A->vdim ;
+            C->plen = avdim ;
+            C->nvec = avdim ;
             GB_MALLOC_MEMORY (C->p, C->plen+1, sizeof (int64_t)) ;
             if (C->p == NULL)
             { 
@@ -133,8 +199,22 @@ GrB_Info GB_transplant          // transplant one matrix into another
                 return (GB_OUT_OF_MEMORY) ;
             }
 
-            // copy A->p into the newly created C->p
-            GB_memcpy (C->p, A->p, (A->vdim+1) * sizeof (int64_t), nthreads) ;
+            if (A_is_dense)
+            {
+                // create C->p for a dense matrix C
+                int64_t *GB_RESTRICT Cp = C->p ;
+                int64_t k ;
+                #pragma omp parallel for num_threads(nth) schedule(static)
+                for (k = 0 ; k <= avdim ; k++)
+                { 
+                    Cp [k] = k * avlen ;
+                }
+            }
+            else
+            { 
+                // copy A->p into the newly created C->p
+                GB_memcpy (C->p, A->p, (avdim+1) * sizeof (int64_t), nth) ;
+            }
         }
 
         // free any non-shallow A->p and A->h content of A
@@ -171,6 +251,8 @@ GrB_Info GB_transplant          // transplant one matrix into another
     if (anz == 0)
     { 
         // quick return if A has no entries
+        // Ci_keep is not needed after all, since C is empty
+        GB_FREE_MEMORY (Ci_keep, cnzmax_keep, sizeof (int64_t)) ;
         ASSERT_MATRIX_OK (C, "C empty transplant", GB0) ;
         GB_MATRIX_FREE (Ahandle) ;
         return (GrB_SUCCESS) ;
@@ -184,8 +266,10 @@ GrB_Info GB_transplant          // transplant one matrix into another
     // is set to their minimum size.  Otherwise, if both C->x and C->i can
     // be transplanted from A, then they inherit the nzmax of A.
 
+    // Do not allocate C->i if the pattern of a dense matrix C is being kept.
+
     ASSERT (C->x == NULL && C->i == NULL) ;
-    bool allocate_Ci = (A->i_shallow) ;
+    bool allocate_Ci = (A->i_shallow) && !keep_Cp_and_Ci ;
     bool allocate_Cx = (A->x_shallow || C->type != A->type) ;
     C->nzmax = (allocate_Cx || allocate_Ci) ? anz : A->nzmax ;
     C->nzmax = GB_IMAX (C->nzmax, 1) ;
@@ -201,6 +285,7 @@ GrB_Info GB_transplant          // transplant one matrix into another
 
     if (allocate_Ci)
     { 
+
         // allocate new C->i component
         GB_MALLOC_MEMORY (C->i, C->nzmax, sizeof (int64_t)) ;
         ok = ok && (C->i != NULL) ;
@@ -211,6 +296,7 @@ GrB_Info GB_transplant          // transplant one matrix into another
         // out of memory
         GB_PHIX_FREE (C) ;
         GB_MATRIX_FREE (Ahandle) ;
+        GB_FREE_MEMORY (Ci_keep, cnzmax_keep, sizeof (int64_t)) ;
         return (GB_OUT_OF_MEMORY) ;
     }
 
@@ -218,6 +304,13 @@ GrB_Info GB_transplant          // transplant one matrix into another
     // transplant or copy A->x numerical values
     //--------------------------------------------------------------------------
 
+    // Note that A may contain zombies, and the values of these zombies may be
+    // uninitialized values in A->x.  All entries are typecast or memcpy'ed
+    // from A->x to C->x, both zombies and live entries alike.  valgrind may
+    // complain about typecasting these uninitialized values, but these
+    // warnings are false positives.  The output of the typecasting is itself a
+    // zombie, and the values of all zombies are ignored.
+
     ASSERT_TYPE_OK (C->type, "target C->type for values", GB0) ;
     ASSERT_TYPE_OK (A->type, "source A->type for values", GB0) ;
 
@@ -239,7 +332,7 @@ GrB_Info GB_transplant          // transplant one matrix into another
     }
     else
     {
-        // types differ, must typecast from A to C
+        // types differ, must typecast from A to C.
         GB_cast_array (C->x, C->type->code, A->x, A->type->code, anz, Context) ;
         if (!A->x_shallow)
         { 
@@ -258,26 +351,61 @@ GrB_Info GB_transplant          // transplant one matrix into another
     // transplant or copy A->i row indices
     //--------------------------------------------------------------------------
 
-    if (A->i_shallow)
+    if (keep_Cp_and_Ci)
     { 
+
+        //----------------------------------------------------------------------
+        // keep existing C->i
+        //----------------------------------------------------------------------
+
+        // C is dense; restore the prior C->i.  A->i will be freed
+        C->i = Ci_keep ;
+        Ci_keep = NULL ;
+
+    }
+    else if (A->i_shallow)
+    {
+
+        //----------------------------------------------------------------------
         // A->i is a shallow copy of another matrix, so we need a deep copy
-        GB_memcpy (C->i, A->i, anz * sizeof (int64_t), nthreads) ;
+        //----------------------------------------------------------------------
+
+        if (A_is_dense && !GB_ZOMBIES (A))
+        {
+            // create C->i for a dense matrix C
+            int64_t *GB_RESTRICT Ci = C->i ;
+            int64_t pC ;
+            #pragma omp parallel for num_threads(nthreads) schedule(static)
+            for (pC = 0 ; pC < anz ; pC++)
+            { 
+                Ci [pC] = pC % avlen ;
+            }
+        }
+        else
+        { 
+            // copy A->i into C->i
+            GB_memcpy (C->i, A->i, anz * sizeof (int64_t), nthreads) ;
+        }
         A->i = NULL ;
+        A->i_shallow = false ;
+
     }
     else
     { 
-        // A->i is not shallow, so just copy the pointer from A to C
+
+        //----------------------------------------------------------------------
+        // A->i is not shallow, so just transplant the pointer from A to C
+        //----------------------------------------------------------------------
+
         C->i = A->i ;
         A->i = NULL ;
+        A->i_shallow = false ;
     }
 
-    ASSERT (A->i == NULL) ;         // has been freed or removed
-    A->i_shallow = false ;
-
     ASSERT (C->i != NULL) ;
     C->i_shallow = false ;
 
-    C->nzombies = A->nzombies ;     // zombies have been transplanted into C
+    C->nzombies = A->nzombies ;     // zombies may have been transplanted into C
     GB_CRITICAL (GB_queue_insert (C)) ;
 
     //--------------------------------------------------------------------------
diff --git a/Source/GB_transplant_conform.c b/Source/GB_transplant_conform.c
index d85ce2796e..ce766cb9f0 100644
--- a/Source/GB_transplant_conform.c
+++ b/Source/GB_transplant_conform.c
@@ -2,7 +2,7 @@
 // GB_transplant_conform: transplant T into C, then conform C
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_transpose.c b/Source/GB_transpose.c
index 4c321132c2..30c40bb86e 100644
--- a/Source/GB_transpose.c
+++ b/Source/GB_transpose.c
@@ -2,7 +2,7 @@
 // GB_transpose:  C=A' or C=op(A'), with typecasting
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -226,7 +226,7 @@ GrB_Info GB_transpose           // C=A', C=(ctype)A or C=op(A')
         // Count is only used in one case below
         GB_CALLOC_MEMORY (Count, ntasks+1, sizeof (int64_t)) ;
         if (Count == NULL)
-        {
+        { 
             // out of memory
             GB_FREE_C ;
             return (GB_OUT_OF_MEMORY) ;
@@ -294,7 +294,6 @@ GrB_Info GB_transpose           // C=A', C=(ctype)A or C=op(A')
         // returned.
         GB_CREATE (Chandle, ctype, avdim, avlen, GB_Ap_calloc,
             C_is_csc, GB_FORCE_HYPER, A_hyper_ratio, 1, 1, true, Context) ;
-
         if (info != GrB_SUCCESS)
         { 
             // out of memory
diff --git a/Source/GB_transpose.h b/Source/GB_transpose.h
index 1dad17c280..45f996145c 100644
--- a/Source/GB_transpose.h
+++ b/Source/GB_transpose.h
@@ -2,7 +2,7 @@
 // GB_transpose.h:  definitions for GB_transpose
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_transpose_bucket.c b/Source/GB_transpose_bucket.c
index 175867d686..a4d96315c4 100644
--- a/Source/GB_transpose_bucket.c
+++ b/Source/GB_transpose_bucket.c
@@ -2,7 +2,7 @@
 // GB_transpose_bucket: transpose and optionally typecast and/or apply operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -138,7 +138,7 @@ GrB_Info GB_transpose_bucket    // bucket transpose; typecast and apply op
 
     GB_CALLOC_MEMORY (Rowcounts, naslice, sizeof (int64_t *)) ;
     if (Rowcounts == NULL)
-    {
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
@@ -164,7 +164,7 @@ GrB_Info GB_transpose_bucket    // bucket transpose; typecast and apply op
     // create the iterator for A
     GBI_single_iterator Iter ;
     if (!GB_pslice (&A_slice, /* A */ A->p, A->nvec, naslice))
-    {
+    { 
         // out of memory
         GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
diff --git a/Source/GB_transpose_ix.c b/Source/GB_transpose_ix.c
index 97dd828eba..06c284770a 100644
--- a/Source/GB_transpose_ix.c
+++ b/Source/GB_transpose_ix.c
@@ -2,7 +2,7 @@
 // GB_transpose_ix: transpose the values and pattern of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -22,9 +22,9 @@ void GB_transpose_ix            // transpose the pattern and values of a matrix
 (
     GrB_Matrix C,                       // output matrix
     const GrB_Matrix A,                 // input matrix
-    int64_t *GB_RESTRICT *Rowcounts,       // Rowcounts [naslice]
+    int64_t *GB_RESTRICT *Rowcounts,    // Rowcounts [naslice]
     GBI_single_iterator Iter,           // iterator for the matrix A
-    const int64_t *GB_RESTRICT A_slice,    // defines how A is sliced
+    const int64_t *GB_RESTRICT A_slice, // defines how A is sliced
     int naslice                         // # of slices of A
 )
 { 
@@ -62,6 +62,8 @@ void GB_transpose_ix            // transpose the pattern and values of a matrix
     // generic worker: transpose and typecast
     //--------------------------------------------------------------------------
 
+    GB_BURBLE_MATRIX (A, "generic ") ;
+
     size_t asize = A->type->size ;
     size_t csize = C->type->size ;
     GB_cast_function cast_A_to_X = GB_cast_factory (code1, code2) ;
diff --git a/Source/GB_transpose_op.c b/Source/GB_transpose_op.c
index ce631ef23c..af1c7e0b4b 100644
--- a/Source/GB_transpose_op.c
+++ b/Source/GB_transpose_op.c
@@ -2,7 +2,7 @@
 // GB_transpose_op: transpose, typecast, and apply an operator to a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -62,6 +62,8 @@ void GB_transpose_op    // transpose, typecast, and apply operator to a matrix
     // generic worker: transpose, typecast, and apply an operator
     //--------------------------------------------------------------------------
 
+    GB_BURBLE_MATRIX (A, "generic ") ;
+
     size_t asize = Atype->size ;
     size_t zsize = op->ztype->size ;
     size_t xsize = op->xtype->size ;
diff --git a/Source/GB_unused.h b/Source/GB_unused.h
index 8ccebd3f80..efc5574189 100644
--- a/Source/GB_unused.h
+++ b/Source/GB_unused.h
@@ -2,7 +2,7 @@
 // GB_unused.h: pragmas to disable compiler warnings
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GB_user.h b/Source/GB_user.h
deleted file mode 100644
index 202ac68ec6..0000000000
--- a/Source/GB_user.h
+++ /dev/null
@@ -1,1919 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_user.h: definitions for compile-time user-defined objects
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// Definitions of built-in types and functions, which can be referenced
-// by user-defined objects constructed at compile-time.
-
-#ifndef GB_USER_H
-#define GB_USER_H
-
-//------------------------------------------------------
-// built-in types
-//------------------------------------------------------
-
-#define GB_DEF_GrB_BOOL_type bool
-#define GB_DEF_GrB_INT8_type int8_t
-#define GB_DEF_GrB_UINT8_type uint8_t
-#define GB_DEF_GrB_INT16_type int16_t
-#define GB_DEF_GrB_UINT16_type uint16_t
-#define GB_DEF_GrB_INT32_type int32_t
-#define GB_DEF_GrB_UINT32_type uint32_t
-#define GB_DEF_GrB_INT64_type int64_t
-#define GB_DEF_GrB_UINT64_type uint64_t
-#define GB_DEF_GrB_FP32_type float
-#define GB_DEF_GrB_FP64_type double
-
-//------------------------------------------------------
-// built-in unary operators
-//------------------------------------------------------
-
-// op: IDENTITY
-#define GB_DEF_GrB_IDENTITY_BOOL_function GB_IDENTITY_f_BOOL
-#define GB_DEF_GrB_IDENTITY_BOOL_ztype bool
-#define GB_DEF_GrB_IDENTITY_BOOL_xtype bool
-
-#define GB_DEF_GrB_IDENTITY_INT8_function GB_IDENTITY_f_INT8
-#define GB_DEF_GrB_IDENTITY_INT8_ztype int8_t
-#define GB_DEF_GrB_IDENTITY_INT8_xtype int8_t
-
-#define GB_DEF_GrB_IDENTITY_UINT8_function GB_IDENTITY_f_UINT8
-#define GB_DEF_GrB_IDENTITY_UINT8_ztype uint8_t
-#define GB_DEF_GrB_IDENTITY_UINT8_xtype uint8_t
-
-#define GB_DEF_GrB_IDENTITY_INT16_function GB_IDENTITY_f_INT16
-#define GB_DEF_GrB_IDENTITY_INT16_ztype int16_t
-#define GB_DEF_GrB_IDENTITY_INT16_xtype int16_t
-
-#define GB_DEF_GrB_IDENTITY_UINT16_function GB_IDENTITY_f_UINT16
-#define GB_DEF_GrB_IDENTITY_UINT16_ztype uint16_t
-#define GB_DEF_GrB_IDENTITY_UINT16_xtype uint16_t
-
-#define GB_DEF_GrB_IDENTITY_INT32_function GB_IDENTITY_f_INT32
-#define GB_DEF_GrB_IDENTITY_INT32_ztype int32_t
-#define GB_DEF_GrB_IDENTITY_INT32_xtype int32_t
-
-#define GB_DEF_GrB_IDENTITY_UINT32_function GB_IDENTITY_f_UINT32
-#define GB_DEF_GrB_IDENTITY_UINT32_ztype uint32_t
-#define GB_DEF_GrB_IDENTITY_UINT32_xtype uint32_t
-
-#define GB_DEF_GrB_IDENTITY_INT64_function GB_IDENTITY_f_INT64
-#define GB_DEF_GrB_IDENTITY_INT64_ztype int64_t
-#define GB_DEF_GrB_IDENTITY_INT64_xtype int64_t
-
-#define GB_DEF_GrB_IDENTITY_UINT64_function GB_IDENTITY_f_UINT64
-#define GB_DEF_GrB_IDENTITY_UINT64_ztype uint64_t
-#define GB_DEF_GrB_IDENTITY_UINT64_xtype uint64_t
-
-#define GB_DEF_GrB_IDENTITY_FP32_function GB_IDENTITY_f_FP32
-#define GB_DEF_GrB_IDENTITY_FP32_ztype float
-#define GB_DEF_GrB_IDENTITY_FP32_xtype float
-
-#define GB_DEF_GrB_IDENTITY_FP64_function GB_IDENTITY_f_FP64
-#define GB_DEF_GrB_IDENTITY_FP64_ztype double
-#define GB_DEF_GrB_IDENTITY_FP64_xtype double
-
-// op: AINV
-#define GB_DEF_GrB_AINV_BOOL_function GB_AINV_f_BOOL
-#define GB_DEF_GrB_AINV_BOOL_ztype bool
-#define GB_DEF_GrB_AINV_BOOL_xtype bool
-
-#define GB_DEF_GrB_AINV_INT8_function GB_AINV_f_INT8
-#define GB_DEF_GrB_AINV_INT8_ztype int8_t
-#define GB_DEF_GrB_AINV_INT8_xtype int8_t
-
-#define GB_DEF_GrB_AINV_UINT8_function GB_AINV_f_UINT8
-#define GB_DEF_GrB_AINV_UINT8_ztype uint8_t
-#define GB_DEF_GrB_AINV_UINT8_xtype uint8_t
-
-#define GB_DEF_GrB_AINV_INT16_function GB_AINV_f_INT16
-#define GB_DEF_GrB_AINV_INT16_ztype int16_t
-#define GB_DEF_GrB_AINV_INT16_xtype int16_t
-
-#define GB_DEF_GrB_AINV_UINT16_function GB_AINV_f_UINT16
-#define GB_DEF_GrB_AINV_UINT16_ztype uint16_t
-#define GB_DEF_GrB_AINV_UINT16_xtype uint16_t
-
-#define GB_DEF_GrB_AINV_INT32_function GB_AINV_f_INT32
-#define GB_DEF_GrB_AINV_INT32_ztype int32_t
-#define GB_DEF_GrB_AINV_INT32_xtype int32_t
-
-#define GB_DEF_GrB_AINV_UINT32_function GB_AINV_f_UINT32
-#define GB_DEF_GrB_AINV_UINT32_ztype uint32_t
-#define GB_DEF_GrB_AINV_UINT32_xtype uint32_t
-
-#define GB_DEF_GrB_AINV_INT64_function GB_AINV_f_INT64
-#define GB_DEF_GrB_AINV_INT64_ztype int64_t
-#define GB_DEF_GrB_AINV_INT64_xtype int64_t
-
-#define GB_DEF_GrB_AINV_UINT64_function GB_AINV_f_UINT64
-#define GB_DEF_GrB_AINV_UINT64_ztype uint64_t
-#define GB_DEF_GrB_AINV_UINT64_xtype uint64_t
-
-#define GB_DEF_GrB_AINV_FP32_function GB_AINV_f_FP32
-#define GB_DEF_GrB_AINV_FP32_ztype float
-#define GB_DEF_GrB_AINV_FP32_xtype float
-
-#define GB_DEF_GrB_AINV_FP64_function GB_AINV_f_FP64
-#define GB_DEF_GrB_AINV_FP64_ztype double
-#define GB_DEF_GrB_AINV_FP64_xtype double
-
-// op: MINV
-#define GB_DEF_GrB_MINV_BOOL_function GB_MINV_f_BOOL
-#define GB_DEF_GrB_MINV_BOOL_ztype bool
-#define GB_DEF_GrB_MINV_BOOL_xtype bool
-
-#define GB_DEF_GrB_MINV_INT8_function GB_MINV_f_INT8
-#define GB_DEF_GrB_MINV_INT8_ztype int8_t
-#define GB_DEF_GrB_MINV_INT8_xtype int8_t
-
-#define GB_DEF_GrB_MINV_UINT8_function GB_MINV_f_UINT8
-#define GB_DEF_GrB_MINV_UINT8_ztype uint8_t
-#define GB_DEF_GrB_MINV_UINT8_xtype uint8_t
-
-#define GB_DEF_GrB_MINV_INT16_function GB_MINV_f_INT16
-#define GB_DEF_GrB_MINV_INT16_ztype int16_t
-#define GB_DEF_GrB_MINV_INT16_xtype int16_t
-
-#define GB_DEF_GrB_MINV_UINT16_function GB_MINV_f_UINT16
-#define GB_DEF_GrB_MINV_UINT16_ztype uint16_t
-#define GB_DEF_GrB_MINV_UINT16_xtype uint16_t
-
-#define GB_DEF_GrB_MINV_INT32_function GB_MINV_f_INT32
-#define GB_DEF_GrB_MINV_INT32_ztype int32_t
-#define GB_DEF_GrB_MINV_INT32_xtype int32_t
-
-#define GB_DEF_GrB_MINV_UINT32_function GB_MINV_f_UINT32
-#define GB_DEF_GrB_MINV_UINT32_ztype uint32_t
-#define GB_DEF_GrB_MINV_UINT32_xtype uint32_t
-
-#define GB_DEF_GrB_MINV_INT64_function GB_MINV_f_INT64
-#define GB_DEF_GrB_MINV_INT64_ztype int64_t
-#define GB_DEF_GrB_MINV_INT64_xtype int64_t
-
-#define GB_DEF_GrB_MINV_UINT64_function GB_MINV_f_UINT64
-#define GB_DEF_GrB_MINV_UINT64_ztype uint64_t
-#define GB_DEF_GrB_MINV_UINT64_xtype uint64_t
-
-#define GB_DEF_GrB_MINV_FP32_function GB_MINV_f_FP32
-#define GB_DEF_GrB_MINV_FP32_ztype float
-#define GB_DEF_GrB_MINV_FP32_xtype float
-
-#define GB_DEF_GrB_MINV_FP64_function GB_MINV_f_FP64
-#define GB_DEF_GrB_MINV_FP64_ztype double
-#define GB_DEF_GrB_MINV_FP64_xtype double
-
-// op: LNOT
-#define GB_DEF_GrB_LNOT_function GB_LNOT_f_BOOL
-#define GB_DEF_GrB_LNOT_ztype bool
-#define GB_DEF_GrB_LNOT_xtype bool
-
-#define GB_DEF_GxB_LNOT_BOOL_function GB_LNOT_f_BOOL
-#define GB_DEF_GxB_LNOT_BOOL_ztype bool
-#define GB_DEF_GxB_LNOT_BOOL_xtype bool
-
-#define GB_DEF_GxB_LNOT_INT8_function GB_LNOT_f_INT8
-#define GB_DEF_GxB_LNOT_INT8_ztype int8_t
-#define GB_DEF_GxB_LNOT_INT8_xtype int8_t
-
-#define GB_DEF_GxB_LNOT_UINT8_function GB_LNOT_f_UINT8
-#define GB_DEF_GxB_LNOT_UINT8_ztype uint8_t
-#define GB_DEF_GxB_LNOT_UINT8_xtype uint8_t
-
-#define GB_DEF_GxB_LNOT_INT16_function GB_LNOT_f_INT16
-#define GB_DEF_GxB_LNOT_INT16_ztype int16_t
-#define GB_DEF_GxB_LNOT_INT16_xtype int16_t
-
-#define GB_DEF_GxB_LNOT_UINT16_function GB_LNOT_f_UINT16
-#define GB_DEF_GxB_LNOT_UINT16_ztype uint16_t
-#define GB_DEF_GxB_LNOT_UINT16_xtype uint16_t
-
-#define GB_DEF_GxB_LNOT_INT32_function GB_LNOT_f_INT32
-#define GB_DEF_GxB_LNOT_INT32_ztype int32_t
-#define GB_DEF_GxB_LNOT_INT32_xtype int32_t
-
-#define GB_DEF_GxB_LNOT_UINT32_function GB_LNOT_f_UINT32
-#define GB_DEF_GxB_LNOT_UINT32_ztype uint32_t
-#define GB_DEF_GxB_LNOT_UINT32_xtype uint32_t
-
-#define GB_DEF_GxB_LNOT_INT64_function GB_LNOT_f_INT64
-#define GB_DEF_GxB_LNOT_INT64_ztype int64_t
-#define GB_DEF_GxB_LNOT_INT64_xtype int64_t
-
-#define GB_DEF_GxB_LNOT_UINT64_function GB_LNOT_f_UINT64
-#define GB_DEF_GxB_LNOT_UINT64_ztype uint64_t
-#define GB_DEF_GxB_LNOT_UINT64_xtype uint64_t
-
-#define GB_DEF_GxB_LNOT_FP32_function GB_LNOT_f_FP32
-#define GB_DEF_GxB_LNOT_FP32_ztype float
-#define GB_DEF_GxB_LNOT_FP32_xtype float
-
-#define GB_DEF_GxB_LNOT_FP64_function GB_LNOT_f_FP64
-#define GB_DEF_GxB_LNOT_FP64_ztype double
-#define GB_DEF_GxB_LNOT_FP64_xtype double
-
-// op: ONE
-#define GB_DEF_GxB_ONE_BOOL_function GB_ONE_f_BOOL
-#define GB_DEF_GxB_ONE_BOOL_ztype bool
-#define GB_DEF_GxB_ONE_BOOL_xtype bool
-
-#define GB_DEF_GxB_ONE_INT8_function GB_ONE_f_INT8
-#define GB_DEF_GxB_ONE_INT8_ztype int8_t
-#define GB_DEF_GxB_ONE_INT8_xtype int8_t
-
-#define GB_DEF_GxB_ONE_UINT8_function GB_ONE_f_UINT8
-#define GB_DEF_GxB_ONE_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ONE_UINT8_xtype uint8_t
-
-#define GB_DEF_GxB_ONE_INT16_function GB_ONE_f_INT16
-#define GB_DEF_GxB_ONE_INT16_ztype int16_t
-#define GB_DEF_GxB_ONE_INT16_xtype int16_t
-
-#define GB_DEF_GxB_ONE_UINT16_function GB_ONE_f_UINT16
-#define GB_DEF_GxB_ONE_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ONE_UINT16_xtype uint16_t
-
-#define GB_DEF_GxB_ONE_INT32_function GB_ONE_f_INT32
-#define GB_DEF_GxB_ONE_INT32_ztype int32_t
-#define GB_DEF_GxB_ONE_INT32_xtype int32_t
-
-#define GB_DEF_GxB_ONE_UINT32_function GB_ONE_f_UINT32
-#define GB_DEF_GxB_ONE_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ONE_UINT32_xtype uint32_t
-
-#define GB_DEF_GxB_ONE_INT64_function GB_ONE_f_INT64
-#define GB_DEF_GxB_ONE_INT64_ztype int64_t
-#define GB_DEF_GxB_ONE_INT64_xtype int64_t
-
-#define GB_DEF_GxB_ONE_UINT64_function GB_ONE_f_UINT64
-#define GB_DEF_GxB_ONE_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ONE_UINT64_xtype uint64_t
-
-#define GB_DEF_GxB_ONE_FP32_function GB_ONE_f_FP32
-#define GB_DEF_GxB_ONE_FP32_ztype float
-#define GB_DEF_GxB_ONE_FP32_xtype float
-
-#define GB_DEF_GxB_ONE_FP64_function GB_ONE_f_FP64
-#define GB_DEF_GxB_ONE_FP64_ztype double
-#define GB_DEF_GxB_ONE_FP64_xtype double
-
-// op: ABS
-#define GB_DEF_GxB_ABS_BOOL_function GB_ABS_f_BOOL
-#define GB_DEF_GxB_ABS_BOOL_ztype bool
-#define GB_DEF_GxB_ABS_BOOL_xtype bool
-
-#define GB_DEF_GxB_ABS_INT8_function GB_ABS_f_INT8
-#define GB_DEF_GxB_ABS_INT8_ztype int8_t
-#define GB_DEF_GxB_ABS_INT8_xtype int8_t
-
-#define GB_DEF_GxB_ABS_UINT8_function GB_ABS_f_UINT8
-#define GB_DEF_GxB_ABS_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ABS_UINT8_xtype uint8_t
-
-#define GB_DEF_GxB_ABS_INT16_function GB_ABS_f_INT16
-#define GB_DEF_GxB_ABS_INT16_ztype int16_t
-#define GB_DEF_GxB_ABS_INT16_xtype int16_t
-
-#define GB_DEF_GxB_ABS_UINT16_function GB_ABS_f_UINT16
-#define GB_DEF_GxB_ABS_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ABS_UINT16_xtype uint16_t
-
-#define GB_DEF_GxB_ABS_INT32_function GB_ABS_f_INT32
-#define GB_DEF_GxB_ABS_INT32_ztype int32_t
-#define GB_DEF_GxB_ABS_INT32_xtype int32_t
-
-#define GB_DEF_GxB_ABS_UINT32_function GB_ABS_f_UINT32
-#define GB_DEF_GxB_ABS_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ABS_UINT32_xtype uint32_t
-
-#define GB_DEF_GxB_ABS_INT64_function GB_ABS_f_INT64
-#define GB_DEF_GxB_ABS_INT64_ztype int64_t
-#define GB_DEF_GxB_ABS_INT64_xtype int64_t
-
-#define GB_DEF_GxB_ABS_UINT64_function GB_ABS_f_UINT64
-#define GB_DEF_GxB_ABS_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ABS_UINT64_xtype uint64_t
-
-#define GB_DEF_GxB_ABS_FP32_function GB_ABS_f_FP32
-#define GB_DEF_GxB_ABS_FP32_ztype float
-#define GB_DEF_GxB_ABS_FP32_xtype float
-
-#define GB_DEF_GxB_ABS_FP64_function GB_ABS_f_FP64
-#define GB_DEF_GxB_ABS_FP64_ztype double
-#define GB_DEF_GxB_ABS_FP64_xtype double
-
-#define GB_DEF_GrB_LNOT_function GB_LNOT_f_BOOL
-#define GB_DEF_GrB_LNOT_ztype bool
-#define GB_DEF_GrB_LNOT_xtype bool
-
-//------------------------------------------------------
-// binary operators of the form z=f(x,y): TxT -> T
-//------------------------------------------------------
-
-// op: FIRST
-#define GB_DEF_GrB_FIRST_BOOL_function GB_FIRST_f_BOOL
-#define GB_DEF_GrB_FIRST_BOOL_ztype bool
-#define GB_DEF_GrB_FIRST_BOOL_xtype bool
-#define GB_DEF_GrB_FIRST_BOOL_ytype bool
-
-#define GB_DEF_GrB_FIRST_INT8_function GB_FIRST_f_INT8
-#define GB_DEF_GrB_FIRST_INT8_ztype int8_t
-#define GB_DEF_GrB_FIRST_INT8_xtype int8_t
-#define GB_DEF_GrB_FIRST_INT8_ytype int8_t
-
-#define GB_DEF_GrB_FIRST_UINT8_function GB_FIRST_f_UINT8
-#define GB_DEF_GrB_FIRST_UINT8_ztype uint8_t
-#define GB_DEF_GrB_FIRST_UINT8_xtype uint8_t
-#define GB_DEF_GrB_FIRST_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_FIRST_INT16_function GB_FIRST_f_INT16
-#define GB_DEF_GrB_FIRST_INT16_ztype int16_t
-#define GB_DEF_GrB_FIRST_INT16_xtype int16_t
-#define GB_DEF_GrB_FIRST_INT16_ytype int16_t
-
-#define GB_DEF_GrB_FIRST_UINT16_function GB_FIRST_f_UINT16
-#define GB_DEF_GrB_FIRST_UINT16_ztype uint16_t
-#define GB_DEF_GrB_FIRST_UINT16_xtype uint16_t
-#define GB_DEF_GrB_FIRST_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_FIRST_INT32_function GB_FIRST_f_INT32
-#define GB_DEF_GrB_FIRST_INT32_ztype int32_t
-#define GB_DEF_GrB_FIRST_INT32_xtype int32_t
-#define GB_DEF_GrB_FIRST_INT32_ytype int32_t
-
-#define GB_DEF_GrB_FIRST_UINT32_function GB_FIRST_f_UINT32
-#define GB_DEF_GrB_FIRST_UINT32_ztype uint32_t
-#define GB_DEF_GrB_FIRST_UINT32_xtype uint32_t
-#define GB_DEF_GrB_FIRST_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_FIRST_INT64_function GB_FIRST_f_INT64
-#define GB_DEF_GrB_FIRST_INT64_ztype int64_t
-#define GB_DEF_GrB_FIRST_INT64_xtype int64_t
-#define GB_DEF_GrB_FIRST_INT64_ytype int64_t
-
-#define GB_DEF_GrB_FIRST_UINT64_function GB_FIRST_f_UINT64
-#define GB_DEF_GrB_FIRST_UINT64_ztype uint64_t
-#define GB_DEF_GrB_FIRST_UINT64_xtype uint64_t
-#define GB_DEF_GrB_FIRST_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_FIRST_FP32_function GB_FIRST_f_FP32
-#define GB_DEF_GrB_FIRST_FP32_ztype float
-#define GB_DEF_GrB_FIRST_FP32_xtype float
-#define GB_DEF_GrB_FIRST_FP32_ytype float
-
-#define GB_DEF_GrB_FIRST_FP64_function GB_FIRST_f_FP64
-#define GB_DEF_GrB_FIRST_FP64_ztype double
-#define GB_DEF_GrB_FIRST_FP64_xtype double
-#define GB_DEF_GrB_FIRST_FP64_ytype double
-
-// op: SECOND
-#define GB_DEF_GrB_SECOND_BOOL_function GB_SECOND_f_BOOL
-#define GB_DEF_GrB_SECOND_BOOL_ztype bool
-#define GB_DEF_GrB_SECOND_BOOL_xtype bool
-#define GB_DEF_GrB_SECOND_BOOL_ytype bool
-
-#define GB_DEF_GrB_SECOND_INT8_function GB_SECOND_f_INT8
-#define GB_DEF_GrB_SECOND_INT8_ztype int8_t
-#define GB_DEF_GrB_SECOND_INT8_xtype int8_t
-#define GB_DEF_GrB_SECOND_INT8_ytype int8_t
-
-#define GB_DEF_GrB_SECOND_UINT8_function GB_SECOND_f_UINT8
-#define GB_DEF_GrB_SECOND_UINT8_ztype uint8_t
-#define GB_DEF_GrB_SECOND_UINT8_xtype uint8_t
-#define GB_DEF_GrB_SECOND_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_SECOND_INT16_function GB_SECOND_f_INT16
-#define GB_DEF_GrB_SECOND_INT16_ztype int16_t
-#define GB_DEF_GrB_SECOND_INT16_xtype int16_t
-#define GB_DEF_GrB_SECOND_INT16_ytype int16_t
-
-#define GB_DEF_GrB_SECOND_UINT16_function GB_SECOND_f_UINT16
-#define GB_DEF_GrB_SECOND_UINT16_ztype uint16_t
-#define GB_DEF_GrB_SECOND_UINT16_xtype uint16_t
-#define GB_DEF_GrB_SECOND_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_SECOND_INT32_function GB_SECOND_f_INT32
-#define GB_DEF_GrB_SECOND_INT32_ztype int32_t
-#define GB_DEF_GrB_SECOND_INT32_xtype int32_t
-#define GB_DEF_GrB_SECOND_INT32_ytype int32_t
-
-#define GB_DEF_GrB_SECOND_UINT32_function GB_SECOND_f_UINT32
-#define GB_DEF_GrB_SECOND_UINT32_ztype uint32_t
-#define GB_DEF_GrB_SECOND_UINT32_xtype uint32_t
-#define GB_DEF_GrB_SECOND_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_SECOND_INT64_function GB_SECOND_f_INT64
-#define GB_DEF_GrB_SECOND_INT64_ztype int64_t
-#define GB_DEF_GrB_SECOND_INT64_xtype int64_t
-#define GB_DEF_GrB_SECOND_INT64_ytype int64_t
-
-#define GB_DEF_GrB_SECOND_UINT64_function GB_SECOND_f_UINT64
-#define GB_DEF_GrB_SECOND_UINT64_ztype uint64_t
-#define GB_DEF_GrB_SECOND_UINT64_xtype uint64_t
-#define GB_DEF_GrB_SECOND_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_SECOND_FP32_function GB_SECOND_f_FP32
-#define GB_DEF_GrB_SECOND_FP32_ztype float
-#define GB_DEF_GrB_SECOND_FP32_xtype float
-#define GB_DEF_GrB_SECOND_FP32_ytype float
-
-#define GB_DEF_GrB_SECOND_FP64_function GB_SECOND_f_FP64
-#define GB_DEF_GrB_SECOND_FP64_ztype double
-#define GB_DEF_GrB_SECOND_FP64_xtype double
-#define GB_DEF_GrB_SECOND_FP64_ytype double
-
-// op: MIN
-#define GB_DEF_GrB_MIN_BOOL_function GB_MIN_f_BOOL
-#define GB_DEF_GrB_MIN_BOOL_ztype bool
-#define GB_DEF_GrB_MIN_BOOL_xtype bool
-#define GB_DEF_GrB_MIN_BOOL_ytype bool
-
-#define GB_DEF_GrB_MIN_INT8_function GB_MIN_f_INT8
-#define GB_DEF_GrB_MIN_INT8_ztype int8_t
-#define GB_DEF_GrB_MIN_INT8_xtype int8_t
-#define GB_DEF_GrB_MIN_INT8_ytype int8_t
-
-#define GB_DEF_GrB_MIN_UINT8_function GB_MIN_f_UINT8
-#define GB_DEF_GrB_MIN_UINT8_ztype uint8_t
-#define GB_DEF_GrB_MIN_UINT8_xtype uint8_t
-#define GB_DEF_GrB_MIN_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_MIN_INT16_function GB_MIN_f_INT16
-#define GB_DEF_GrB_MIN_INT16_ztype int16_t
-#define GB_DEF_GrB_MIN_INT16_xtype int16_t
-#define GB_DEF_GrB_MIN_INT16_ytype int16_t
-
-#define GB_DEF_GrB_MIN_UINT16_function GB_MIN_f_UINT16
-#define GB_DEF_GrB_MIN_UINT16_ztype uint16_t
-#define GB_DEF_GrB_MIN_UINT16_xtype uint16_t
-#define GB_DEF_GrB_MIN_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_MIN_INT32_function GB_MIN_f_INT32
-#define GB_DEF_GrB_MIN_INT32_ztype int32_t
-#define GB_DEF_GrB_MIN_INT32_xtype int32_t
-#define GB_DEF_GrB_MIN_INT32_ytype int32_t
-
-#define GB_DEF_GrB_MIN_UINT32_function GB_MIN_f_UINT32
-#define GB_DEF_GrB_MIN_UINT32_ztype uint32_t
-#define GB_DEF_GrB_MIN_UINT32_xtype uint32_t
-#define GB_DEF_GrB_MIN_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_MIN_INT64_function GB_MIN_f_INT64
-#define GB_DEF_GrB_MIN_INT64_ztype int64_t
-#define GB_DEF_GrB_MIN_INT64_xtype int64_t
-#define GB_DEF_GrB_MIN_INT64_ytype int64_t
-
-#define GB_DEF_GrB_MIN_UINT64_function GB_MIN_f_UINT64
-#define GB_DEF_GrB_MIN_UINT64_ztype uint64_t
-#define GB_DEF_GrB_MIN_UINT64_xtype uint64_t
-#define GB_DEF_GrB_MIN_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_MIN_FP32_function GB_MIN_f_FP32
-#define GB_DEF_GrB_MIN_FP32_ztype float
-#define GB_DEF_GrB_MIN_FP32_xtype float
-#define GB_DEF_GrB_MIN_FP32_ytype float
-
-#define GB_DEF_GrB_MIN_FP64_function GB_MIN_f_FP64
-#define GB_DEF_GrB_MIN_FP64_ztype double
-#define GB_DEF_GrB_MIN_FP64_xtype double
-#define GB_DEF_GrB_MIN_FP64_ytype double
-
-// op: MAX
-#define GB_DEF_GrB_MAX_BOOL_function GB_MAX_f_BOOL
-#define GB_DEF_GrB_MAX_BOOL_ztype bool
-#define GB_DEF_GrB_MAX_BOOL_xtype bool
-#define GB_DEF_GrB_MAX_BOOL_ytype bool
-
-#define GB_DEF_GrB_MAX_INT8_function GB_MAX_f_INT8
-#define GB_DEF_GrB_MAX_INT8_ztype int8_t
-#define GB_DEF_GrB_MAX_INT8_xtype int8_t
-#define GB_DEF_GrB_MAX_INT8_ytype int8_t
-
-#define GB_DEF_GrB_MAX_UINT8_function GB_MAX_f_UINT8
-#define GB_DEF_GrB_MAX_UINT8_ztype uint8_t
-#define GB_DEF_GrB_MAX_UINT8_xtype uint8_t
-#define GB_DEF_GrB_MAX_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_MAX_INT16_function GB_MAX_f_INT16
-#define GB_DEF_GrB_MAX_INT16_ztype int16_t
-#define GB_DEF_GrB_MAX_INT16_xtype int16_t
-#define GB_DEF_GrB_MAX_INT16_ytype int16_t
-
-#define GB_DEF_GrB_MAX_UINT16_function GB_MAX_f_UINT16
-#define GB_DEF_GrB_MAX_UINT16_ztype uint16_t
-#define GB_DEF_GrB_MAX_UINT16_xtype uint16_t
-#define GB_DEF_GrB_MAX_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_MAX_INT32_function GB_MAX_f_INT32
-#define GB_DEF_GrB_MAX_INT32_ztype int32_t
-#define GB_DEF_GrB_MAX_INT32_xtype int32_t
-#define GB_DEF_GrB_MAX_INT32_ytype int32_t
-
-#define GB_DEF_GrB_MAX_UINT32_function GB_MAX_f_UINT32
-#define GB_DEF_GrB_MAX_UINT32_ztype uint32_t
-#define GB_DEF_GrB_MAX_UINT32_xtype uint32_t
-#define GB_DEF_GrB_MAX_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_MAX_INT64_function GB_MAX_f_INT64
-#define GB_DEF_GrB_MAX_INT64_ztype int64_t
-#define GB_DEF_GrB_MAX_INT64_xtype int64_t
-#define GB_DEF_GrB_MAX_INT64_ytype int64_t
-
-#define GB_DEF_GrB_MAX_UINT64_function GB_MAX_f_UINT64
-#define GB_DEF_GrB_MAX_UINT64_ztype uint64_t
-#define GB_DEF_GrB_MAX_UINT64_xtype uint64_t
-#define GB_DEF_GrB_MAX_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_MAX_FP32_function GB_MAX_f_FP32
-#define GB_DEF_GrB_MAX_FP32_ztype float
-#define GB_DEF_GrB_MAX_FP32_xtype float
-#define GB_DEF_GrB_MAX_FP32_ytype float
-
-#define GB_DEF_GrB_MAX_FP64_function GB_MAX_f_FP64
-#define GB_DEF_GrB_MAX_FP64_ztype double
-#define GB_DEF_GrB_MAX_FP64_xtype double
-#define GB_DEF_GrB_MAX_FP64_ytype double
-
-// op: PLUS
-#define GB_DEF_GrB_PLUS_BOOL_function GB_PLUS_f_BOOL
-#define GB_DEF_GrB_PLUS_BOOL_ztype bool
-#define GB_DEF_GrB_PLUS_BOOL_xtype bool
-#define GB_DEF_GrB_PLUS_BOOL_ytype bool
-
-#define GB_DEF_GrB_PLUS_INT8_function GB_PLUS_f_INT8
-#define GB_DEF_GrB_PLUS_INT8_ztype int8_t
-#define GB_DEF_GrB_PLUS_INT8_xtype int8_t
-#define GB_DEF_GrB_PLUS_INT8_ytype int8_t
-
-#define GB_DEF_GrB_PLUS_UINT8_function GB_PLUS_f_UINT8
-#define GB_DEF_GrB_PLUS_UINT8_ztype uint8_t
-#define GB_DEF_GrB_PLUS_UINT8_xtype uint8_t
-#define GB_DEF_GrB_PLUS_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_PLUS_INT16_function GB_PLUS_f_INT16
-#define GB_DEF_GrB_PLUS_INT16_ztype int16_t
-#define GB_DEF_GrB_PLUS_INT16_xtype int16_t
-#define GB_DEF_GrB_PLUS_INT16_ytype int16_t
-
-#define GB_DEF_GrB_PLUS_UINT16_function GB_PLUS_f_UINT16
-#define GB_DEF_GrB_PLUS_UINT16_ztype uint16_t
-#define GB_DEF_GrB_PLUS_UINT16_xtype uint16_t
-#define GB_DEF_GrB_PLUS_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_PLUS_INT32_function GB_PLUS_f_INT32
-#define GB_DEF_GrB_PLUS_INT32_ztype int32_t
-#define GB_DEF_GrB_PLUS_INT32_xtype int32_t
-#define GB_DEF_GrB_PLUS_INT32_ytype int32_t
-
-#define GB_DEF_GrB_PLUS_UINT32_function GB_PLUS_f_UINT32
-#define GB_DEF_GrB_PLUS_UINT32_ztype uint32_t
-#define GB_DEF_GrB_PLUS_UINT32_xtype uint32_t
-#define GB_DEF_GrB_PLUS_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_PLUS_INT64_function GB_PLUS_f_INT64
-#define GB_DEF_GrB_PLUS_INT64_ztype int64_t
-#define GB_DEF_GrB_PLUS_INT64_xtype int64_t
-#define GB_DEF_GrB_PLUS_INT64_ytype int64_t
-
-#define GB_DEF_GrB_PLUS_UINT64_function GB_PLUS_f_UINT64
-#define GB_DEF_GrB_PLUS_UINT64_ztype uint64_t
-#define GB_DEF_GrB_PLUS_UINT64_xtype uint64_t
-#define GB_DEF_GrB_PLUS_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_PLUS_FP32_function GB_PLUS_f_FP32
-#define GB_DEF_GrB_PLUS_FP32_ztype float
-#define GB_DEF_GrB_PLUS_FP32_xtype float
-#define GB_DEF_GrB_PLUS_FP32_ytype float
-
-#define GB_DEF_GrB_PLUS_FP64_function GB_PLUS_f_FP64
-#define GB_DEF_GrB_PLUS_FP64_ztype double
-#define GB_DEF_GrB_PLUS_FP64_xtype double
-#define GB_DEF_GrB_PLUS_FP64_ytype double
-
-// op: MINUS
-#define GB_DEF_GrB_MINUS_BOOL_function GB_MINUS_f_BOOL
-#define GB_DEF_GrB_MINUS_BOOL_ztype bool
-#define GB_DEF_GrB_MINUS_BOOL_xtype bool
-#define GB_DEF_GrB_MINUS_BOOL_ytype bool
-
-#define GB_DEF_GrB_MINUS_INT8_function GB_MINUS_f_INT8
-#define GB_DEF_GrB_MINUS_INT8_ztype int8_t
-#define GB_DEF_GrB_MINUS_INT8_xtype int8_t
-#define GB_DEF_GrB_MINUS_INT8_ytype int8_t
-
-#define GB_DEF_GrB_MINUS_UINT8_function GB_MINUS_f_UINT8
-#define GB_DEF_GrB_MINUS_UINT8_ztype uint8_t
-#define GB_DEF_GrB_MINUS_UINT8_xtype uint8_t
-#define GB_DEF_GrB_MINUS_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_MINUS_INT16_function GB_MINUS_f_INT16
-#define GB_DEF_GrB_MINUS_INT16_ztype int16_t
-#define GB_DEF_GrB_MINUS_INT16_xtype int16_t
-#define GB_DEF_GrB_MINUS_INT16_ytype int16_t
-
-#define GB_DEF_GrB_MINUS_UINT16_function GB_MINUS_f_UINT16
-#define GB_DEF_GrB_MINUS_UINT16_ztype uint16_t
-#define GB_DEF_GrB_MINUS_UINT16_xtype uint16_t
-#define GB_DEF_GrB_MINUS_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_MINUS_INT32_function GB_MINUS_f_INT32
-#define GB_DEF_GrB_MINUS_INT32_ztype int32_t
-#define GB_DEF_GrB_MINUS_INT32_xtype int32_t
-#define GB_DEF_GrB_MINUS_INT32_ytype int32_t
-
-#define GB_DEF_GrB_MINUS_UINT32_function GB_MINUS_f_UINT32
-#define GB_DEF_GrB_MINUS_UINT32_ztype uint32_t
-#define GB_DEF_GrB_MINUS_UINT32_xtype uint32_t
-#define GB_DEF_GrB_MINUS_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_MINUS_INT64_function GB_MINUS_f_INT64
-#define GB_DEF_GrB_MINUS_INT64_ztype int64_t
-#define GB_DEF_GrB_MINUS_INT64_xtype int64_t
-#define GB_DEF_GrB_MINUS_INT64_ytype int64_t
-
-#define GB_DEF_GrB_MINUS_UINT64_function GB_MINUS_f_UINT64
-#define GB_DEF_GrB_MINUS_UINT64_ztype uint64_t
-#define GB_DEF_GrB_MINUS_UINT64_xtype uint64_t
-#define GB_DEF_GrB_MINUS_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_MINUS_FP32_function GB_MINUS_f_FP32
-#define GB_DEF_GrB_MINUS_FP32_ztype float
-#define GB_DEF_GrB_MINUS_FP32_xtype float
-#define GB_DEF_GrB_MINUS_FP32_ytype float
-
-#define GB_DEF_GrB_MINUS_FP64_function GB_MINUS_f_FP64
-#define GB_DEF_GrB_MINUS_FP64_ztype double
-#define GB_DEF_GrB_MINUS_FP64_xtype double
-#define GB_DEF_GrB_MINUS_FP64_ytype double
-
-// op: RMINUS
-#define GB_DEF_GxB_RMINUS_BOOL_function GB_RMINUS_f_BOOL
-#define GB_DEF_GxB_RMINUS_BOOL_ztype bool
-#define GB_DEF_GxB_RMINUS_BOOL_xtype bool
-#define GB_DEF_GxB_RMINUS_BOOL_ytype bool
-
-#define GB_DEF_GxB_RMINUS_INT8_function GB_RMINUS_f_INT8
-#define GB_DEF_GxB_RMINUS_INT8_ztype int8_t
-#define GB_DEF_GxB_RMINUS_INT8_xtype int8_t
-#define GB_DEF_GxB_RMINUS_INT8_ytype int8_t
-
-#define GB_DEF_GxB_RMINUS_UINT8_function GB_RMINUS_f_UINT8
-#define GB_DEF_GxB_RMINUS_UINT8_ztype uint8_t
-#define GB_DEF_GxB_RMINUS_UINT8_xtype uint8_t
-#define GB_DEF_GxB_RMINUS_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_RMINUS_INT16_function GB_RMINUS_f_INT16
-#define GB_DEF_GxB_RMINUS_INT16_ztype int16_t
-#define GB_DEF_GxB_RMINUS_INT16_xtype int16_t
-#define GB_DEF_GxB_RMINUS_INT16_ytype int16_t
-
-#define GB_DEF_GxB_RMINUS_UINT16_function GB_RMINUS_f_UINT16
-#define GB_DEF_GxB_RMINUS_UINT16_ztype uint16_t
-#define GB_DEF_GxB_RMINUS_UINT16_xtype uint16_t
-#define GB_DEF_GxB_RMINUS_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_RMINUS_INT32_function GB_RMINUS_f_INT32
-#define GB_DEF_GxB_RMINUS_INT32_ztype int32_t
-#define GB_DEF_GxB_RMINUS_INT32_xtype int32_t
-#define GB_DEF_GxB_RMINUS_INT32_ytype int32_t
-
-#define GB_DEF_GxB_RMINUS_UINT32_function GB_RMINUS_f_UINT32
-#define GB_DEF_GxB_RMINUS_UINT32_ztype uint32_t
-#define GB_DEF_GxB_RMINUS_UINT32_xtype uint32_t
-#define GB_DEF_GxB_RMINUS_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_RMINUS_INT64_function GB_RMINUS_f_INT64
-#define GB_DEF_GxB_RMINUS_INT64_ztype int64_t
-#define GB_DEF_GxB_RMINUS_INT64_xtype int64_t
-#define GB_DEF_GxB_RMINUS_INT64_ytype int64_t
-
-#define GB_DEF_GxB_RMINUS_UINT64_function GB_RMINUS_f_UINT64
-#define GB_DEF_GxB_RMINUS_UINT64_ztype uint64_t
-#define GB_DEF_GxB_RMINUS_UINT64_xtype uint64_t
-#define GB_DEF_GxB_RMINUS_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_RMINUS_FP32_function GB_RMINUS_f_FP32
-#define GB_DEF_GxB_RMINUS_FP32_ztype float
-#define GB_DEF_GxB_RMINUS_FP32_xtype float
-#define GB_DEF_GxB_RMINUS_FP32_ytype float
-
-#define GB_DEF_GxB_RMINUS_FP64_function GB_RMINUS_f_FP64
-#define GB_DEF_GxB_RMINUS_FP64_ztype double
-#define GB_DEF_GxB_RMINUS_FP64_xtype double
-#define GB_DEF_GxB_RMINUS_FP64_ytype double
-
-// op: TIMES
-#define GB_DEF_GrB_TIMES_BOOL_function GB_TIMES_f_BOOL
-#define GB_DEF_GrB_TIMES_BOOL_ztype bool
-#define GB_DEF_GrB_TIMES_BOOL_xtype bool
-#define GB_DEF_GrB_TIMES_BOOL_ytype bool
-
-#define GB_DEF_GrB_TIMES_INT8_function GB_TIMES_f_INT8
-#define GB_DEF_GrB_TIMES_INT8_ztype int8_t
-#define GB_DEF_GrB_TIMES_INT8_xtype int8_t
-#define GB_DEF_GrB_TIMES_INT8_ytype int8_t
-
-#define GB_DEF_GrB_TIMES_UINT8_function GB_TIMES_f_UINT8
-#define GB_DEF_GrB_TIMES_UINT8_ztype uint8_t
-#define GB_DEF_GrB_TIMES_UINT8_xtype uint8_t
-#define GB_DEF_GrB_TIMES_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_TIMES_INT16_function GB_TIMES_f_INT16
-#define GB_DEF_GrB_TIMES_INT16_ztype int16_t
-#define GB_DEF_GrB_TIMES_INT16_xtype int16_t
-#define GB_DEF_GrB_TIMES_INT16_ytype int16_t
-
-#define GB_DEF_GrB_TIMES_UINT16_function GB_TIMES_f_UINT16
-#define GB_DEF_GrB_TIMES_UINT16_ztype uint16_t
-#define GB_DEF_GrB_TIMES_UINT16_xtype uint16_t
-#define GB_DEF_GrB_TIMES_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_TIMES_INT32_function GB_TIMES_f_INT32
-#define GB_DEF_GrB_TIMES_INT32_ztype int32_t
-#define GB_DEF_GrB_TIMES_INT32_xtype int32_t
-#define GB_DEF_GrB_TIMES_INT32_ytype int32_t
-
-#define GB_DEF_GrB_TIMES_UINT32_function GB_TIMES_f_UINT32
-#define GB_DEF_GrB_TIMES_UINT32_ztype uint32_t
-#define GB_DEF_GrB_TIMES_UINT32_xtype uint32_t
-#define GB_DEF_GrB_TIMES_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_TIMES_INT64_function GB_TIMES_f_INT64
-#define GB_DEF_GrB_TIMES_INT64_ztype int64_t
-#define GB_DEF_GrB_TIMES_INT64_xtype int64_t
-#define GB_DEF_GrB_TIMES_INT64_ytype int64_t
-
-#define GB_DEF_GrB_TIMES_UINT64_function GB_TIMES_f_UINT64
-#define GB_DEF_GrB_TIMES_UINT64_ztype uint64_t
-#define GB_DEF_GrB_TIMES_UINT64_xtype uint64_t
-#define GB_DEF_GrB_TIMES_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_TIMES_FP32_function GB_TIMES_f_FP32
-#define GB_DEF_GrB_TIMES_FP32_ztype float
-#define GB_DEF_GrB_TIMES_FP32_xtype float
-#define GB_DEF_GrB_TIMES_FP32_ytype float
-
-#define GB_DEF_GrB_TIMES_FP64_function GB_TIMES_f_FP64
-#define GB_DEF_GrB_TIMES_FP64_ztype double
-#define GB_DEF_GrB_TIMES_FP64_xtype double
-#define GB_DEF_GrB_TIMES_FP64_ytype double
-
-// op: DIV
-#define GB_DEF_GrB_DIV_BOOL_function GB_DIV_f_BOOL
-#define GB_DEF_GrB_DIV_BOOL_ztype bool
-#define GB_DEF_GrB_DIV_BOOL_xtype bool
-#define GB_DEF_GrB_DIV_BOOL_ytype bool
-
-#define GB_DEF_GrB_DIV_INT8_function GB_DIV_f_INT8
-#define GB_DEF_GrB_DIV_INT8_ztype int8_t
-#define GB_DEF_GrB_DIV_INT8_xtype int8_t
-#define GB_DEF_GrB_DIV_INT8_ytype int8_t
-
-#define GB_DEF_GrB_DIV_UINT8_function GB_DIV_f_UINT8
-#define GB_DEF_GrB_DIV_UINT8_ztype uint8_t
-#define GB_DEF_GrB_DIV_UINT8_xtype uint8_t
-#define GB_DEF_GrB_DIV_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_DIV_INT16_function GB_DIV_f_INT16
-#define GB_DEF_GrB_DIV_INT16_ztype int16_t
-#define GB_DEF_GrB_DIV_INT16_xtype int16_t
-#define GB_DEF_GrB_DIV_INT16_ytype int16_t
-
-#define GB_DEF_GrB_DIV_UINT16_function GB_DIV_f_UINT16
-#define GB_DEF_GrB_DIV_UINT16_ztype uint16_t
-#define GB_DEF_GrB_DIV_UINT16_xtype uint16_t
-#define GB_DEF_GrB_DIV_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_DIV_INT32_function GB_DIV_f_INT32
-#define GB_DEF_GrB_DIV_INT32_ztype int32_t
-#define GB_DEF_GrB_DIV_INT32_xtype int32_t
-#define GB_DEF_GrB_DIV_INT32_ytype int32_t
-
-#define GB_DEF_GrB_DIV_UINT32_function GB_DIV_f_UINT32
-#define GB_DEF_GrB_DIV_UINT32_ztype uint32_t
-#define GB_DEF_GrB_DIV_UINT32_xtype uint32_t
-#define GB_DEF_GrB_DIV_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_DIV_INT64_function GB_DIV_f_INT64
-#define GB_DEF_GrB_DIV_INT64_ztype int64_t
-#define GB_DEF_GrB_DIV_INT64_xtype int64_t
-#define GB_DEF_GrB_DIV_INT64_ytype int64_t
-
-#define GB_DEF_GrB_DIV_UINT64_function GB_DIV_f_UINT64
-#define GB_DEF_GrB_DIV_UINT64_ztype uint64_t
-#define GB_DEF_GrB_DIV_UINT64_xtype uint64_t
-#define GB_DEF_GrB_DIV_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_DIV_FP32_function GB_DIV_f_FP32
-#define GB_DEF_GrB_DIV_FP32_ztype float
-#define GB_DEF_GrB_DIV_FP32_xtype float
-#define GB_DEF_GrB_DIV_FP32_ytype float
-
-#define GB_DEF_GrB_DIV_FP64_function GB_DIV_f_FP64
-#define GB_DEF_GrB_DIV_FP64_ztype double
-#define GB_DEF_GrB_DIV_FP64_xtype double
-#define GB_DEF_GrB_DIV_FP64_ytype double
-
-// op: RDIV
-#define GB_DEF_GxB_RDIV_BOOL_function GB_RDIV_f_BOOL
-#define GB_DEF_GxB_RDIV_BOOL_ztype bool
-#define GB_DEF_GxB_RDIV_BOOL_xtype bool
-#define GB_DEF_GxB_RDIV_BOOL_ytype bool
-
-#define GB_DEF_GxB_RDIV_INT8_function GB_RDIV_f_INT8
-#define GB_DEF_GxB_RDIV_INT8_ztype int8_t
-#define GB_DEF_GxB_RDIV_INT8_xtype int8_t
-#define GB_DEF_GxB_RDIV_INT8_ytype int8_t
-
-#define GB_DEF_GxB_RDIV_UINT8_function GB_RDIV_f_UINT8
-#define GB_DEF_GxB_RDIV_UINT8_ztype uint8_t
-#define GB_DEF_GxB_RDIV_UINT8_xtype uint8_t
-#define GB_DEF_GxB_RDIV_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_RDIV_INT16_function GB_RDIV_f_INT16
-#define GB_DEF_GxB_RDIV_INT16_ztype int16_t
-#define GB_DEF_GxB_RDIV_INT16_xtype int16_t
-#define GB_DEF_GxB_RDIV_INT16_ytype int16_t
-
-#define GB_DEF_GxB_RDIV_UINT16_function GB_RDIV_f_UINT16
-#define GB_DEF_GxB_RDIV_UINT16_ztype uint16_t
-#define GB_DEF_GxB_RDIV_UINT16_xtype uint16_t
-#define GB_DEF_GxB_RDIV_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_RDIV_INT32_function GB_RDIV_f_INT32
-#define GB_DEF_GxB_RDIV_INT32_ztype int32_t
-#define GB_DEF_GxB_RDIV_INT32_xtype int32_t
-#define GB_DEF_GxB_RDIV_INT32_ytype int32_t
-
-#define GB_DEF_GxB_RDIV_UINT32_function GB_RDIV_f_UINT32
-#define GB_DEF_GxB_RDIV_UINT32_ztype uint32_t
-#define GB_DEF_GxB_RDIV_UINT32_xtype uint32_t
-#define GB_DEF_GxB_RDIV_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_RDIV_INT64_function GB_RDIV_f_INT64
-#define GB_DEF_GxB_RDIV_INT64_ztype int64_t
-#define GB_DEF_GxB_RDIV_INT64_xtype int64_t
-#define GB_DEF_GxB_RDIV_INT64_ytype int64_t
-
-#define GB_DEF_GxB_RDIV_UINT64_function GB_RDIV_f_UINT64
-#define GB_DEF_GxB_RDIV_UINT64_ztype uint64_t
-#define GB_DEF_GxB_RDIV_UINT64_xtype uint64_t
-#define GB_DEF_GxB_RDIV_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_RDIV_FP32_function GB_RDIV_f_FP32
-#define GB_DEF_GxB_RDIV_FP32_ztype float
-#define GB_DEF_GxB_RDIV_FP32_xtype float
-#define GB_DEF_GxB_RDIV_FP32_ytype float
-
-#define GB_DEF_GxB_RDIV_FP64_function GB_RDIV_f_FP64
-#define GB_DEF_GxB_RDIV_FP64_ztype double
-#define GB_DEF_GxB_RDIV_FP64_xtype double
-#define GB_DEF_GxB_RDIV_FP64_ytype double
-
-// op: ISEQ
-#define GB_DEF_GxB_ISEQ_BOOL_function GB_ISEQ_f_BOOL
-#define GB_DEF_GxB_ISEQ_BOOL_ztype bool
-#define GB_DEF_GxB_ISEQ_BOOL_xtype bool
-#define GB_DEF_GxB_ISEQ_BOOL_ytype bool
-
-#define GB_DEF_GxB_ISEQ_INT8_function GB_ISEQ_f_INT8
-#define GB_DEF_GxB_ISEQ_INT8_ztype int8_t
-#define GB_DEF_GxB_ISEQ_INT8_xtype int8_t
-#define GB_DEF_GxB_ISEQ_INT8_ytype int8_t
-
-#define GB_DEF_GxB_ISEQ_UINT8_function GB_ISEQ_f_UINT8
-#define GB_DEF_GxB_ISEQ_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ISEQ_UINT8_xtype uint8_t
-#define GB_DEF_GxB_ISEQ_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_ISEQ_INT16_function GB_ISEQ_f_INT16
-#define GB_DEF_GxB_ISEQ_INT16_ztype int16_t
-#define GB_DEF_GxB_ISEQ_INT16_xtype int16_t
-#define GB_DEF_GxB_ISEQ_INT16_ytype int16_t
-
-#define GB_DEF_GxB_ISEQ_UINT16_function GB_ISEQ_f_UINT16
-#define GB_DEF_GxB_ISEQ_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ISEQ_UINT16_xtype uint16_t
-#define GB_DEF_GxB_ISEQ_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_ISEQ_INT32_function GB_ISEQ_f_INT32
-#define GB_DEF_GxB_ISEQ_INT32_ztype int32_t
-#define GB_DEF_GxB_ISEQ_INT32_xtype int32_t
-#define GB_DEF_GxB_ISEQ_INT32_ytype int32_t
-
-#define GB_DEF_GxB_ISEQ_UINT32_function GB_ISEQ_f_UINT32
-#define GB_DEF_GxB_ISEQ_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ISEQ_UINT32_xtype uint32_t
-#define GB_DEF_GxB_ISEQ_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_ISEQ_INT64_function GB_ISEQ_f_INT64
-#define GB_DEF_GxB_ISEQ_INT64_ztype int64_t
-#define GB_DEF_GxB_ISEQ_INT64_xtype int64_t
-#define GB_DEF_GxB_ISEQ_INT64_ytype int64_t
-
-#define GB_DEF_GxB_ISEQ_UINT64_function GB_ISEQ_f_UINT64
-#define GB_DEF_GxB_ISEQ_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ISEQ_UINT64_xtype uint64_t
-#define GB_DEF_GxB_ISEQ_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_ISEQ_FP32_function GB_ISEQ_f_FP32
-#define GB_DEF_GxB_ISEQ_FP32_ztype float
-#define GB_DEF_GxB_ISEQ_FP32_xtype float
-#define GB_DEF_GxB_ISEQ_FP32_ytype float
-
-#define GB_DEF_GxB_ISEQ_FP64_function GB_ISEQ_f_FP64
-#define GB_DEF_GxB_ISEQ_FP64_ztype double
-#define GB_DEF_GxB_ISEQ_FP64_xtype double
-#define GB_DEF_GxB_ISEQ_FP64_ytype double
-
-// op: ISNE
-#define GB_DEF_GxB_ISNE_BOOL_function GB_ISNE_f_BOOL
-#define GB_DEF_GxB_ISNE_BOOL_ztype bool
-#define GB_DEF_GxB_ISNE_BOOL_xtype bool
-#define GB_DEF_GxB_ISNE_BOOL_ytype bool
-
-#define GB_DEF_GxB_ISNE_INT8_function GB_ISNE_f_INT8
-#define GB_DEF_GxB_ISNE_INT8_ztype int8_t
-#define GB_DEF_GxB_ISNE_INT8_xtype int8_t
-#define GB_DEF_GxB_ISNE_INT8_ytype int8_t
-
-#define GB_DEF_GxB_ISNE_UINT8_function GB_ISNE_f_UINT8
-#define GB_DEF_GxB_ISNE_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ISNE_UINT8_xtype uint8_t
-#define GB_DEF_GxB_ISNE_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_ISNE_INT16_function GB_ISNE_f_INT16
-#define GB_DEF_GxB_ISNE_INT16_ztype int16_t
-#define GB_DEF_GxB_ISNE_INT16_xtype int16_t
-#define GB_DEF_GxB_ISNE_INT16_ytype int16_t
-
-#define GB_DEF_GxB_ISNE_UINT16_function GB_ISNE_f_UINT16
-#define GB_DEF_GxB_ISNE_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ISNE_UINT16_xtype uint16_t
-#define GB_DEF_GxB_ISNE_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_ISNE_INT32_function GB_ISNE_f_INT32
-#define GB_DEF_GxB_ISNE_INT32_ztype int32_t
-#define GB_DEF_GxB_ISNE_INT32_xtype int32_t
-#define GB_DEF_GxB_ISNE_INT32_ytype int32_t
-
-#define GB_DEF_GxB_ISNE_UINT32_function GB_ISNE_f_UINT32
-#define GB_DEF_GxB_ISNE_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ISNE_UINT32_xtype uint32_t
-#define GB_DEF_GxB_ISNE_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_ISNE_INT64_function GB_ISNE_f_INT64
-#define GB_DEF_GxB_ISNE_INT64_ztype int64_t
-#define GB_DEF_GxB_ISNE_INT64_xtype int64_t
-#define GB_DEF_GxB_ISNE_INT64_ytype int64_t
-
-#define GB_DEF_GxB_ISNE_UINT64_function GB_ISNE_f_UINT64
-#define GB_DEF_GxB_ISNE_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ISNE_UINT64_xtype uint64_t
-#define GB_DEF_GxB_ISNE_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_ISNE_FP32_function GB_ISNE_f_FP32
-#define GB_DEF_GxB_ISNE_FP32_ztype float
-#define GB_DEF_GxB_ISNE_FP32_xtype float
-#define GB_DEF_GxB_ISNE_FP32_ytype float
-
-#define GB_DEF_GxB_ISNE_FP64_function GB_ISNE_f_FP64
-#define GB_DEF_GxB_ISNE_FP64_ztype double
-#define GB_DEF_GxB_ISNE_FP64_xtype double
-#define GB_DEF_GxB_ISNE_FP64_ytype double
-
-// op: ISGT
-#define GB_DEF_GxB_ISGT_BOOL_function GB_ISGT_f_BOOL
-#define GB_DEF_GxB_ISGT_BOOL_ztype bool
-#define GB_DEF_GxB_ISGT_BOOL_xtype bool
-#define GB_DEF_GxB_ISGT_BOOL_ytype bool
-
-#define GB_DEF_GxB_ISGT_INT8_function GB_ISGT_f_INT8
-#define GB_DEF_GxB_ISGT_INT8_ztype int8_t
-#define GB_DEF_GxB_ISGT_INT8_xtype int8_t
-#define GB_DEF_GxB_ISGT_INT8_ytype int8_t
-
-#define GB_DEF_GxB_ISGT_UINT8_function GB_ISGT_f_UINT8
-#define GB_DEF_GxB_ISGT_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ISGT_UINT8_xtype uint8_t
-#define GB_DEF_GxB_ISGT_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_ISGT_INT16_function GB_ISGT_f_INT16
-#define GB_DEF_GxB_ISGT_INT16_ztype int16_t
-#define GB_DEF_GxB_ISGT_INT16_xtype int16_t
-#define GB_DEF_GxB_ISGT_INT16_ytype int16_t
-
-#define GB_DEF_GxB_ISGT_UINT16_function GB_ISGT_f_UINT16
-#define GB_DEF_GxB_ISGT_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ISGT_UINT16_xtype uint16_t
-#define GB_DEF_GxB_ISGT_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_ISGT_INT32_function GB_ISGT_f_INT32
-#define GB_DEF_GxB_ISGT_INT32_ztype int32_t
-#define GB_DEF_GxB_ISGT_INT32_xtype int32_t
-#define GB_DEF_GxB_ISGT_INT32_ytype int32_t
-
-#define GB_DEF_GxB_ISGT_UINT32_function GB_ISGT_f_UINT32
-#define GB_DEF_GxB_ISGT_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ISGT_UINT32_xtype uint32_t
-#define GB_DEF_GxB_ISGT_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_ISGT_INT64_function GB_ISGT_f_INT64
-#define GB_DEF_GxB_ISGT_INT64_ztype int64_t
-#define GB_DEF_GxB_ISGT_INT64_xtype int64_t
-#define GB_DEF_GxB_ISGT_INT64_ytype int64_t
-
-#define GB_DEF_GxB_ISGT_UINT64_function GB_ISGT_f_UINT64
-#define GB_DEF_GxB_ISGT_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ISGT_UINT64_xtype uint64_t
-#define GB_DEF_GxB_ISGT_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_ISGT_FP32_function GB_ISGT_f_FP32
-#define GB_DEF_GxB_ISGT_FP32_ztype float
-#define GB_DEF_GxB_ISGT_FP32_xtype float
-#define GB_DEF_GxB_ISGT_FP32_ytype float
-
-#define GB_DEF_GxB_ISGT_FP64_function GB_ISGT_f_FP64
-#define GB_DEF_GxB_ISGT_FP64_ztype double
-#define GB_DEF_GxB_ISGT_FP64_xtype double
-#define GB_DEF_GxB_ISGT_FP64_ytype double
-
-// op: ISLT
-#define GB_DEF_GxB_ISLT_BOOL_function GB_ISLT_f_BOOL
-#define GB_DEF_GxB_ISLT_BOOL_ztype bool
-#define GB_DEF_GxB_ISLT_BOOL_xtype bool
-#define GB_DEF_GxB_ISLT_BOOL_ytype bool
-
-#define GB_DEF_GxB_ISLT_INT8_function GB_ISLT_f_INT8
-#define GB_DEF_GxB_ISLT_INT8_ztype int8_t
-#define GB_DEF_GxB_ISLT_INT8_xtype int8_t
-#define GB_DEF_GxB_ISLT_INT8_ytype int8_t
-
-#define GB_DEF_GxB_ISLT_UINT8_function GB_ISLT_f_UINT8
-#define GB_DEF_GxB_ISLT_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ISLT_UINT8_xtype uint8_t
-#define GB_DEF_GxB_ISLT_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_ISLT_INT16_function GB_ISLT_f_INT16
-#define GB_DEF_GxB_ISLT_INT16_ztype int16_t
-#define GB_DEF_GxB_ISLT_INT16_xtype int16_t
-#define GB_DEF_GxB_ISLT_INT16_ytype int16_t
-
-#define GB_DEF_GxB_ISLT_UINT16_function GB_ISLT_f_UINT16
-#define GB_DEF_GxB_ISLT_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ISLT_UINT16_xtype uint16_t
-#define GB_DEF_GxB_ISLT_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_ISLT_INT32_function GB_ISLT_f_INT32
-#define GB_DEF_GxB_ISLT_INT32_ztype int32_t
-#define GB_DEF_GxB_ISLT_INT32_xtype int32_t
-#define GB_DEF_GxB_ISLT_INT32_ytype int32_t
-
-#define GB_DEF_GxB_ISLT_UINT32_function GB_ISLT_f_UINT32
-#define GB_DEF_GxB_ISLT_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ISLT_UINT32_xtype uint32_t
-#define GB_DEF_GxB_ISLT_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_ISLT_INT64_function GB_ISLT_f_INT64
-#define GB_DEF_GxB_ISLT_INT64_ztype int64_t
-#define GB_DEF_GxB_ISLT_INT64_xtype int64_t
-#define GB_DEF_GxB_ISLT_INT64_ytype int64_t
-
-#define GB_DEF_GxB_ISLT_UINT64_function GB_ISLT_f_UINT64
-#define GB_DEF_GxB_ISLT_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ISLT_UINT64_xtype uint64_t
-#define GB_DEF_GxB_ISLT_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_ISLT_FP32_function GB_ISLT_f_FP32
-#define GB_DEF_GxB_ISLT_FP32_ztype float
-#define GB_DEF_GxB_ISLT_FP32_xtype float
-#define GB_DEF_GxB_ISLT_FP32_ytype float
-
-#define GB_DEF_GxB_ISLT_FP64_function GB_ISLT_f_FP64
-#define GB_DEF_GxB_ISLT_FP64_ztype double
-#define GB_DEF_GxB_ISLT_FP64_xtype double
-#define GB_DEF_GxB_ISLT_FP64_ytype double
-
-// op: ISGE
-#define GB_DEF_GxB_ISGE_BOOL_function GB_ISGE_f_BOOL
-#define GB_DEF_GxB_ISGE_BOOL_ztype bool
-#define GB_DEF_GxB_ISGE_BOOL_xtype bool
-#define GB_DEF_GxB_ISGE_BOOL_ytype bool
-
-#define GB_DEF_GxB_ISGE_INT8_function GB_ISGE_f_INT8
-#define GB_DEF_GxB_ISGE_INT8_ztype int8_t
-#define GB_DEF_GxB_ISGE_INT8_xtype int8_t
-#define GB_DEF_GxB_ISGE_INT8_ytype int8_t
-
-#define GB_DEF_GxB_ISGE_UINT8_function GB_ISGE_f_UINT8
-#define GB_DEF_GxB_ISGE_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ISGE_UINT8_xtype uint8_t
-#define GB_DEF_GxB_ISGE_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_ISGE_INT16_function GB_ISGE_f_INT16
-#define GB_DEF_GxB_ISGE_INT16_ztype int16_t
-#define GB_DEF_GxB_ISGE_INT16_xtype int16_t
-#define GB_DEF_GxB_ISGE_INT16_ytype int16_t
-
-#define GB_DEF_GxB_ISGE_UINT16_function GB_ISGE_f_UINT16
-#define GB_DEF_GxB_ISGE_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ISGE_UINT16_xtype uint16_t
-#define GB_DEF_GxB_ISGE_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_ISGE_INT32_function GB_ISGE_f_INT32
-#define GB_DEF_GxB_ISGE_INT32_ztype int32_t
-#define GB_DEF_GxB_ISGE_INT32_xtype int32_t
-#define GB_DEF_GxB_ISGE_INT32_ytype int32_t
-
-#define GB_DEF_GxB_ISGE_UINT32_function GB_ISGE_f_UINT32
-#define GB_DEF_GxB_ISGE_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ISGE_UINT32_xtype uint32_t
-#define GB_DEF_GxB_ISGE_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_ISGE_INT64_function GB_ISGE_f_INT64
-#define GB_DEF_GxB_ISGE_INT64_ztype int64_t
-#define GB_DEF_GxB_ISGE_INT64_xtype int64_t
-#define GB_DEF_GxB_ISGE_INT64_ytype int64_t
-
-#define GB_DEF_GxB_ISGE_UINT64_function GB_ISGE_f_UINT64
-#define GB_DEF_GxB_ISGE_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ISGE_UINT64_xtype uint64_t
-#define GB_DEF_GxB_ISGE_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_ISGE_FP32_function GB_ISGE_f_FP32
-#define GB_DEF_GxB_ISGE_FP32_ztype float
-#define GB_DEF_GxB_ISGE_FP32_xtype float
-#define GB_DEF_GxB_ISGE_FP32_ytype float
-
-#define GB_DEF_GxB_ISGE_FP64_function GB_ISGE_f_FP64
-#define GB_DEF_GxB_ISGE_FP64_ztype double
-#define GB_DEF_GxB_ISGE_FP64_xtype double
-#define GB_DEF_GxB_ISGE_FP64_ytype double
-
-// op: ISLE
-#define GB_DEF_GxB_ISLE_BOOL_function GB_ISLE_f_BOOL
-#define GB_DEF_GxB_ISLE_BOOL_ztype bool
-#define GB_DEF_GxB_ISLE_BOOL_xtype bool
-#define GB_DEF_GxB_ISLE_BOOL_ytype bool
-
-#define GB_DEF_GxB_ISLE_INT8_function GB_ISLE_f_INT8
-#define GB_DEF_GxB_ISLE_INT8_ztype int8_t
-#define GB_DEF_GxB_ISLE_INT8_xtype int8_t
-#define GB_DEF_GxB_ISLE_INT8_ytype int8_t
-
-#define GB_DEF_GxB_ISLE_UINT8_function GB_ISLE_f_UINT8
-#define GB_DEF_GxB_ISLE_UINT8_ztype uint8_t
-#define GB_DEF_GxB_ISLE_UINT8_xtype uint8_t
-#define GB_DEF_GxB_ISLE_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_ISLE_INT16_function GB_ISLE_f_INT16
-#define GB_DEF_GxB_ISLE_INT16_ztype int16_t
-#define GB_DEF_GxB_ISLE_INT16_xtype int16_t
-#define GB_DEF_GxB_ISLE_INT16_ytype int16_t
-
-#define GB_DEF_GxB_ISLE_UINT16_function GB_ISLE_f_UINT16
-#define GB_DEF_GxB_ISLE_UINT16_ztype uint16_t
-#define GB_DEF_GxB_ISLE_UINT16_xtype uint16_t
-#define GB_DEF_GxB_ISLE_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_ISLE_INT32_function GB_ISLE_f_INT32
-#define GB_DEF_GxB_ISLE_INT32_ztype int32_t
-#define GB_DEF_GxB_ISLE_INT32_xtype int32_t
-#define GB_DEF_GxB_ISLE_INT32_ytype int32_t
-
-#define GB_DEF_GxB_ISLE_UINT32_function GB_ISLE_f_UINT32
-#define GB_DEF_GxB_ISLE_UINT32_ztype uint32_t
-#define GB_DEF_GxB_ISLE_UINT32_xtype uint32_t
-#define GB_DEF_GxB_ISLE_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_ISLE_INT64_function GB_ISLE_f_INT64
-#define GB_DEF_GxB_ISLE_INT64_ztype int64_t
-#define GB_DEF_GxB_ISLE_INT64_xtype int64_t
-#define GB_DEF_GxB_ISLE_INT64_ytype int64_t
-
-#define GB_DEF_GxB_ISLE_UINT64_function GB_ISLE_f_UINT64
-#define GB_DEF_GxB_ISLE_UINT64_ztype uint64_t
-#define GB_DEF_GxB_ISLE_UINT64_xtype uint64_t
-#define GB_DEF_GxB_ISLE_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_ISLE_FP32_function GB_ISLE_f_FP32
-#define GB_DEF_GxB_ISLE_FP32_ztype float
-#define GB_DEF_GxB_ISLE_FP32_xtype float
-#define GB_DEF_GxB_ISLE_FP32_ytype float
-
-#define GB_DEF_GxB_ISLE_FP64_function GB_ISLE_f_FP64
-#define GB_DEF_GxB_ISLE_FP64_ztype double
-#define GB_DEF_GxB_ISLE_FP64_xtype double
-#define GB_DEF_GxB_ISLE_FP64_ytype double
-
-// op: LOR
-#define GB_DEF_GrB_LOR_function GB_LOR_f_BOOL
-#define GB_DEF_GrB_LOR_ztype bool
-#define GB_DEF_GrB_LOR_xtype bool
-#define GB_DEF_GrB_LOR_ytype bool
-
-#define GB_DEF_GxB_LOR_BOOL_function GB_LOR_f_BOOL
-#define GB_DEF_GxB_LOR_BOOL_ztype bool
-#define GB_DEF_GxB_LOR_BOOL_xtype bool
-#define GB_DEF_GxB_LOR_BOOL_ytype bool
-
-#define GB_DEF_GxB_LOR_INT8_function GB_LOR_f_INT8
-#define GB_DEF_GxB_LOR_INT8_ztype int8_t
-#define GB_DEF_GxB_LOR_INT8_xtype int8_t
-#define GB_DEF_GxB_LOR_INT8_ytype int8_t
-
-#define GB_DEF_GxB_LOR_UINT8_function GB_LOR_f_UINT8
-#define GB_DEF_GxB_LOR_UINT8_ztype uint8_t
-#define GB_DEF_GxB_LOR_UINT8_xtype uint8_t
-#define GB_DEF_GxB_LOR_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_LOR_INT16_function GB_LOR_f_INT16
-#define GB_DEF_GxB_LOR_INT16_ztype int16_t
-#define GB_DEF_GxB_LOR_INT16_xtype int16_t
-#define GB_DEF_GxB_LOR_INT16_ytype int16_t
-
-#define GB_DEF_GxB_LOR_UINT16_function GB_LOR_f_UINT16
-#define GB_DEF_GxB_LOR_UINT16_ztype uint16_t
-#define GB_DEF_GxB_LOR_UINT16_xtype uint16_t
-#define GB_DEF_GxB_LOR_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_LOR_INT32_function GB_LOR_f_INT32
-#define GB_DEF_GxB_LOR_INT32_ztype int32_t
-#define GB_DEF_GxB_LOR_INT32_xtype int32_t
-#define GB_DEF_GxB_LOR_INT32_ytype int32_t
-
-#define GB_DEF_GxB_LOR_UINT32_function GB_LOR_f_UINT32
-#define GB_DEF_GxB_LOR_UINT32_ztype uint32_t
-#define GB_DEF_GxB_LOR_UINT32_xtype uint32_t
-#define GB_DEF_GxB_LOR_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_LOR_INT64_function GB_LOR_f_INT64
-#define GB_DEF_GxB_LOR_INT64_ztype int64_t
-#define GB_DEF_GxB_LOR_INT64_xtype int64_t
-#define GB_DEF_GxB_LOR_INT64_ytype int64_t
-
-#define GB_DEF_GxB_LOR_UINT64_function GB_LOR_f_UINT64
-#define GB_DEF_GxB_LOR_UINT64_ztype uint64_t
-#define GB_DEF_GxB_LOR_UINT64_xtype uint64_t
-#define GB_DEF_GxB_LOR_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_LOR_FP32_function GB_LOR_f_FP32
-#define GB_DEF_GxB_LOR_FP32_ztype float
-#define GB_DEF_GxB_LOR_FP32_xtype float
-#define GB_DEF_GxB_LOR_FP32_ytype float
-
-#define GB_DEF_GxB_LOR_FP64_function GB_LOR_f_FP64
-#define GB_DEF_GxB_LOR_FP64_ztype double
-#define GB_DEF_GxB_LOR_FP64_xtype double
-#define GB_DEF_GxB_LOR_FP64_ytype double
-
-// op: LAND
-#define GB_DEF_GrB_LAND_function GB_LAND_f_BOOL
-#define GB_DEF_GrB_LAND_ztype bool
-#define GB_DEF_GrB_LAND_xtype bool
-#define GB_DEF_GrB_LAND_ytype bool
-
-#define GB_DEF_GxB_LAND_BOOL_function GB_LAND_f_BOOL
-#define GB_DEF_GxB_LAND_BOOL_ztype bool
-#define GB_DEF_GxB_LAND_BOOL_xtype bool
-#define GB_DEF_GxB_LAND_BOOL_ytype bool
-
-#define GB_DEF_GxB_LAND_INT8_function GB_LAND_f_INT8
-#define GB_DEF_GxB_LAND_INT8_ztype int8_t
-#define GB_DEF_GxB_LAND_INT8_xtype int8_t
-#define GB_DEF_GxB_LAND_INT8_ytype int8_t
-
-#define GB_DEF_GxB_LAND_UINT8_function GB_LAND_f_UINT8
-#define GB_DEF_GxB_LAND_UINT8_ztype uint8_t
-#define GB_DEF_GxB_LAND_UINT8_xtype uint8_t
-#define GB_DEF_GxB_LAND_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_LAND_INT16_function GB_LAND_f_INT16
-#define GB_DEF_GxB_LAND_INT16_ztype int16_t
-#define GB_DEF_GxB_LAND_INT16_xtype int16_t
-#define GB_DEF_GxB_LAND_INT16_ytype int16_t
-
-#define GB_DEF_GxB_LAND_UINT16_function GB_LAND_f_UINT16
-#define GB_DEF_GxB_LAND_UINT16_ztype uint16_t
-#define GB_DEF_GxB_LAND_UINT16_xtype uint16_t
-#define GB_DEF_GxB_LAND_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_LAND_INT32_function GB_LAND_f_INT32
-#define GB_DEF_GxB_LAND_INT32_ztype int32_t
-#define GB_DEF_GxB_LAND_INT32_xtype int32_t
-#define GB_DEF_GxB_LAND_INT32_ytype int32_t
-
-#define GB_DEF_GxB_LAND_UINT32_function GB_LAND_f_UINT32
-#define GB_DEF_GxB_LAND_UINT32_ztype uint32_t
-#define GB_DEF_GxB_LAND_UINT32_xtype uint32_t
-#define GB_DEF_GxB_LAND_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_LAND_INT64_function GB_LAND_f_INT64
-#define GB_DEF_GxB_LAND_INT64_ztype int64_t
-#define GB_DEF_GxB_LAND_INT64_xtype int64_t
-#define GB_DEF_GxB_LAND_INT64_ytype int64_t
-
-#define GB_DEF_GxB_LAND_UINT64_function GB_LAND_f_UINT64
-#define GB_DEF_GxB_LAND_UINT64_ztype uint64_t
-#define GB_DEF_GxB_LAND_UINT64_xtype uint64_t
-#define GB_DEF_GxB_LAND_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_LAND_FP32_function GB_LAND_f_FP32
-#define GB_DEF_GxB_LAND_FP32_ztype float
-#define GB_DEF_GxB_LAND_FP32_xtype float
-#define GB_DEF_GxB_LAND_FP32_ytype float
-
-#define GB_DEF_GxB_LAND_FP64_function GB_LAND_f_FP64
-#define GB_DEF_GxB_LAND_FP64_ztype double
-#define GB_DEF_GxB_LAND_FP64_xtype double
-#define GB_DEF_GxB_LAND_FP64_ytype double
-
-// op: LXOR
-#define GB_DEF_GrB_LXOR_function GB_LXOR_f_BOOL
-#define GB_DEF_GrB_LXOR_ztype bool
-#define GB_DEF_GrB_LXOR_xtype bool
-#define GB_DEF_GrB_LXOR_ytype bool
-
-#define GB_DEF_GxB_LXOR_BOOL_function GB_LXOR_f_BOOL
-#define GB_DEF_GxB_LXOR_BOOL_ztype bool
-#define GB_DEF_GxB_LXOR_BOOL_xtype bool
-#define GB_DEF_GxB_LXOR_BOOL_ytype bool
-
-#define GB_DEF_GxB_LXOR_INT8_function GB_LXOR_f_INT8
-#define GB_DEF_GxB_LXOR_INT8_ztype int8_t
-#define GB_DEF_GxB_LXOR_INT8_xtype int8_t
-#define GB_DEF_GxB_LXOR_INT8_ytype int8_t
-
-#define GB_DEF_GxB_LXOR_UINT8_function GB_LXOR_f_UINT8
-#define GB_DEF_GxB_LXOR_UINT8_ztype uint8_t
-#define GB_DEF_GxB_LXOR_UINT8_xtype uint8_t
-#define GB_DEF_GxB_LXOR_UINT8_ytype uint8_t
-
-#define GB_DEF_GxB_LXOR_INT16_function GB_LXOR_f_INT16
-#define GB_DEF_GxB_LXOR_INT16_ztype int16_t
-#define GB_DEF_GxB_LXOR_INT16_xtype int16_t
-#define GB_DEF_GxB_LXOR_INT16_ytype int16_t
-
-#define GB_DEF_GxB_LXOR_UINT16_function GB_LXOR_f_UINT16
-#define GB_DEF_GxB_LXOR_UINT16_ztype uint16_t
-#define GB_DEF_GxB_LXOR_UINT16_xtype uint16_t
-#define GB_DEF_GxB_LXOR_UINT16_ytype uint16_t
-
-#define GB_DEF_GxB_LXOR_INT32_function GB_LXOR_f_INT32
-#define GB_DEF_GxB_LXOR_INT32_ztype int32_t
-#define GB_DEF_GxB_LXOR_INT32_xtype int32_t
-#define GB_DEF_GxB_LXOR_INT32_ytype int32_t
-
-#define GB_DEF_GxB_LXOR_UINT32_function GB_LXOR_f_UINT32
-#define GB_DEF_GxB_LXOR_UINT32_ztype uint32_t
-#define GB_DEF_GxB_LXOR_UINT32_xtype uint32_t
-#define GB_DEF_GxB_LXOR_UINT32_ytype uint32_t
-
-#define GB_DEF_GxB_LXOR_INT64_function GB_LXOR_f_INT64
-#define GB_DEF_GxB_LXOR_INT64_ztype int64_t
-#define GB_DEF_GxB_LXOR_INT64_xtype int64_t
-#define GB_DEF_GxB_LXOR_INT64_ytype int64_t
-
-#define GB_DEF_GxB_LXOR_UINT64_function GB_LXOR_f_UINT64
-#define GB_DEF_GxB_LXOR_UINT64_ztype uint64_t
-#define GB_DEF_GxB_LXOR_UINT64_xtype uint64_t
-#define GB_DEF_GxB_LXOR_UINT64_ytype uint64_t
-
-#define GB_DEF_GxB_LXOR_FP32_function GB_LXOR_f_FP32
-#define GB_DEF_GxB_LXOR_FP32_ztype float
-#define GB_DEF_GxB_LXOR_FP32_xtype float
-#define GB_DEF_GxB_LXOR_FP32_ytype float
-
-#define GB_DEF_GxB_LXOR_FP64_function GB_LXOR_f_FP64
-#define GB_DEF_GxB_LXOR_FP64_ztype double
-#define GB_DEF_GxB_LXOR_FP64_xtype double
-#define GB_DEF_GxB_LXOR_FP64_ytype double
-
-
-//------------------------------------------------------
-// binary operators of the form z=f(x,y): TxT -> bool
-//------------------------------------------------------
-
-// op: EQ
-#define GB_DEF_GrB_EQ_BOOL_function GB_EQ_f_BOOL
-#define GB_DEF_GrB_EQ_BOOL_ztype bool
-#define GB_DEF_GrB_EQ_BOOL_xtype bool
-#define GB_DEF_GrB_EQ_BOOL_ytype bool
-
-#define GB_DEF_GrB_EQ_INT8_function GB_EQ_f_INT8
-#define GB_DEF_GrB_EQ_INT8_ztype bool
-#define GB_DEF_GrB_EQ_INT8_xtype int8_t
-#define GB_DEF_GrB_EQ_INT8_ytype int8_t
-
-#define GB_DEF_GrB_EQ_UINT8_function GB_EQ_f_UINT8
-#define GB_DEF_GrB_EQ_UINT8_ztype bool
-#define GB_DEF_GrB_EQ_UINT8_xtype uint8_t
-#define GB_DEF_GrB_EQ_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_EQ_INT16_function GB_EQ_f_INT16
-#define GB_DEF_GrB_EQ_INT16_ztype bool
-#define GB_DEF_GrB_EQ_INT16_xtype int16_t
-#define GB_DEF_GrB_EQ_INT16_ytype int16_t
-
-#define GB_DEF_GrB_EQ_UINT16_function GB_EQ_f_UINT16
-#define GB_DEF_GrB_EQ_UINT16_ztype bool
-#define GB_DEF_GrB_EQ_UINT16_xtype uint16_t
-#define GB_DEF_GrB_EQ_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_EQ_INT32_function GB_EQ_f_INT32
-#define GB_DEF_GrB_EQ_INT32_ztype bool
-#define GB_DEF_GrB_EQ_INT32_xtype int32_t
-#define GB_DEF_GrB_EQ_INT32_ytype int32_t
-
-#define GB_DEF_GrB_EQ_UINT32_function GB_EQ_f_UINT32
-#define GB_DEF_GrB_EQ_UINT32_ztype bool
-#define GB_DEF_GrB_EQ_UINT32_xtype uint32_t
-#define GB_DEF_GrB_EQ_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_EQ_INT64_function GB_EQ_f_INT64
-#define GB_DEF_GrB_EQ_INT64_ztype bool
-#define GB_DEF_GrB_EQ_INT64_xtype int64_t
-#define GB_DEF_GrB_EQ_INT64_ytype int64_t
-
-#define GB_DEF_GrB_EQ_UINT64_function GB_EQ_f_UINT64
-#define GB_DEF_GrB_EQ_UINT64_ztype bool
-#define GB_DEF_GrB_EQ_UINT64_xtype uint64_t
-#define GB_DEF_GrB_EQ_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_EQ_FP32_function GB_EQ_f_FP32
-#define GB_DEF_GrB_EQ_FP32_ztype bool
-#define GB_DEF_GrB_EQ_FP32_xtype float
-#define GB_DEF_GrB_EQ_FP32_ytype float
-
-#define GB_DEF_GrB_EQ_FP64_function GB_EQ_f_FP64
-#define GB_DEF_GrB_EQ_FP64_ztype bool
-#define GB_DEF_GrB_EQ_FP64_xtype double
-#define GB_DEF_GrB_EQ_FP64_ytype double
-
-// op: NE
-#define GB_DEF_GrB_NE_BOOL_function GB_NE_f_BOOL
-#define GB_DEF_GrB_NE_BOOL_ztype bool
-#define GB_DEF_GrB_NE_BOOL_xtype bool
-#define GB_DEF_GrB_NE_BOOL_ytype bool
-
-#define GB_DEF_GrB_NE_INT8_function GB_NE_f_INT8
-#define GB_DEF_GrB_NE_INT8_ztype bool
-#define GB_DEF_GrB_NE_INT8_xtype int8_t
-#define GB_DEF_GrB_NE_INT8_ytype int8_t
-
-#define GB_DEF_GrB_NE_UINT8_function GB_NE_f_UINT8
-#define GB_DEF_GrB_NE_UINT8_ztype bool
-#define GB_DEF_GrB_NE_UINT8_xtype uint8_t
-#define GB_DEF_GrB_NE_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_NE_INT16_function GB_NE_f_INT16
-#define GB_DEF_GrB_NE_INT16_ztype bool
-#define GB_DEF_GrB_NE_INT16_xtype int16_t
-#define GB_DEF_GrB_NE_INT16_ytype int16_t
-
-#define GB_DEF_GrB_NE_UINT16_function GB_NE_f_UINT16
-#define GB_DEF_GrB_NE_UINT16_ztype bool
-#define GB_DEF_GrB_NE_UINT16_xtype uint16_t
-#define GB_DEF_GrB_NE_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_NE_INT32_function GB_NE_f_INT32
-#define GB_DEF_GrB_NE_INT32_ztype bool
-#define GB_DEF_GrB_NE_INT32_xtype int32_t
-#define GB_DEF_GrB_NE_INT32_ytype int32_t
-
-#define GB_DEF_GrB_NE_UINT32_function GB_NE_f_UINT32
-#define GB_DEF_GrB_NE_UINT32_ztype bool
-#define GB_DEF_GrB_NE_UINT32_xtype uint32_t
-#define GB_DEF_GrB_NE_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_NE_INT64_function GB_NE_f_INT64
-#define GB_DEF_GrB_NE_INT64_ztype bool
-#define GB_DEF_GrB_NE_INT64_xtype int64_t
-#define GB_DEF_GrB_NE_INT64_ytype int64_t
-
-#define GB_DEF_GrB_NE_UINT64_function GB_NE_f_UINT64
-#define GB_DEF_GrB_NE_UINT64_ztype bool
-#define GB_DEF_GrB_NE_UINT64_xtype uint64_t
-#define GB_DEF_GrB_NE_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_NE_FP32_function GB_NE_f_FP32
-#define GB_DEF_GrB_NE_FP32_ztype bool
-#define GB_DEF_GrB_NE_FP32_xtype float
-#define GB_DEF_GrB_NE_FP32_ytype float
-
-#define GB_DEF_GrB_NE_FP64_function GB_NE_f_FP64
-#define GB_DEF_GrB_NE_FP64_ztype bool
-#define GB_DEF_GrB_NE_FP64_xtype double
-#define GB_DEF_GrB_NE_FP64_ytype double
-
-// op: GT
-#define GB_DEF_GrB_GT_BOOL_function GB_GT_f_BOOL
-#define GB_DEF_GrB_GT_BOOL_ztype bool
-#define GB_DEF_GrB_GT_BOOL_xtype bool
-#define GB_DEF_GrB_GT_BOOL_ytype bool
-
-#define GB_DEF_GrB_GT_INT8_function GB_GT_f_INT8
-#define GB_DEF_GrB_GT_INT8_ztype bool
-#define GB_DEF_GrB_GT_INT8_xtype int8_t
-#define GB_DEF_GrB_GT_INT8_ytype int8_t
-
-#define GB_DEF_GrB_GT_UINT8_function GB_GT_f_UINT8
-#define GB_DEF_GrB_GT_UINT8_ztype bool
-#define GB_DEF_GrB_GT_UINT8_xtype uint8_t
-#define GB_DEF_GrB_GT_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_GT_INT16_function GB_GT_f_INT16
-#define GB_DEF_GrB_GT_INT16_ztype bool
-#define GB_DEF_GrB_GT_INT16_xtype int16_t
-#define GB_DEF_GrB_GT_INT16_ytype int16_t
-
-#define GB_DEF_GrB_GT_UINT16_function GB_GT_f_UINT16
-#define GB_DEF_GrB_GT_UINT16_ztype bool
-#define GB_DEF_GrB_GT_UINT16_xtype uint16_t
-#define GB_DEF_GrB_GT_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_GT_INT32_function GB_GT_f_INT32
-#define GB_DEF_GrB_GT_INT32_ztype bool
-#define GB_DEF_GrB_GT_INT32_xtype int32_t
-#define GB_DEF_GrB_GT_INT32_ytype int32_t
-
-#define GB_DEF_GrB_GT_UINT32_function GB_GT_f_UINT32
-#define GB_DEF_GrB_GT_UINT32_ztype bool
-#define GB_DEF_GrB_GT_UINT32_xtype uint32_t
-#define GB_DEF_GrB_GT_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_GT_INT64_function GB_GT_f_INT64
-#define GB_DEF_GrB_GT_INT64_ztype bool
-#define GB_DEF_GrB_GT_INT64_xtype int64_t
-#define GB_DEF_GrB_GT_INT64_ytype int64_t
-
-#define GB_DEF_GrB_GT_UINT64_function GB_GT_f_UINT64
-#define GB_DEF_GrB_GT_UINT64_ztype bool
-#define GB_DEF_GrB_GT_UINT64_xtype uint64_t
-#define GB_DEF_GrB_GT_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_GT_FP32_function GB_GT_f_FP32
-#define GB_DEF_GrB_GT_FP32_ztype bool
-#define GB_DEF_GrB_GT_FP32_xtype float
-#define GB_DEF_GrB_GT_FP32_ytype float
-
-#define GB_DEF_GrB_GT_FP64_function GB_GT_f_FP64
-#define GB_DEF_GrB_GT_FP64_ztype bool
-#define GB_DEF_GrB_GT_FP64_xtype double
-#define GB_DEF_GrB_GT_FP64_ytype double
-
-// op: LT
-#define GB_DEF_GrB_LT_BOOL_function GB_LT_f_BOOL
-#define GB_DEF_GrB_LT_BOOL_ztype bool
-#define GB_DEF_GrB_LT_BOOL_xtype bool
-#define GB_DEF_GrB_LT_BOOL_ytype bool
-
-#define GB_DEF_GrB_LT_INT8_function GB_LT_f_INT8
-#define GB_DEF_GrB_LT_INT8_ztype bool
-#define GB_DEF_GrB_LT_INT8_xtype int8_t
-#define GB_DEF_GrB_LT_INT8_ytype int8_t
-
-#define GB_DEF_GrB_LT_UINT8_function GB_LT_f_UINT8
-#define GB_DEF_GrB_LT_UINT8_ztype bool
-#define GB_DEF_GrB_LT_UINT8_xtype uint8_t
-#define GB_DEF_GrB_LT_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_LT_INT16_function GB_LT_f_INT16
-#define GB_DEF_GrB_LT_INT16_ztype bool
-#define GB_DEF_GrB_LT_INT16_xtype int16_t
-#define GB_DEF_GrB_LT_INT16_ytype int16_t
-
-#define GB_DEF_GrB_LT_UINT16_function GB_LT_f_UINT16
-#define GB_DEF_GrB_LT_UINT16_ztype bool
-#define GB_DEF_GrB_LT_UINT16_xtype uint16_t
-#define GB_DEF_GrB_LT_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_LT_INT32_function GB_LT_f_INT32
-#define GB_DEF_GrB_LT_INT32_ztype bool
-#define GB_DEF_GrB_LT_INT32_xtype int32_t
-#define GB_DEF_GrB_LT_INT32_ytype int32_t
-
-#define GB_DEF_GrB_LT_UINT32_function GB_LT_f_UINT32
-#define GB_DEF_GrB_LT_UINT32_ztype bool
-#define GB_DEF_GrB_LT_UINT32_xtype uint32_t
-#define GB_DEF_GrB_LT_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_LT_INT64_function GB_LT_f_INT64
-#define GB_DEF_GrB_LT_INT64_ztype bool
-#define GB_DEF_GrB_LT_INT64_xtype int64_t
-#define GB_DEF_GrB_LT_INT64_ytype int64_t
-
-#define GB_DEF_GrB_LT_UINT64_function GB_LT_f_UINT64
-#define GB_DEF_GrB_LT_UINT64_ztype bool
-#define GB_DEF_GrB_LT_UINT64_xtype uint64_t
-#define GB_DEF_GrB_LT_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_LT_FP32_function GB_LT_f_FP32
-#define GB_DEF_GrB_LT_FP32_ztype bool
-#define GB_DEF_GrB_LT_FP32_xtype float
-#define GB_DEF_GrB_LT_FP32_ytype float
-
-#define GB_DEF_GrB_LT_FP64_function GB_LT_f_FP64
-#define GB_DEF_GrB_LT_FP64_ztype bool
-#define GB_DEF_GrB_LT_FP64_xtype double
-#define GB_DEF_GrB_LT_FP64_ytype double
-
-// op: GE
-#define GB_DEF_GrB_GE_BOOL_function GB_GE_f_BOOL
-#define GB_DEF_GrB_GE_BOOL_ztype bool
-#define GB_DEF_GrB_GE_BOOL_xtype bool
-#define GB_DEF_GrB_GE_BOOL_ytype bool
-
-#define GB_DEF_GrB_GE_INT8_function GB_GE_f_INT8
-#define GB_DEF_GrB_GE_INT8_ztype bool
-#define GB_DEF_GrB_GE_INT8_xtype int8_t
-#define GB_DEF_GrB_GE_INT8_ytype int8_t
-
-#define GB_DEF_GrB_GE_UINT8_function GB_GE_f_UINT8
-#define GB_DEF_GrB_GE_UINT8_ztype bool
-#define GB_DEF_GrB_GE_UINT8_xtype uint8_t
-#define GB_DEF_GrB_GE_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_GE_INT16_function GB_GE_f_INT16
-#define GB_DEF_GrB_GE_INT16_ztype bool
-#define GB_DEF_GrB_GE_INT16_xtype int16_t
-#define GB_DEF_GrB_GE_INT16_ytype int16_t
-
-#define GB_DEF_GrB_GE_UINT16_function GB_GE_f_UINT16
-#define GB_DEF_GrB_GE_UINT16_ztype bool
-#define GB_DEF_GrB_GE_UINT16_xtype uint16_t
-#define GB_DEF_GrB_GE_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_GE_INT32_function GB_GE_f_INT32
-#define GB_DEF_GrB_GE_INT32_ztype bool
-#define GB_DEF_GrB_GE_INT32_xtype int32_t
-#define GB_DEF_GrB_GE_INT32_ytype int32_t
-
-#define GB_DEF_GrB_GE_UINT32_function GB_GE_f_UINT32
-#define GB_DEF_GrB_GE_UINT32_ztype bool
-#define GB_DEF_GrB_GE_UINT32_xtype uint32_t
-#define GB_DEF_GrB_GE_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_GE_INT64_function GB_GE_f_INT64
-#define GB_DEF_GrB_GE_INT64_ztype bool
-#define GB_DEF_GrB_GE_INT64_xtype int64_t
-#define GB_DEF_GrB_GE_INT64_ytype int64_t
-
-#define GB_DEF_GrB_GE_UINT64_function GB_GE_f_UINT64
-#define GB_DEF_GrB_GE_UINT64_ztype bool
-#define GB_DEF_GrB_GE_UINT64_xtype uint64_t
-#define GB_DEF_GrB_GE_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_GE_FP32_function GB_GE_f_FP32
-#define GB_DEF_GrB_GE_FP32_ztype bool
-#define GB_DEF_GrB_GE_FP32_xtype float
-#define GB_DEF_GrB_GE_FP32_ytype float
-
-#define GB_DEF_GrB_GE_FP64_function GB_GE_f_FP64
-#define GB_DEF_GrB_GE_FP64_ztype bool
-#define GB_DEF_GrB_GE_FP64_xtype double
-#define GB_DEF_GrB_GE_FP64_ytype double
-
-// op: LE
-#define GB_DEF_GrB_LE_BOOL_function GB_LE_f_BOOL
-#define GB_DEF_GrB_LE_BOOL_ztype bool
-#define GB_DEF_GrB_LE_BOOL_xtype bool
-#define GB_DEF_GrB_LE_BOOL_ytype bool
-
-#define GB_DEF_GrB_LE_INT8_function GB_LE_f_INT8
-#define GB_DEF_GrB_LE_INT8_ztype bool
-#define GB_DEF_GrB_LE_INT8_xtype int8_t
-#define GB_DEF_GrB_LE_INT8_ytype int8_t
-
-#define GB_DEF_GrB_LE_UINT8_function GB_LE_f_UINT8
-#define GB_DEF_GrB_LE_UINT8_ztype bool
-#define GB_DEF_GrB_LE_UINT8_xtype uint8_t
-#define GB_DEF_GrB_LE_UINT8_ytype uint8_t
-
-#define GB_DEF_GrB_LE_INT16_function GB_LE_f_INT16
-#define GB_DEF_GrB_LE_INT16_ztype bool
-#define GB_DEF_GrB_LE_INT16_xtype int16_t
-#define GB_DEF_GrB_LE_INT16_ytype int16_t
-
-#define GB_DEF_GrB_LE_UINT16_function GB_LE_f_UINT16
-#define GB_DEF_GrB_LE_UINT16_ztype bool
-#define GB_DEF_GrB_LE_UINT16_xtype uint16_t
-#define GB_DEF_GrB_LE_UINT16_ytype uint16_t
-
-#define GB_DEF_GrB_LE_INT32_function GB_LE_f_INT32
-#define GB_DEF_GrB_LE_INT32_ztype bool
-#define GB_DEF_GrB_LE_INT32_xtype int32_t
-#define GB_DEF_GrB_LE_INT32_ytype int32_t
-
-#define GB_DEF_GrB_LE_UINT32_function GB_LE_f_UINT32
-#define GB_DEF_GrB_LE_UINT32_ztype bool
-#define GB_DEF_GrB_LE_UINT32_xtype uint32_t
-#define GB_DEF_GrB_LE_UINT32_ytype uint32_t
-
-#define GB_DEF_GrB_LE_INT64_function GB_LE_f_INT64
-#define GB_DEF_GrB_LE_INT64_ztype bool
-#define GB_DEF_GrB_LE_INT64_xtype int64_t
-#define GB_DEF_GrB_LE_INT64_ytype int64_t
-
-#define GB_DEF_GrB_LE_UINT64_function GB_LE_f_UINT64
-#define GB_DEF_GrB_LE_UINT64_ztype bool
-#define GB_DEF_GrB_LE_UINT64_xtype uint64_t
-#define GB_DEF_GrB_LE_UINT64_ytype uint64_t
-
-#define GB_DEF_GrB_LE_FP32_function GB_LE_f_FP32
-#define GB_DEF_GrB_LE_FP32_ztype bool
-#define GB_DEF_GrB_LE_FP32_xtype float
-#define GB_DEF_GrB_LE_FP32_ytype float
-
-#define GB_DEF_GrB_LE_FP64_function GB_LE_f_FP64
-#define GB_DEF_GrB_LE_FP64_ztype bool
-#define GB_DEF_GrB_LE_FP64_xtype double
-#define GB_DEF_GrB_LE_FP64_ytype double
-
-
-//------------------------------------------------------
-// binary operators of the form z=f(x,y): bool x bool -> bool
-//------------------------------------------------------
-
-#define GB_DEF_GrB_LOR_function GB_LOR_f_BOOL
-#define GB_DEF_GrB_LOR_ztype bool
-#define GB_DEF_GrB_LOR_xtype bool
-#define GB_DEF_GrB_LOR_ytype bool
-
-#define GB_DEF_GrB_LAND_function GB_LAND_f_BOOL
-#define GB_DEF_GrB_LAND_ztype bool
-#define GB_DEF_GrB_LAND_xtype bool
-#define GB_DEF_GrB_LAND_ytype bool
-
-#define GB_DEF_GrB_LXOR_function GB_LXOR_f_BOOL
-#define GB_DEF_GrB_LXOR_ztype bool
-#define GB_DEF_GrB_LXOR_xtype bool
-#define GB_DEF_GrB_LXOR_ytype bool
-
-
-//------------------------------------------------------
-// built-in monoids
-//------------------------------------------------------
-
-// op: MIN
-#define GB_DEF_GxB_MIN_BOOL_MONOID_add GB_MIN_f_BOOL
-#define GB_DEF_GxB_MIN_INT8_MONOID_add GB_MIN_f_INT8
-#define GB_DEF_GxB_MIN_UINT8_MONOID_add GB_MIN_f_UINT8
-#define GB_DEF_GxB_MIN_INT16_MONOID_add GB_MIN_f_INT16
-#define GB_DEF_GxB_MIN_UINT16_MONOID_add GB_MIN_f_UINT16
-#define GB_DEF_GxB_MIN_INT32_MONOID_add GB_MIN_f_INT32
-#define GB_DEF_GxB_MIN_UINT32_MONOID_add GB_MIN_f_UINT32
-#define GB_DEF_GxB_MIN_INT64_MONOID_add GB_MIN_f_INT64
-#define GB_DEF_GxB_MIN_UINT64_MONOID_add GB_MIN_f_UINT64
-#define GB_DEF_GxB_MIN_FP32_MONOID_add GB_MIN_f_FP32
-#define GB_DEF_GxB_MIN_FP64_MONOID_add GB_MIN_f_FP64
-// op: MAX
-#define GB_DEF_GxB_MAX_BOOL_MONOID_add GB_MAX_f_BOOL
-#define GB_DEF_GxB_MAX_INT8_MONOID_add GB_MAX_f_INT8
-#define GB_DEF_GxB_MAX_UINT8_MONOID_add GB_MAX_f_UINT8
-#define GB_DEF_GxB_MAX_INT16_MONOID_add GB_MAX_f_INT16
-#define GB_DEF_GxB_MAX_UINT16_MONOID_add GB_MAX_f_UINT16
-#define GB_DEF_GxB_MAX_INT32_MONOID_add GB_MAX_f_INT32
-#define GB_DEF_GxB_MAX_UINT32_MONOID_add GB_MAX_f_UINT32
-#define GB_DEF_GxB_MAX_INT64_MONOID_add GB_MAX_f_INT64
-#define GB_DEF_GxB_MAX_UINT64_MONOID_add GB_MAX_f_UINT64
-#define GB_DEF_GxB_MAX_FP32_MONOID_add GB_MAX_f_FP32
-#define GB_DEF_GxB_MAX_FP64_MONOID_add GB_MAX_f_FP64
-// op: PLUS
-#define GB_DEF_GxB_PLUS_BOOL_MONOID_add GB_PLUS_f_BOOL
-#define GB_DEF_GxB_PLUS_INT8_MONOID_add GB_PLUS_f_INT8
-#define GB_DEF_GxB_PLUS_UINT8_MONOID_add GB_PLUS_f_UINT8
-#define GB_DEF_GxB_PLUS_INT16_MONOID_add GB_PLUS_f_INT16
-#define GB_DEF_GxB_PLUS_UINT16_MONOID_add GB_PLUS_f_UINT16
-#define GB_DEF_GxB_PLUS_INT32_MONOID_add GB_PLUS_f_INT32
-#define GB_DEF_GxB_PLUS_UINT32_MONOID_add GB_PLUS_f_UINT32
-#define GB_DEF_GxB_PLUS_INT64_MONOID_add GB_PLUS_f_INT64
-#define GB_DEF_GxB_PLUS_UINT64_MONOID_add GB_PLUS_f_UINT64
-#define GB_DEF_GxB_PLUS_FP32_MONOID_add GB_PLUS_f_FP32
-#define GB_DEF_GxB_PLUS_FP64_MONOID_add GB_PLUS_f_FP64
-// op: TIMES
-#define GB_DEF_GxB_TIMES_BOOL_MONOID_add GB_TIMES_f_BOOL
-#define GB_DEF_GxB_TIMES_INT8_MONOID_add GB_TIMES_f_INT8
-#define GB_DEF_GxB_TIMES_UINT8_MONOID_add GB_TIMES_f_UINT8
-#define GB_DEF_GxB_TIMES_INT16_MONOID_add GB_TIMES_f_INT16
-#define GB_DEF_GxB_TIMES_UINT16_MONOID_add GB_TIMES_f_UINT16
-#define GB_DEF_GxB_TIMES_INT32_MONOID_add GB_TIMES_f_INT32
-#define GB_DEF_GxB_TIMES_UINT32_MONOID_add GB_TIMES_f_UINT32
-#define GB_DEF_GxB_TIMES_INT64_MONOID_add GB_TIMES_f_INT64
-#define GB_DEF_GxB_TIMES_UINT64_MONOID_add GB_TIMES_f_UINT64
-#define GB_DEF_GxB_TIMES_FP32_MONOID_add GB_TIMES_f_FP32
-#define GB_DEF_GxB_TIMES_FP64_MONOID_add GB_TIMES_f_FP64
-
-// op: Boolean
-#define GB_DEF_GxB_LOR_BOOL_MONOID_add   GB_LOR_f_BOOL
-#define GB_DEF_GxB_LAND_BOOL_MONOID_add  GB_LAND_f_BOOL
-#define GB_DEF_GxB_LXOR_BOOL_MONOID_add  GB_LXOR_f_BOOL
-#define GB_DEF_GxB_EQ_BOOL_MONOID_add    GB_EQ_f_BOOL
-
-// monoid identity values
-#define GB_DEF_GxB_MIN_INT8_MONOID_identity   INT8_MAX
-#define GB_DEF_GxB_MIN_UINT8_MONOID_identity  UINT8_MAX
-#define GB_DEF_GxB_MIN_INT16_MONOID_identity  INT16_MAX
-#define GB_DEF_GxB_MIN_UINT16_MONOID_identity UINT16_MAX
-#define GB_DEF_GxB_MIN_INT32_MONOID_identity  INT32_MAX
-#define GB_DEF_GxB_MIN_UINT32_MONOID_identity UINT32_MAX
-#define GB_DEF_GxB_MIN_INT64_MONOID_identity  INT64_MAX
-#define GB_DEF_GxB_MIN_UINT64_MONOID_identity UINT64_MAX
-#define GB_DEF_GxB_MIN_FP32_MONOID_identity   INFINITY
-#define GB_DEF_GxB_MIN_FP64_MONOID_identity   INFINITY
-
-#define GB_DEF_GxB_MAX_INT8_MONOID_identity   INT8_MIN
-#define GB_DEF_GxB_MAX_UINT8_MONOID_identity  0
-#define GB_DEF_GxB_MAX_INT16_MONOID_identity  INT16_MIN
-#define GB_DEF_GxB_MAX_UINT16_MONOID_identity 0
-#define GB_DEF_GxB_MAX_INT32_MONOID_identity  INT32_MIN
-#define GB_DEF_GxB_MAX_UINT32_MONOID_identity 0
-#define GB_DEF_GxB_MAX_INT64_MONOID_identity  INT64_MIN
-#define GB_DEF_GxB_MAX_UINT64_MONOID_identity 0
-#define GB_DEF_GxB_MAX_FP32_MONOID_identity   (-INFINITY)
-#define GB_DEF_GxB_MAX_FP64_MONOID_identity   (-INFINITY)
-
-#define GB_DEF_GxB_PLUS_INT8_MONOID_identity   0
-#define GB_DEF_GxB_PLUS_UINT8_MONOID_identity  0
-#define GB_DEF_GxB_PLUS_INT16_MONOID_identity  0
-#define GB_DEF_GxB_PLUS_UINT16_MONOID_identity 0
-#define GB_DEF_GxB_PLUS_INT32_MONOID_identity  0
-#define GB_DEF_GxB_PLUS_UINT32_MONOID_identity 0
-#define GB_DEF_GxB_PLUS_INT64_MONOID_identity  0
-#define GB_DEF_GxB_PLUS_UINT64_MONOID_identity 0
-#define GB_DEF_GxB_PLUS_FP32_MONOID_identity   0
-#define GB_DEF_GxB_PLUS_FP64_MONOID_identity   0
-
-#define GB_DEF_GxB_TIMES_INT8_MONOID_identity   1
-#define GB_DEF_GxB_TIMES_UINT8_MONOID_identity  1
-#define GB_DEF_GxB_TIMES_INT16_MONOID_identity  1
-#define GB_DEF_GxB_TIMES_UINT16_MONOID_identity 1
-#define GB_DEF_GxB_TIMES_INT32_MONOID_identity  1
-#define GB_DEF_GxB_TIMES_UINT32_MONOID_identity 1
-#define GB_DEF_GxB_TIMES_INT64_MONOID_identity  1
-#define GB_DEF_GxB_TIMES_UINT64_MONOID_identity 1
-#define GB_DEF_GxB_TIMES_FP32_MONOID_identity   1
-#define GB_DEF_GxB_TIMES_FP64_MONOID_identity   1
-
-#define GB_DEF_GxB_LOR_BOOL_MONOID_identity    false
-#define GB_DEF_GxB_LAND_BOOL_MONOID_identity   true
-#define GB_DEF_GxB_LXOR_BOOL_MONOID_identity   false
-#define GB_DEF_GxB_EQ_BOOL_MONOID_identity     true
-
-// monoid terminal values
-#define GB_DEF_GxB_MIN_INT8_MONOID_terminal   INT8_MIN
-#define GB_DEF_GxB_MIN_UINT8_MONOID_terminal  0
-#define GB_DEF_GxB_MIN_INT16_MONOID_terminal  INT16_MIN
-#define GB_DEF_GxB_MIN_UINT16_MONOID_terminal 0
-#define GB_DEF_GxB_MIN_INT32_MONOID_terminal  INT32_MIN
-#define GB_DEF_GxB_MIN_UINT32_MONOID_terminal 0
-#define GB_DEF_GxB_MIN_INT64_MONOID_terminal  INT64_MIN
-#define GB_DEF_GxB_MIN_UINT64_MONOID_terminal 0
-#define GB_DEF_GxB_MIN_FP32_MONOID_terminal   (-INFINITY)
-#define GB_DEF_GxB_MIN_FP64_MONOID_terminal   (-INFINITY)
-
-#define GB_DEF_GxB_MAX_INT8_MONOID_terminal   INT8_MAX
-#define GB_DEF_GxB_MAX_UINT8_MONOID_terminal  UINT8_MAX
-#define GB_DEF_GxB_MAX_INT16_MONOID_terminal  INT16_MAX
-#define GB_DEF_GxB_MAX_UINT16_MONOID_terminal UINT16_MAX
-#define GB_DEF_GxB_MAX_INT32_MONOID_terminal  INT32_MAX
-#define GB_DEF_GxB_MAX_UINT32_MONOID_terminal UINT32_MAX
-#define GB_DEF_GxB_MAX_INT64_MONOID_terminal  INT64_MAX
-#define GB_DEF_GxB_MAX_UINT64_MONOID_terminal UINT64_MAX
-#define GB_DEF_GxB_MAX_FP32_MONOID_terminal   INFINITY
-#define GB_DEF_GxB_MAX_FP64_MONOID_terminal   INFINITY
-
-// no #define GB_DEF_GxB_PLUS_INT8_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_UINT8_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_INT16_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_UINT16_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_INT32_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_UINT32_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_INT64_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_UINT64_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_FP32_MONOID_terminal
-// no #define GB_DEF_GxB_PLUS_FP64_MONOID_terminal
-
-#define GB_DEF_GxB_TIMES_INT8_MONOID_terminal   0
-#define GB_DEF_GxB_TIMES_UINT8_MONOID_terminal  0
-#define GB_DEF_GxB_TIMES_INT16_MONOID_terminal  0
-#define GB_DEF_GxB_TIMES_UINT16_MONOID_terminal 0
-#define GB_DEF_GxB_TIMES_INT32_MONOID_terminal  0
-#define GB_DEF_GxB_TIMES_UINT32_MONOID_terminal 0
-#define GB_DEF_GxB_TIMES_INT64_MONOID_terminal  0
-#define GB_DEF_GxB_TIMES_UINT64_MONOID_terminal 0
-// no #define GB_DEF_GxB_TIMES_FP32_MONOID_terminal
-// no #define GB_DEF_GxB_TIMES_FP64_MONOID_terminal
-
-#define GB_DEF_GxB_LOR_BOOL_MONOID_terminal    true
-#define GB_DEF_GxB_LAND_BOOL_MONOID_terminal   false
-// no #define GB_DEF_GxB_LXOR_BOOL_MONOID_terminal
-// no #define GB_DEF_GxB_EQ_BOOL_MONOID_terminal
-
-#endif
-
diff --git a/Source/GB_wait.c b/Source/GB_wait.c
index 07fd0139d8..44346dbf42 100644
--- a/Source/GB_wait.c
+++ b/Source/GB_wait.c
@@ -2,7 +2,7 @@
 // GB_wait:  finish all pending computations on a single matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -10,11 +10,11 @@
 // CALLS:     GB_builder
 
 // This function is typically called via the GB_WAIT(A) macro, except for
-// GB_assign and GB_subassign.
+// GB_assign, GB_subassign, and GB_mxm.
 
 // The matrix A has zombies and/or pending tuples placed there by
-// GrB_setElement and GrB_*assign.  Zombies must now be deleted, and pending
-// tuples must now be assembled together and added into the matrix.
+// GrB_setElement, GrB_*assign, or GB_mxm.  Zombies must now be deleted, and
+// pending tuples must now be assembled together and added into the matrix.
 
 // When the function returns, the matrix has been removed from the queue
 // and all pending tuples and zombies have been deleted.  This is true even
@@ -64,9 +64,9 @@ GrB_Info GB_wait                // finish all pending computations
     ASSERT (A != NULL) ;
 
     // The matrix A might have pending operations but not be in the queue.
-    // GB_Matrix_check expects the matrix to be in the queue.  As a result, GB_Matrix_check
-    // can report an inconsistency, and thus this assert must be made
-    // with a negative pr.
+    // GB_Matrix_check expects the matrix to be in the queue.  As a result,
+    // GB_Matrix_check can report an inconsistency, and thus this assert must
+    // be made with a negative pr.
     ASSERT_MATRIX_OK (A, "A to wait", GB_FLIP (GB0)) ;
 
     //--------------------------------------------------------------------------
@@ -89,6 +89,13 @@ GrB_Info GB_wait                // finish all pending computations
     GrB_Info info = GrB_SUCCESS ;
 
     int64_t nzombies = A->nzombies ;
+    int64_t npending = GB_Pending_n (A) ;
+
+    if (nzombies > 0 || npending > 0)
+    { 
+        GB_BURBLE_MATRIX (A, "wait (zombies: "GBd", pending: "GBd") ",
+            nzombies, npending) ;
+    }
 
     if (nzombies > 0)
     { 
@@ -208,7 +215,12 @@ GrB_Info GB_wait                // finish all pending computations
 
     // Finally check the status of the builder.  The pending tuples, must
     // be freed (just above), whether or not the builder is successful.
-    GB_OK (info) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory in GB_builder
+        GB_FREE_ALL ;
+        return (info) ;
+    }
 
     ASSERT_MATRIX_OK (T, "T = matrix of pending tuples", GB0) ;
     ASSERT (!GB_PENDING (T)) ;
@@ -251,7 +263,7 @@ GrB_Info GB_wait                // finish all pending computations
         int64_t pright = A->nvec - 1 ;
         bool found ;
         int64_t *GB_RESTRICT Ah = A->h ;
-        GB_BINARY_SPLIT_SEARCH (tjfirst, Ah, kA, pright, found) ;
+        GB_SPLIT_BINARY_SEARCH (tjfirst, Ah, kA, pright, found) ;
         // Ah [0 ... kA-1] excludes vector tjfirst.  The list
         // Ah [kA ... A->nvec-1] includes tjfirst.
         ASSERT (kA >= 0 && kA <= A->nvec) ;
@@ -324,8 +336,8 @@ GrB_Info GB_wait                // finish all pending computations
             GB_MATRIX_FREE (&(Aslice [0])) ;
 
             // S = A1 + T, but with no operator
-            GB_OK (GB_add (&S, A->type, A->is_csc, NULL, Aslice [1], T, NULL,
-                Context)) ;
+            GB_OK (GB_add (&S, A->type, A->is_csc, NULL, 0, Aslice [1], T,
+                NULL, Context)) ;
 
             ASSERT_MATRIX_OK (S, "S = A1+T", GB0) ;
 
@@ -406,7 +418,7 @@ GrB_Info GB_wait                // finish all pending computations
         // FUTURE:: if GB_add could tolerate zombies in A, then the initial
         // prune of zombies can be skipped.
 
-        GB_OK (GB_add (&S, A->type, A->is_csc, NULL, A, T, NULL, Context)) ;
+        GB_OK (GB_add (&S, A->type, A->is_csc, NULL, 0, A, T, NULL, Context)) ;
         GB_MATRIX_FREE (&T) ;
         ASSERT_MATRIX_OK (S, "S after GB_wait:add", GB0) ;
         return (GB_transplant_conform (A, A->type, &S, Context)) ;
diff --git a/Source/Generated/GB_AxB__any_div_fp32.c b/Source/Generated/GB_AxB__any_div_fp32.c
new file mode 100644
index 0000000000..f95510f943
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_fp32
+// A'*B function (dot3):     GB_Adot3B__any_div_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik / bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik / bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x / y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x / y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); unconditional for the ANY monoid
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_DIV_FP32 || GxB_NO_ANY_DIV_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"  // supplies the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_fp64.c b/Source/Generated/GB_AxB__any_div_fp64.c
new file mode 100644
index 0000000000..baf323ad6f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_fp64
+// A'*B function (dot3):     GB_Adot3B__any_div_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik / bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik / bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x / y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x / y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); unconditional for the ANY monoid
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_DIV_FP64 || GxB_NO_ANY_DIV_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"  // supplies the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_int16.c b/Source/Generated/GB_AxB__any_div_int16.c
new file mode 100644
index 0000000000..0ab2337b22
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_int16
+// A'*B function (dot3):     GB_Adot3B__any_div_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = GB_IDIV_SIGNED (aik, bkj, 16)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 16) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 16)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); unconditional for the ANY monoid
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_DIV_INT16 || GxB_NO_ANY_DIV_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"  // supplies the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_int32.c b/Source/Generated/GB_AxB__any_div_int32.c
new file mode 100644
index 0000000000..d63f093580
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_int32
+// A'*B function (dot3):     GB_Adot3B__any_div_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = GB_IDIV_SIGNED (aik, bkj, 32)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 32) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 32)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); unconditional for the ANY monoid
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_DIV_INT32 || GxB_NO_ANY_DIV_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"  // supplies the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // semiring compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;  // nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_int64.c b/Source/Generated/GB_AxB__any_div_int64.c
new file mode 100644
index 0000000000..744d5d28e8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_int64
+// A'*B function (dot3):     GB_Adot3B__any_div_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = GB_IDIV_SIGNED (aik, bkj, 64)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 64) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 64)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for the ANY monoid, the result is simply y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_DIV_INT64 || GxB_NO_ANY_DIV_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: expand the dot2 template with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_int8.c b/Source/Generated/GB_AxB__any_div_int8.c
new file mode 100644
index 0000000000..33b1d0e7c6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_int8
+// A'*B function (dot3):     GB_Adot3B__any_div_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = GB_IDIV_SIGNED (aik, bkj, 8)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 8) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 8)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for the ANY monoid, the result is simply y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_DIV_INT8 || GxB_NO_ANY_DIV_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: expand the dot2 template with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_uint16.c b/Source/Generated/GB_AxB__any_div_uint16.c
new file mode 100644
index 0000000000..5e557f96b2
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_uint16
+// A'*B function (dot3):     GB_Adot3B__any_div_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 16)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 16) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 16)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for the ANY monoid, the result is simply y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_DIV_UINT16 || GxB_NO_ANY_DIV_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: expand the dot2 template with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_uint32.c b/Source/Generated/GB_AxB__any_div_uint32.c
new file mode 100644
index 0000000000..a7791f96b5
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_uint32
+// A'*B function (dot3):     GB_Adot3B__any_div_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 32)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 32) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 32)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for the ANY monoid, the result is simply y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_DIV_UINT32 || GxB_NO_ANY_DIV_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: expand the dot2 template with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: nothing is computed here
+    #else   // enabled: the body is the included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_uint64.c b/Source/Generated/GB_AxB__any_div_uint64.c
new file mode 100644
index 0000000000..20bc423abd
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_uint64
+// A'*B function (dot3):     GB_Adot3B__any_div_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 64)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 64) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t        // A type
+
+#define GB_BTYPE \
+    uint64_t        // B type
+
+#define GB_CTYPE \
+    uint64_t        // C type
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]     // the p-th entry of Cx
+
+// multiply operator (DIV for uint64_t): z = GB_IDIV_UNSIGNED (x, y, 64)
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 64)
+
+// multiply-add: declares a temporary x_op_y for the result, then z = x_op_y
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: an unconditional break here (cij itself is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite, Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for uint64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is compiled, since GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite, Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_DIV_UINT64 || GxB_NO_ANY_DIV_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), for the any_div_uint64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_uint64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_div_uint64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_uint64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_div_uint64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_uint64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_div_uint64
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_uint64 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_div_uint8.c b/Source/Generated/GB_AxB__any_div_uint8.c
new file mode 100644
index 0000000000..65801f28ff
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_div_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_div_uint8
+// A'*B function (dot3):     GB_Adot3B__any_div_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_div_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_div_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 8)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 8) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t         // A type
+
+#define GB_BTYPE \
+    uint8_t         // B type
+
+#define GB_CTYPE \
+    uint8_t         // C type
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]     // the p-th entry of Cx
+
+// multiply operator (DIV for uint8_t): z = GB_IDIV_UNSIGNED (x, y, 8)
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 8)
+
+// multiply-add: declares a temporary x_op_y for the result, then z = x_op_y
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: an unconditional break here (cij itself is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite, Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0xffL for uint8_t)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is compiled, since GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite, Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_DIV || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_DIV_UINT8 || GxB_NO_ANY_DIV_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), for the any_div_uint8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_div_uint8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_div_uint8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_div_uint8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_div_uint8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_div_uint8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_div_uint8
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_div_uint8 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_bool.c b/Source/Generated/GB_AxB__any_eq_bool.c
new file mode 100644
index 0000000000..eb11eb1bed
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_bool
+// A'*B function (dot3):     GB_Adot3B__any_eq_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool            // A type
+
+#define GB_BTYPE \
+    bool            // B type
+
+#define GB_CTYPE \
+    bool            // C type
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]     // the p-th entry of Cx
+
+// multiply operator (EQ): z = (x == y)
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add; with the ANY monoid this is just the multiply, z = (x == y)
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: an unconditional break here (cij itself is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite, Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0x1L for bool)
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is compiled, since GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite, Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_EQ_BOOL || GxB_NO_ANY_EQ_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), for the any_eq_bool semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_bool     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_eq_bool semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_bool     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_eq_bool semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_bool     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_eq_bool
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_bool   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_fp32.c b/Source/Generated/GB_AxB__any_eq_fp32.c
new file mode 100644
index 0000000000..2c2efc972e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_fp32
+// A'*B function (dot3):     GB_Adot3B__any_eq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_fp32
+
+// C type:   bool
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float           // A type
+
+#define GB_BTYPE \
+    float           // B type
+
+#define GB_CTYPE \
+    bool            // C type (differs from the A and B types)
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]     // the p-th entry of Cx
+
+// multiply operator (EQ): z = (x == y)
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add; with the ANY monoid this is just the multiply, z = (x == y)
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: an unconditional break here (cij itself is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite, Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0x1L for bool)
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is compiled, since GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite, Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+// (note GxB_NO_ANY_BOOL: C is bool here, while A and B are float)
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_FP32 || GxB_NO_ANY_BOOL || GxB_NO_EQ_FP32 || GxB_NO_ANY_EQ_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), for the any_eq_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_fp32     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_eq_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_fp32     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_eq_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_fp32     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_eq_fp32
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_fp32   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_fp64.c b/Source/Generated/GB_AxB__any_eq_fp64.c
new file mode 100644
index 0000000000..2beafd2ee4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_fp64
+// A'*B function (dot3):     GB_Adot3B__any_eq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_fp64
+
+// C type:   bool
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_FP64 || GxB_NO_ANY_BOOL || GxB_NO_EQ_FP64 || GxB_NO_ANY_EQ_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_fp64     // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M, used as C<!M> here
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type double (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_fp64     // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // A has type double (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type double (GB_BTYPE)
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_fp64     // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // input/output: C+=A'*B; values are bool
+    const GrB_Matrix A, bool A_is_pattern,      // A has type double (GB_ATYPE)
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type double (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_fp64   // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,      // A has type double (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type double (GB_BTYPE)
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_int16.c b/Source/Generated/GB_AxB__any_eq_int16.c
new file mode 100644
index 0000000000..d9efd8fe7e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_int16
+// A'*B function (dot3):     GB_Adot3B__any_eq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_int16
+
+// C type:   bool
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_INT16 || GxB_NO_ANY_BOOL || GxB_NO_EQ_INT16 || GxB_NO_ANY_EQ_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_int16    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M, used as C<!M> here
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int16_t (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_int16    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int16_t (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int16_t (GB_BTYPE)
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_int16    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // input/output: C+=A'*B; values are bool
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int16_t (GB_ATYPE)
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int16_t (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_int16  // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int16_t (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int16_t (GB_BTYPE)
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_int32.c b/Source/Generated/GB_AxB__any_eq_int32.c
new file mode 100644
index 0000000000..c25edb682d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_int32
+// A'*B function (dot3):     GB_Adot3B__any_eq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_int32
+
+// C type:   bool
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_INT32 || GxB_NO_ANY_BOOL || GxB_NO_EQ_INT32 || GxB_NO_ANY_EQ_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_int32    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M, used as C<!M> here
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int32_t (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_int32    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int32_t (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int32_t (GB_BTYPE)
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_int32    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // input/output: C+=A'*B; values are bool
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int32_t (GB_ATYPE)
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int32_t (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_int32  // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int32_t (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int32_t (GB_BTYPE)
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_int64.c b/Source/Generated/GB_AxB__any_eq_int64.c
new file mode 100644
index 0000000000..cc1654e269
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_int64
+// A'*B function (dot3):     GB_Adot3B__any_eq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_int64
+
+// C type:   bool
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_INT64 || GxB_NO_ANY_BOOL || GxB_NO_EQ_INT64 || GxB_NO_ANY_EQ_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_int64    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M, used as C<!M> here
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int64_t (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_int64    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, const bool Mask_struct, // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int64_t (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int64_t (GB_BTYPE)
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_int64    // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // input/output: C+=A'*B; values are bool
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int64_t (GB_ATYPE)
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int64_t (GB_BTYPE)
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_int64  // GrB_NO_VALUE if this semiring is disabled
+(
+    GrB_Matrix C,                   // result matrix; values are bool (GB_CTYPE)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,      // A has type int64_t (GB_ATYPE)
+    const GrB_Matrix B, bool B_is_pattern,      // B has type int64_t (GB_BTYPE)
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // the included template body completed
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_int8.c b/Source/Generated/GB_AxB__any_eq_int8.c
new file mode 100644
index 0000000000..5393c4a8f8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_int8
+// A'*B function (dot3):     GB_Adot3B__any_eq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_int8
+
+// C type:   bool
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break out of the dot product unconditionally (cij is ignored: ANY monoid)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite: C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where + is the ANY monoid: the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_INT8 || GxB_NO_ANY_BOOL || GxB_NO_EQ_INT8 || GxB_NO_ANY_EQ_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_uint16.c b/Source/Generated/GB_AxB__any_eq_uint16.c
new file mode 100644
index 0000000000..c4d9f3a973
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_uint16
+// A'*B function (dot3):     GB_Adot3B__any_eq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_uint16
+
+// C type:   bool
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break out of the dot product unconditionally (cij is ignored: ANY monoid)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite: C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where + is the ANY monoid: the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_UINT16 || GxB_NO_ANY_BOOL || GxB_NO_EQ_UINT16 || GxB_NO_ANY_EQ_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_uint32.c b/Source/Generated/GB_AxB__any_eq_uint32.c
new file mode 100644
index 0000000000..488cff535b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_uint32
+// A'*B function (dot3):     GB_Adot3B__any_eq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_uint32
+
+// C type:   bool
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break out of the dot product unconditionally (cij is ignored: ANY monoid)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite: C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where + is the ANY monoid: the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_UINT32 || GxB_NO_ANY_BOOL || GxB_NO_EQ_UINT32 || GxB_NO_ANY_EQ_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_uint64.c b/Source/Generated/GB_AxB__any_eq_uint64.c
new file mode 100644
index 0000000000..12d7d9a810
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_uint64
+// A'*B function (dot3):     GB_Adot3B__any_eq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_uint64
+
+// C type:   bool
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break out of the dot product unconditionally (cij is ignored: ANY monoid)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite: C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where + is the ANY monoid: the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_UINT64 || GxB_NO_ANY_BOOL || GxB_NO_EQ_UINT64 || GxB_NO_ANY_EQ_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // hard-coded kernel compiled out
+    return (GrB_NO_VALUE) ;             // disabled: generic case used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from template
+    return (GrB_SUCCESS) ;
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_eq_uint8.c b/Source/Generated/GB_AxB__any_eq_uint8.c
new file mode 100644
index 0000000000..af36dc99b4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_eq_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_eq_uint8
+// A'*B function (dot3):     GB_Adot3B__any_eq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_eq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_eq_uint8
+
+// C type:   bool
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_EQ || GxB_NO_UINT8 || GxB_NO_ANY_BOOL || GxB_NO_EQ_UINT8 || GxB_NO_ANY_EQ_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_bool.c b/Source/Generated/GB_AxB__any_first_bool.c
new file mode 100644
index 0000000000..4a10d83fb3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_bool
+// A'*B function (dot3):     GB_Adot3B__any_first_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// bkj = Bx [pB]; not needed here since the FIRST multiplier ignores bkj
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_FIRST_BOOL || GxB_NO_ANY_FIRST_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_fp32.c b/Source/Generated/GB_AxB__any_first_fp32.c
new file mode 100644
index 0000000000..6c8744580b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_fp32
+// A'*B function (dot3):     GB_Adot3B__any_first_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]; not needed here since the FIRST multiplier ignores bkj
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_FIRST_FP32 || GxB_NO_ANY_FIRST_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_fp64.c b/Source/Generated/GB_AxB__any_first_fp64.c
new file mode 100644
index 0000000000..2ebd9c529f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_fp64
+// A'*B function (dot3):     GB_Adot3B__any_first_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]; not needed here since the FIRST multiplier ignores bkj
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_FIRST_FP64 || GxB_NO_ANY_FIRST_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_int16.c b/Source/Generated/GB_AxB__any_first_int16.c
new file mode 100644
index 0000000000..8184e17abf
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_int16
+// A'*B function (dot3):     GB_Adot3B__any_first_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]; not needed here since the FIRST multiplier ignores bkj
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_FIRST_INT16 || GxB_NO_ANY_FIRST_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_int32.c b/Source/Generated/GB_AxB__any_first_int32.c
new file mode 100644
index 0000000000..6c248dc71a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_int32
+// A'*B function (dot3):     GB_Adot3B__any_first_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA] (the macro also declares aik)
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB] (expands to an empty statement: GB_MULT ignores y)
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x (the y argument is not used)
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add: z = x (same expansion as GB_MULT in this file)
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: terminal-value check; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (in this file the update is the plain store Cx [p] = t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (in this file the expansion is just y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (in this file the update is the plain store Hx [i] = t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_FIRST_INT32 || GxB_NO_ANY_FIRST_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_int32
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel serves C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // follows the included dot2 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), via GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_int32
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot3 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_int32
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot4 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_int32
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included saxpy3 code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_int64.c b/Source/Generated/GB_AxB__any_first_int64.c
new file mode 100644
index 0000000000..8577fbe9d9
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_int64
+// A'*B function (dot3):     GB_Adot3B__any_first_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA] (the macro also declares aik)
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB] (expands to an empty statement: GB_MULT ignores y)
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x (the y argument is not used)
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add: z = x (same expansion as GB_MULT in this file)
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: terminal-value check; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (in this file the update is the plain store Cx [p] = t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (in this file the expansion is just y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for int64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (in this file the update is the plain store Hx [i] = t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_FIRST_INT64 || GxB_NO_ANY_FIRST_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_int64
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel serves C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // follows the included dot2 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), via GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_int64
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot3 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_int64
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot4 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_int64
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included saxpy3 code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_int8.c b/Source/Generated/GB_AxB__any_first_int8.c
new file mode 100644
index 0000000000..6e7aafbcf6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_int8
+// A'*B function (dot3):     GB_Adot3B__any_first_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA] (the macro also declares aik)
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB] (expands to an empty statement: GB_MULT ignores y)
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x (the y argument is not used)
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add: z = x (same expansion as GB_MULT in this file)
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: terminal-value check; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (in this file the update is the plain store Cx [p] = t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (in this file the expansion is just y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (in this file the update is the plain store Hx [i] = t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_FIRST_INT8 || GxB_NO_ANY_FIRST_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_int8
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel serves C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // follows the included dot2 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), via GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_int8
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot3 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_int8
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot4 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_int8
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included saxpy3 code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_uint16.c b/Source/Generated/GB_AxB__any_first_uint16.c
new file mode 100644
index 0000000000..629e0b3f75
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_uint16
+// A'*B function (dot3):     GB_Adot3B__any_first_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA] (the macro also declares aik)
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB] (expands to an empty statement: GB_MULT ignores y)
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x (the y argument is not used)
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add: z = x (same expansion as GB_MULT in this file)
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: terminal-value check; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (in this file the update is the plain store Cx [p] = t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (in this file the expansion is just y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (in this file the update is the plain store Hx [i] = t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_FIRST_UINT16 || GxB_NO_ANY_FIRST_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_uint16
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel serves C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // follows the included dot2 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), via GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_uint16
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot3 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_uint16
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // follows the included dot4 code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_uint16
+(
+    GrB_Matrix C,                               // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // follows the included saxpy3 code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_uint32.c b/Source/Generated/GB_AxB__any_first_uint32.c
new file mode 100644
index 0000000000..b907aea089
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_uint32
+// A'*B function (dot3):     GB_Adot3B__any_first_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_FIRST_UINT32 || GxB_NO_ANY_FIRST_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_uint32    // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are only used (if at all) inside the included template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // GB_PHASE_2_OF_2 is set only around the include
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_uint32    // dot3: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_uint32    // dot4: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_uint32  // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_uint64.c b/Source/Generated/GB_AxB__any_first_uint64.c
new file mode 100644
index 0000000000..18a24c1ce7
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_uint64
+// A'*B function (dot3):     GB_Adot3B__any_first_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for uint64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_FIRST_UINT64 || GxB_NO_ANY_FIRST_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_uint64    // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are only used (if at all) inside the included template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // GB_PHASE_2_OF_2 is set only around the include
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_uint64    // dot3: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_uint64    // dot4: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_uint64  // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_first_uint8.c b/Source/Generated/GB_AxB__any_first_uint8.c
new file mode 100644
index 0000000000..5a4e0824ea
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_first_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_first_uint8
+// A'*B function (dot3):     GB_Adot3B__any_first_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_first_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_first_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = aik
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = aik
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = x
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = x
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FIRST || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_FIRST_UINT8 || GxB_NO_ANY_FIRST_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_first_uint8     // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are only used (if at all) inside the included template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // GB_PHASE_2_OF_2 is set only around the include
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_first_uint8     // dot3: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_first_uint8     // dot4: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_first_uint8   // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_bool.c b/Source/Generated/GB_AxB__any_ge_bool.c
new file mode 100644
index 0000000000..db19895b61
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_bool
+// A'*B function (dot3):     GB_Adot3B__any_ge_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_GE_BOOL || GxB_NO_ANY_GE_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_bool         // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are only used (if at all) inside the included template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // GB_PHASE_2_OF_2 is set only around the include
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_bool         // dot3: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_bool         // dot4: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_bool       // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are only used (if at all) inside the included template
+{ 
+    #if GB_DISABLE          // kernel compiled out: do no work
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: body comes from the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_fp32.c b/Source/Generated/GB_AxB__any_ge_fp32.c
new file mode 100644
index 0000000000..8c6f609552
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_fp32
+// A'*B function (dot3):     GB_Adot3B__any_ge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_fp32
+
+// C type:   bool
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y)
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// unconditional break, with no test of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_FP32 || GxB_NO_ANY_BOOL || GxB_NO_GE_FP32 || GxB_NO_ANY_GE_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_ge_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_fp32
+(
+    GrB_Matrix C,                   // result: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_ge_fp32 in this file)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_ge_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_fp32
+(
+    GrB_Matrix C,                   // result: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_ge_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_fp32
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_ge_fp32 semiring
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_fp32
+(
+    GrB_Matrix C,               // result: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_fp64.c b/Source/Generated/GB_AxB__any_ge_fp64.c
new file mode 100644
index 0000000000..48f9e79f05
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_fp64
+// A'*B function (dot3):     GB_Adot3B__any_ge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_fp64
+
+// C type:   bool
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y)
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// unconditional break, with no test of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_FP64 || GxB_NO_ANY_BOOL || GxB_NO_GE_FP64 || GxB_NO_ANY_GE_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_ge_fp64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_fp64
+(
+    GrB_Matrix C,                   // result: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_ge_fp64 in this file)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_ge_fp64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_fp64
+(
+    GrB_Matrix C,                   // result: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_ge_fp64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_fp64
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_ge_fp64 semiring
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_fp64
+(
+    GrB_Matrix C,               // result: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_int16.c b/Source/Generated/GB_AxB__any_ge_int16.c
new file mode 100644
index 0000000000..05b945fdf9
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_int16
+// A'*B function (dot3):     GB_Adot3B__any_ge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_int16
+
+// C type:   bool
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y)
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// unconditional break, with no test of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_INT16 || GxB_NO_ANY_BOOL || GxB_NO_GE_INT16 || GxB_NO_ANY_GE_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_ge_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_int16
+(
+    GrB_Matrix C,                   // result: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_ge_int16 in this file)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_ge_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_int16
+(
+    GrB_Matrix C,                   // result: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_ge_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_int16
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_ge_int16 semiring
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_int16
+(
+    GrB_Matrix C,               // result: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_int32.c b/Source/Generated/GB_AxB__any_ge_int32.c
new file mode 100644
index 0000000000..ffde9030f8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_int32
+// A'*B function (dot3):     GB_Adot3B__any_ge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_int32
+
+// C type:   bool
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y)
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// unconditional break, with no test of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_INT32 || GxB_NO_ANY_BOOL || GxB_NO_GE_INT32 || GxB_NO_ANY_GE_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_ge_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_int32
+(
+    GrB_Matrix C,                   // result: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_ge_int32 in this file)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_ge_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_int32
+(
+    GrB_Matrix C,                   // result: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_ge_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_int32
+(
+    GrB_Matrix C,                   // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_ge_int32 semiring
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_int32
+(
+    GrB_Matrix C,               // result: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_int64.c b/Source/Generated/GB_AxB__any_ge_int64.c
new file mode 100644
index 0000000000..f6e887f4a6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_int64
+// A'*B function (dot3):     GB_Adot3B__any_ge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_int64
+
+// C type:   bool
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add; with the ANY monoid the add step just overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break at the terminal value (unconditional here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_INT64 || GxB_NO_ANY_BOOL || GxB_NO_GE_INT64 || GxB_NO_ANY_GE_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_int64    // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body, with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_int64    // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_int64    // dot4 kernel: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_int64  // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_int8.c b/Source/Generated/GB_AxB__any_ge_int8.c
new file mode 100644
index 0000000000..130dfe9f74
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_int8
+// A'*B function (dot3):     GB_Adot3B__any_ge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_int8
+
+// C type:   bool
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add; with the ANY monoid the add step just overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break at the terminal value (unconditional here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_INT8 || GxB_NO_ANY_BOOL || GxB_NO_GE_INT8 || GxB_NO_ANY_GE_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_int8     // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body, with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_int8     // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_int8     // dot4 kernel: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_int8   // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_uint16.c b/Source/Generated/GB_AxB__any_ge_uint16.c
new file mode 100644
index 0000000000..35458bbed3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_uint16
+// A'*B function (dot3):     GB_Adot3B__any_ge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_uint16
+
+// C type:   bool
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add; with the ANY monoid the add step just overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break at the terminal value (unconditional here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_UINT16 || GxB_NO_ANY_BOOL || GxB_NO_GE_UINT16 || GxB_NO_ANY_GE_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_uint16   // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body, with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_uint16   // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_uint16   // dot4 kernel: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_uint16 // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_uint32.c b/Source/Generated/GB_AxB__any_ge_uint32.c
new file mode 100644
index 0000000000..058464a69e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_uint32
+// A'*B function (dot3):     GB_Adot3B__any_ge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_uint32
+
+// C type:   bool
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add; with the ANY monoid the add step just overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break at the terminal value (unconditional here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_UINT32 || GxB_NO_ANY_BOOL || GxB_NO_GE_UINT32 || GxB_NO_ANY_GE_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_uint32   // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; dot2 is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body, with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_uint32   // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_uint32   // dot4 kernel: dense C+=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_uint32 // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_uint64.c b/Source/Generated/GB_AxB__any_ge_uint64.c
new file mode 100644
index 0000000000..c969d90e71
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_uint64
+// A'*B function (dot3):     GB_Adot3B__any_ge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_uint64
+
+// C type:   bool
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]: declares aik and loads one entry of A
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads one entry of B
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y)
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break on terminal value; here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which evaluates to y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_UINT64 || GxB_NO_ANY_BOOL || GxB_NO_GE_UINT64 || GxB_NO_ANY_GE_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), the dot2 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3, not this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), the dot3 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel for this semiring)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ge_uint8.c b/Source/Generated/GB_AxB__any_ge_uint8.c
new file mode 100644
index 0000000000..0f56b26cb3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ge_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ge_uint8
+// A'*B function (dot3):     GB_Adot3B__any_ge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_ge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ge_uint8
+
+// C type:   bool
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]: declares aik and loads one entry of A
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads one entry of B
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y)
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break on terminal value; here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which evaluates to y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GE || GxB_NO_UINT8 || GxB_NO_ANY_BOOL || GxB_NO_GE_UINT8 || GxB_NO_ANY_GE_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), the dot2 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3, not this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), the dot3 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel for this semiring)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_bool.c b/Source/Generated/GB_AxB__any_gt_bool.c
new file mode 100644
index 0000000000..113fb055ad
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_bool
+// A'*B function (dot3):     GB_Adot3B__any_gt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]: declares aik and loads one entry of A
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads one entry of B
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break on terminal value; here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which evaluates to y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_GT_BOOL || GxB_NO_ANY_GT_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), the dot2 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3, not this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), the dot3 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel for this semiring)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_fp32.c b/Source/Generated/GB_AxB__any_gt_fp32.c
new file mode 100644
index 0000000000..08cd4b32bc
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_fp32
+// A'*B function (dot3):     GB_Adot3B__any_gt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_fp32
+
+// C type:   bool
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]: declares aik and loads one entry of A
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads one entry of B
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break on terminal value; here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which evaluates to y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_FP32 || GxB_NO_ANY_BOOL || GxB_NO_GT_FP32 || GxB_NO_ANY_GT_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), the dot2 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3, not this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), the dot3 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel for this semiring)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_fp64.c b/Source/Generated/GB_AxB__any_gt_fp64.c
new file mode 100644
index 0000000000..c0ae8591ce
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_fp64
+// A'*B function (dot3):     GB_Adot3B__any_gt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_fp64
+
+// C type:   bool
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]: declares aik and loads one entry of A
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads one entry of B
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break on terminal value; here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which evaluates to y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_FP64 || GxB_NO_ANY_BOOL || GxB_NO_GT_FP64 || GxB_NO_ANY_GT_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), the dot2 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3, not this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), the dot3 kernel
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel for this semiring)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_int16.c b/Source/Generated/GB_AxB__any_gt_int16.c
new file mode 100644
index 0000000000..9d6a8f30d1
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_int16
+// A'*B function (dot3):     GB_Adot3B__any_gt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_int16
+
+// C type:   bool
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_INT16 || GxB_NO_ANY_BOOL || GxB_NO_GT_INT16 || GxB_NO_ANY_GT_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: body is the meta file, with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_int32.c b/Source/Generated/GB_AxB__any_gt_int32.c
new file mode 100644
index 0000000000..036e0f8801
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_int32
+// A'*B function (dot3):     GB_Adot3B__any_gt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_int32
+
+// C type:   bool
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_INT32 || GxB_NO_ANY_BOOL || GxB_NO_GT_INT32 || GxB_NO_ANY_GT_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: body is the meta file, with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_int64.c b/Source/Generated/GB_AxB__any_gt_int64.c
new file mode 100644
index 0000000000..55954ff0bb
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_int64
+// A'*B function (dot3):     GB_Adot3B__any_gt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_int64
+
+// C type:   bool
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_INT64 || GxB_NO_ANY_BOOL || GxB_NO_GT_INT64 || GxB_NO_ANY_GT_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: body is the meta file, with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_int8.c b/Source/Generated/GB_AxB__any_gt_int8.c
new file mode 100644
index 0000000000..2b640e8826
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_int8
+// A'*B function (dot3):     GB_Adot3B__any_gt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_int8
+
+// C type:   bool
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_INT8 || GxB_NO_ANY_BOOL || GxB_NO_GT_INT8 || GxB_NO_ANY_GT_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: body is the meta file, with GB_PHASE_2_OF_2 defined
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot3 template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included dot4 template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else   // enabled: the body is the textually included saxpy3 template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_uint16.c b/Source/Generated/GB_AxB__any_gt_uint16.c
new file mode 100644
index 0000000000..e4c5a8e8cb
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_uint16
+// A'*B function (dot3):     GB_Adot3B__any_gt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_uint16
+
+// C type:   bool
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add; identical to GB_MULT here since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only); this semiring always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this simply overwrites Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch in effect here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_UINT16 || GxB_NO_ANY_BOOL || GxB_NO_GT_UINT16 || GxB_NO_ANY_GT_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_uint16       // C=A'*B or C<!M>=A'*B, ANY_GT_UINT16
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2              // do not leak it into later code
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_uint16       // C<M>=A'*B, ANY_GT_UINT16 semiring
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask of C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_uint16       // C+=A'*B, ANY_GT_UINT16 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_uint16     // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // M: mask
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_uint32.c b/Source/Generated/GB_AxB__any_gt_uint32.c
new file mode 100644
index 0000000000..f1179c721c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_uint32
+// A'*B function (dot3):     GB_Adot3B__any_gt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_uint32
+
+// C type:   bool
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add; identical to GB_MULT here since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only); this semiring always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this simply overwrites Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch in effect here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_UINT32 || GxB_NO_ANY_BOOL || GxB_NO_GT_UINT32 || GxB_NO_ANY_GT_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_uint32       // C=A'*B or C<!M>=A'*B, ANY_GT_UINT32
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2              // do not leak it into later code
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_uint32       // C<M>=A'*B, ANY_GT_UINT32 semiring
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask of C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_uint32       // C+=A'*B, ANY_GT_UINT32 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_uint32     // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // M: mask
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_uint64.c b/Source/Generated/GB_AxB__any_gt_uint64.c
new file mode 100644
index 0000000000..056e891a4d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_uint64
+// A'*B function (dot3):     GB_Adot3B__any_gt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_uint64
+
+// C type:   bool
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add; identical to GB_MULT here since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only); this semiring always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this simply overwrites Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch in effect here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_UINT64 || GxB_NO_ANY_BOOL || GxB_NO_GT_UINT64 || GxB_NO_ANY_GT_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_uint64       // C=A'*B or C<!M>=A'*B, ANY_GT_UINT64
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2              // do not leak it into later code
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_uint64       // C<M>=A'*B, ANY_GT_UINT64 semiring
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask of C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_uint64       // C+=A'*B, ANY_GT_UINT64 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_uint64     // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // M: mask
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_gt_uint8.c b/Source/Generated/GB_AxB__any_gt_uint8.c
new file mode 100644
index 0000000000..b58564aad9
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_gt_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_gt_uint8
+// A'*B function (dot3):     GB_Adot3B__any_gt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_gt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_gt_uint8
+
+// C type:   bool
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add; identical to GB_MULT here since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only); this semiring always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this simply overwrites Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch in effect here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_GT || GxB_NO_UINT8 || GxB_NO_ANY_BOOL || GxB_NO_GT_UINT8 || GxB_NO_ANY_GT_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_gt_uint8        // C=A'*B or C<!M>=A'*B, ANY_GT_UINT8
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this file
+    #undef GB_PHASE_2_OF_2              // do not leak it into later code
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_gt_uint8        // C<M>=A'*B, ANY_GT_UINT8 semiring
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask of C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_gt_uint8        // C+=A'*B, ANY_GT_UINT8 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_gt_uint8      // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // M: mask
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // GrB_NO_VALUE if GB_DISABLE is nonzero; else runs the template below
+    #if GB_DISABLE                      // semiring disabled at compile time
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is built
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this file
+    return (GrB_SUCCESS) ;              // reached if the template falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_fp32.c b/Source/Generated/GB_AxB__any_iseq_fp32.c
new file mode 100644
index 0000000000..6b4058d259
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_fp32
+// A'*B function (dot3):     GB_Adot3B__any_iseq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISEQ): z is 1 if x equals y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add: with the ANY monoid the product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: an unconditional break for this ANY monoid (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: just an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid just yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_ISEQ_FP32 || GxB_NO_ANY_ISEQ_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_fp32   // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_fp32   // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_fp32   // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_fp32 // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_fp64.c b/Source/Generated/GB_AxB__any_iseq_fp64.c
new file mode 100644
index 0000000000..b3696192c1
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_fp64
+// A'*B function (dot3):     GB_Adot3B__any_iseq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISEQ): z is 1 if x equals y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add: with the ANY monoid the product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: an unconditional break for this ANY monoid (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: just an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid just yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_ISEQ_FP64 || GxB_NO_ANY_ISEQ_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_fp64   // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_fp64   // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_fp64   // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_fp64 // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_int16.c b/Source/Generated/GB_AxB__any_iseq_int16.c
new file mode 100644
index 0000000000..431177f099
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_int16
+// A'*B function (dot3):     GB_Adot3B__any_iseq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISEQ): z is 1 if x equals y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add: with the ANY monoid the product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: an unconditional break for this ANY monoid (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: just an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid just yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_ISEQ_INT16 || GxB_NO_ANY_ISEQ_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_int16  // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_int16  // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_int16  // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_int16    // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_int32.c b/Source/Generated/GB_AxB__any_iseq_int32.c
new file mode 100644
index 0000000000..e7986ff6ba
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_int32
+// A'*B function (dot3):     GB_Adot3B__any_iseq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISEQ): z is 1 if x equals y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add: with the ANY monoid the product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: an unconditional break for this ANY monoid (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: just an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid just yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_ISEQ_INT32 || GxB_NO_ANY_ISEQ_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_int32  // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_int32  // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_int32  // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_int32    // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_int64.c b/Source/Generated/GB_AxB__any_iseq_int64.c
new file mode 100644
index 0000000000..5db59c9897
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_int64
+// A'*B function (dot3):     GB_Adot3B__any_iseq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; the ANY monoid "add" just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: this kernel simply returns y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; the ANY monoid "add" just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_ISEQ_INT64 || GxB_NO_ANY_ISEQ_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_int64      // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2     // seen only by the template; undefined below
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_int64      // C<M>=A'*B, masked dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_int64      // C+=A'*B, dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_int64    // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_int8.c b/Source/Generated/GB_AxB__any_iseq_int8.c
new file mode 100644
index 0000000000..354168f75b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_int8
+// A'*B function (dot3):     GB_Adot3B__any_iseq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; the ANY monoid "add" just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: this kernel simply returns y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; the ANY monoid "add" just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_ISEQ_INT8 || GxB_NO_ANY_ISEQ_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_int8       // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2     // seen only by the template; undefined below
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_int8       // C<M>=A'*B, masked dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_int8       // C+=A'*B, dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_int8     // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_uint16.c b/Source/Generated/GB_AxB__any_iseq_uint16.c
new file mode 100644
index 0000000000..e8fdee9d52
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_uint16
+// A'*B function (dot3):     GB_Adot3B__any_iseq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; the ANY monoid "add" just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: this kernel simply returns y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; the ANY monoid "add" just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_ISEQ_UINT16 || GxB_NO_ANY_ISEQ_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_uint16     // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2     // seen only by the template; undefined below
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_uint16     // C<M>=A'*B, masked dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_uint16     // C+=A'*B, dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint16   // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_uint32.c b/Source/Generated/GB_AxB__any_iseq_uint32.c
new file mode 100644
index 0000000000..fd34511ec4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_uint32
+// A'*B function (dot3):     GB_Adot3B__any_iseq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; the ANY monoid "add" just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: this kernel simply returns y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; the ANY monoid "add" just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_ISEQ_UINT32 || GxB_NO_ANY_ISEQ_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_uint32     // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #define GB_PHASE_2_OF_2     // seen only by the template; undefined below
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_uint32     // C<M>=A'*B, masked dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_uint32     // C+=A'*B, dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint32   // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed here
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the template above is the whole function body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_uint64.c b/Source/Generated/GB_AxB__any_iseq_uint64.c
new file mode 100644
index 0000000000..acd9c67b9a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_uint64
+// A'*B function (dot3):     GB_Adot3B__any_iseq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, the monoid add; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_ISEQ_UINT64 || GxB_NO_ANY_ISEQ_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_uint64     // dot-product (dot2, phase 2) kernel for semiring any_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta-template -- confirm
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included template
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from leaking into later includes
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_uint64     // masked dot-product (dot3) kernel for semiring any_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably selects a structural mask -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_uint64     // dense dot-product (dot4) kernel for semiring any_iseq_uint64
+(
+    GrB_Matrix C,                       // matrix updated in place by the product
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint64   // saxpy3 (Gustavson + Hash) kernel for semiring any_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the C<!M> form -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                          // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;                 // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the function body is this textually included template
+    return (GrB_SUCCESS) ;                  // reached after the included template body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_iseq_uint8.c b/Source/Generated/GB_AxB__any_iseq_uint8.c
new file mode 100644
index 0000000000..eb6e1cc0b2
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_iseq_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_iseq_uint8
+// A'*B function (dot3):     GB_Adot3B__any_iseq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_iseq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_iseq_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik == bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik == bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x == y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x == y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, the monoid add; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISEQ || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_ISEQ_UINT8 || GxB_NO_ANY_ISEQ_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_iseq_uint8      // dot-product (dot2, phase 2) kernel for semiring any_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta-template -- confirm
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included template
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from leaking into later includes
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_iseq_uint8      // masked dot-product (dot3) kernel for semiring any_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably selects a structural mask -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_iseq_uint8      // dense dot-product (dot4) kernel for semiring any_iseq_uint8
+(
+    GrB_Matrix C,                       // matrix updated in place by the product
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint8    // saxpy3 (Gustavson + Hash) kernel for semiring any_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the C<!M> form -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                          // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;                 // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the function body is this textually included template
+    return (GrB_SUCCESS) ;                  // reached after the included template body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_fp32.c b/Source/Generated/GB_AxB__any_isge_fp32.c
new file mode 100644
index 0000000000..6d2164ff91
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_fp32
+// A'*B function (dot3):     GB_Adot3B__any_isge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, the monoid add; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_ISGE_FP32 || GxB_NO_ANY_ISGE_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isge_fp32       // dot-product (dot2, phase 2) kernel for semiring any_isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta-template -- confirm
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included template
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from leaking into later includes
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isge_fp32       // masked dot-product (dot3) kernel for semiring any_isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably selects a structural mask -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isge_fp32       // dense dot-product (dot4) kernel for semiring any_isge_fp32
+(
+    GrB_Matrix C,                       // matrix updated in place by the product
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isge_fp32     // saxpy3 (Gustavson + Hash) kernel for semiring any_isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the C<!M> form -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                          // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;                 // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the function body is this textually included template
+    return (GrB_SUCCESS) ;                  // reached after the included template body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_fp64.c b/Source/Generated/GB_AxB__any_isge_fp64.c
new file mode 100644
index 0000000000..06c28a7d9d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_fp64
+// A'*B function (dot3):     GB_Adot3B__any_isge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, the monoid add; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_ISGE_FP64 || GxB_NO_ANY_ISGE_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isge_fp64       // dot-product (dot2, phase 2) kernel for semiring any_isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta-template -- confirm
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included template
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from leaking into later includes
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isge_fp64       // masked dot-product (dot3) kernel for semiring any_isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably selects a structural mask -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isge_fp64       // dense dot-product (dot4) kernel for semiring any_isge_fp64
+(
+    GrB_Matrix C,                       // matrix updated in place by the product
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;              // reached after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isge_fp64     // saxpy3 (Gustavson + Hash) kernel for semiring any_isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the C<!M> form -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // returns GrB_NO_VALUE when GB_DISABLE is nonzero; otherwise ends with GrB_SUCCESS
+{ 
+    #if GB_DISABLE                          // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;                 // kernel compiled out so that the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the function body is this textually included template
+    return (GrB_SUCCESS) ;                  // reached after the included template body
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_int16.c b/Source/Generated/GB_AxB__any_isge_int16.c
new file mode 100644
index 0000000000..92c6e775ce
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_int16
+// A'*B function (dot3):     GB_Adot3B__any_isge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y), the ISGE operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only); here: always
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 16 bits)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_ISGE_INT16 || GxB_NO_ANY_ISGE_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isge_int16     // C=A'*B or C<!M>=A'*B, phase 2
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask, for C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_isge_int16)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isge_int16     // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isge_int16     // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isge_int16   // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_int32.c b/Source/Generated/GB_AxB__any_isge_int32.c
new file mode 100644
index 0000000000..16ef50946b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_int32
+// A'*B function (dot3):     GB_Adot3B__any_isge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y), the ISGE operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only); here: always
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 32 bits)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_ISGE_INT32 || GxB_NO_ANY_ISGE_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isge_int32     // C=A'*B or C<!M>=A'*B, phase 2
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask, for C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_isge_int32)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isge_int32     // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isge_int32     // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isge_int32   // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_int64.c b/Source/Generated/GB_AxB__any_isge_int64.c
new file mode 100644
index 0000000000..20b4512290
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_int64
+// A'*B function (dot3):     GB_Adot3B__any_isge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y), the ISGE operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only); here: always
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_ISGE_INT64 || GxB_NO_ANY_ISGE_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isge_int64     // C=A'*B or C<!M>=A'*B, phase 2
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask, for C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_isge_int64)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isge_int64     // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isge_int64     // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isge_int64   // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_int8.c b/Source/Generated/GB_AxB__any_isge_int8.c
new file mode 100644
index 0000000000..6f0d495ea9
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_int8
+// A'*B function (dot3):     GB_Adot3B__any_isge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x >= y), the ISGE operator
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x >= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only); here: always
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 8 bits)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len is a count of entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_ISGE_INT8 || GxB_NO_ANY_ISGE_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isge_int8      // C=A'*B or C<!M>=A'*B, phase 2
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: mask, for C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_isge_int8)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isge_int8      // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isge_int8      // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isge_int8    // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,                       // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: first input matrix
+    const GrB_Matrix B, bool B_is_pattern,          // B: second input matrix
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // the hard-coded kernel is compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_uint16.c b/Source/Generated/GB_AxB__any_isge_uint16.c
new file mode 100644
index 0000000000..dcbbb33ffa
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_uint16
+// A'*B function (dot3):     GB_Adot3B__any_isge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGE): z = (x >= y), which is 1 if true and 0 if false
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 16 bits)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, no numeric work; Hx is unused.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is true
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_ISGE_UINT16 || GxB_NO_ANY_ISGE_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses GB_AxB_dot2_meta.c
+GrB_Info GB_Adot2B__any_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the C<M>=A'*B case now uses dot3 instead of this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot3 template
+GrB_Info GB_Adot3B__any_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot4 template
+GrB_Info GB_Adot4B__any_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the saxpy3 template
+GrB_Info GB_Asaxpy3B__any_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_uint32.c b/Source/Generated/GB_AxB__any_isge_uint32.c
new file mode 100644
index 0000000000..a186b804e7
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_uint32
+// A'*B function (dot3):     GB_Adot3B__any_isge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGE): z = (x >= y), which is 1 if true and 0 if false
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 32 bits)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, no numeric work; Hx is unused.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is true
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_ISGE_UINT32 || GxB_NO_ANY_ISGE_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses GB_AxB_dot2_meta.c
+GrB_Info GB_Adot2B__any_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the C<M>=A'*B case now uses dot3 instead of this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot3 template
+GrB_Info GB_Adot3B__any_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot4 template
+GrB_Info GB_Adot4B__any_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the saxpy3 template
+GrB_Info GB_Asaxpy3B__any_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_uint64.c b/Source/Generated/GB_AxB__any_isge_uint64.c
new file mode 100644
index 0000000000..39744f83a4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_uint64
+// A'*B function (dot3):     GB_Adot3B__any_isge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGE): z = (x >= y), which is 1 if true and 0 if false
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0: C is 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, no numeric work; Hx is unused.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is true
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_ISGE_UINT64 || GxB_NO_ANY_ISGE_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses GB_AxB_dot2_meta.c
+GrB_Info GB_Adot2B__any_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the C<M>=A'*B case now uses dot3 instead of this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot3 template
+GrB_Info GB_Adot3B__any_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot4 template
+GrB_Info GB_Adot4B__any_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the saxpy3 template
+GrB_Info GB_Asaxpy3B__any_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isge_uint8.c b/Source/Generated/GB_AxB__any_isge_uint8.c
new file mode 100644
index 0000000000..7c5c4340d4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isge_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isge_uint8
+// A'*B function (dot3):     GB_Adot3B__any_isge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isge_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik >= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik >= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGE): z = (x >= y), which is 1 if true and 0 if false
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x >= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 8 bits)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, no numeric work; Hx is unused.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these is true
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGE || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_ISGE_UINT8 || GxB_NO_ANY_ISGE_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses GB_AxB_dot2_meta.c
+GrB_Info GB_Adot2B__any_isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the C<M>=A'*B case now uses dot3 instead of this kernel
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot3 template
+GrB_Info GB_Adot3B__any_isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the dot4 template
+GrB_Info GB_Adot4B__any_isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// returns GrB_NO_VALUE if GB_DISABLE is true; else uses the saxpy3 template
+GrB_Info GB_Asaxpy3B__any_isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_fp32.c b/Source/Generated/GB_AxB__any_isgt_fp32.c
new file mode 100644
index 0000000000..39e926651e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_fp32
+// A'*B function (dot3):     GB_Adot3B__any_isgt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid discards the old z, so this equals GB_MULT
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal test (dot product only); the ANY monoid breaks unconditionally
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_ISGT_FP32 || GxB_NO_ANY_ISGT_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_fp32   // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // request phase 2 of the dot2 method
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2      // flag is scoped to this one include
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_fp32   // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_fp32   // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_fp32 // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_fp64.c b/Source/Generated/GB_AxB__any_isgt_fp64.c
new file mode 100644
index 0000000000..94367f7832
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_fp64
+// A'*B function (dot3):     GB_Adot3B__any_isgt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid discards the old z, so this equals GB_MULT
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal test (dot product only); the ANY monoid breaks unconditionally
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_ISGT_FP64 || GxB_NO_ANY_ISGT_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_fp64   // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // request phase 2 of the dot2 method
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2      // flag is scoped to this one include
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_fp64   // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_fp64   // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_fp64 // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_int16.c b/Source/Generated/GB_AxB__any_isgt_int16.c
new file mode 100644
index 0000000000..633666d247
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_int16
+// A'*B function (dot3):     GB_Adot3B__any_isgt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid discards the old z, so this equals GB_MULT
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal test (dot product only); the ANY monoid breaks unconditionally
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_ISGT_INT16 || GxB_NO_ANY_ISGT_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_int16  // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // request phase 2 of the dot2 method
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2      // flag is scoped to this one include
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_int16  // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_int16  // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_int16    // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_int32.c b/Source/Generated/GB_AxB__any_isgt_int32.c
new file mode 100644
index 0000000000..990431843d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_int32
+// A'*B function (dot3):     GB_Adot3B__any_isgt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid discards the old z, so this equals GB_MULT
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal test (dot product only); the ANY monoid breaks unconditionally
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_ISGT_INT32 || GxB_NO_ANY_ISGT_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_int32  // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // request phase 2 of the dot2 method
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2      // flag is scoped to this one include
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_int32  // dot3 kernel: masked C<M>=A'*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_int32  // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_int32    // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // arguments are not used directly here; presumably the template reads them
+{ 
+    #if GB_DISABLE              // true if any GxB_NO_* control listed above is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_int64.c b/Source/Generated/GB_AxB__any_isgt_int64.c
new file mode 100644
index 0000000000..ae125089a1
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_int64
+// A'*B function (dot3):     GB_Adot3B__any_isgt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid just overwrites z with the new product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break unconditionally (cij is not examined)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for int64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (this one is ANY_ISGT, so 0)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (here it is ISGT, so 0)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING // 0 in this file: the #else branch is compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_ISGT_INT64 || GxB_NO_ANY_ISGT_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ // no parameter is used here directly; any use is in the included code
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2 // presumably selects phase 2 of the meta-template
+    #include "GB_AxB_dot2_meta.c" // kernel body, textually included here
+    #undef GB_PHASE_2_OF_2 // keep the phase selector from leaking below
+    return (GrB_SUCCESS) ; // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_int8.c b/Source/Generated/GB_AxB__any_isgt_int8.c
new file mode 100644
index 0000000000..53a456ae2c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_int8
+// A'*B function (dot3):     GB_Adot3B__any_isgt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid just overwrites z with the new product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break unconditionally (cij is not examined)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (8 bits for int8_t)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (this one is ANY_ISGT, so 0)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (here it is ISGT, so 0)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING // 0 in this file: the #else branch is compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_ISGT_INT8 || GxB_NO_ANY_ISGT_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ // no parameter is used here directly; any use is in the included code
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2 // presumably selects phase 2 of the meta-template
+    #include "GB_AxB_dot2_meta.c" // kernel body, textually included here
+    #undef GB_PHASE_2_OF_2 // keep the phase selector from leaking below
+    return (GrB_SUCCESS) ; // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_uint16.c b/Source/Generated/GB_AxB__any_isgt_uint16.c
new file mode 100644
index 0000000000..e5a7a99a96
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_uint16
+// A'*B function (dot3):     GB_Adot3B__any_isgt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid just overwrites z with the new product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break unconditionally (cij is not examined)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (16 bits for uint16_t)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (this one is ANY_ISGT, so 0)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (here it is ISGT, so 0)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING // 0 in this file: the #else branch is compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_ISGT_UINT16 || GxB_NO_ANY_ISGT_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ // no parameter is used here directly; any use is in the included code
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2 // presumably selects phase 2 of the meta-template
+    #include "GB_AxB_dot2_meta.c" // kernel body, textually included here
+    #undef GB_PHASE_2_OF_2 // keep the phase selector from leaking below
+    return (GrB_SUCCESS) ; // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_uint32.c b/Source/Generated/GB_AxB__any_isgt_uint32.c
new file mode 100644
index 0000000000..1b5393901a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_uint32
+// A'*B function (dot3):     GB_Adot3B__any_isgt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z is 1 if x > y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: the ANY monoid just overwrites z with the new product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break unconditionally (cij is not examined)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none; an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (32 bits for uint32_t)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (this one is ANY_ISGT, so 0)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (here it is ISGT, so 0)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING // 0 in this file: the #else branch is compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_ISGT_UINT32 || GxB_NO_ANY_ISGT_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ // no parameter is used here directly; any use is in the included code
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2 // presumably selects phase 2 of the meta-template
+    #include "GB_AxB_dot2_meta.c" // kernel body, textually included here
+    #undef GB_PHASE_2_OF_2 // keep the phase selector from leaking below
+    return (GrB_SUCCESS) ; // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ // no parameter is used here directly; any use is in the included template
+    #if GB_DISABLE // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually included here
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_uint64.c b/Source/Generated/GB_AxB__any_isgt_uint64.c
new file mode 100644
index 0000000000..1137e43fe0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_uint64
+// A'*B function (dot3):     GB_Adot3B__any_isgt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (uint64_t) bytes from Hx + i to Cx + p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_ISGT_UINT64 || GxB_NO_ANY_ISGT_UINT64)
+
+//------------------------------------------------------------------------------
+// dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B computed with dot products
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is handled by dot3, not by this kernel
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand phase 2 only
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot3 kernel (phase 2): masked dot product method, C<M>=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot4 kernel: dense dot product, C+=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, or C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isgt_uint8.c b/Source/Generated/GB_AxB__any_isgt_uint8.c
new file mode 100644
index 0000000000..115e42d237
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isgt_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isgt_uint8
+// A'*B function (dot3):     GB_Adot3B__any_isgt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isgt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isgt_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik > bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik > bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISGT): z = (x > y)
+#define GB_MULT(z, x, y) \
+    z = (x > y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x > y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (uint8_t) bytes from Hx + i to Cx + p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_ISGT_UINT8 || GxB_NO_ANY_ISGT_UINT8)
+
+//------------------------------------------------------------------------------
+// dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B computed with dot products
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is handled by dot3, not by this kernel
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand phase 2 only
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot3 kernel (phase 2): masked dot product method, C<M>=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot4 kernel: dense dot product, C+=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, or C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_fp32.c b/Source/Generated/GB_AxB__any_isle_fp32.c
new file mode 100644
index 0000000000..a75e78be44
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_fp32
+// A'*B function (dot3):     GB_Adot3B__any_isle_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z = (x <= y)
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (float) bytes from Hx + i to Cx + p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_ISLE_FP32 || GxB_NO_ANY_ISLE_FP32)
+
+//------------------------------------------------------------------------------
+// dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B computed with dot products
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is handled by dot3, not by this kernel
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand phase 2 only
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot3 kernel (phase 2): masked dot product method, C<M>=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot4 kernel: dense dot product, C+=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, or C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_fp64.c b/Source/Generated/GB_AxB__any_isle_fp64.c
new file mode 100644
index 0000000000..f8a1df07cf
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_fp64
+// A'*B function (dot3):     GB_Adot3B__any_isle_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z = (x <= y)
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (double) bytes from Hx + i to Cx + p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_ISLE_FP64 || GxB_NO_ANY_ISLE_FP64)
+
+//------------------------------------------------------------------------------
+// dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B computed with dot products
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is handled by dot3, not by this kernel
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand phase 2 only
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot3 kernel (phase 2): masked dot product method, C<M>=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// dot4 kernel: dense dot product, C+=A'*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, or C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !GB_DISABLE     // hard-coded kernel enabled: expand the template
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #else               // disabled: GrB_NO_VALUE selects the generic case
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_int16.c b/Source/Generated/GB_AxB__any_isle_int16.c
new file mode 100644
index 0000000000..0a296a508c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_int16
+// A'*B function (dot3):     GB_Adot3B__any_isle_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x <= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (16 bits set here)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of type int16_t from Hx [i] to Cx [p]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_ISLE_INT16 || GxB_NO_ANY_ISLE_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_isle_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_int16
+(
+    GrB_Matrix C,                               // the C of C=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_isle_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_int16
+(
+    GrB_Matrix C,                               // the C of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_isle_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_int16
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"     // included at file scope
+
+GrB_Info GB_Asaxpy3B__any_isle_int16
+(
+    GrB_Matrix C,                               // the C of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_int32.c b/Source/Generated/GB_AxB__any_isle_int32.c
new file mode 100644
index 0000000000..bf21580fb8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_int32
+// A'*B function (dot3):     GB_Adot3B__any_isle_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x <= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (32 bits set here)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of type int32_t from Hx [i] to Cx [p]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_ISLE_INT32 || GxB_NO_ANY_ISLE_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_isle_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_int32
+(
+    GrB_Matrix C,                               // the C of C=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_isle_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_int32
+(
+    GrB_Matrix C,                               // the C of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_isle_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_int32
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"     // included at file scope
+
+GrB_Info GB_Asaxpy3B__any_isle_int32
+(
+    GrB_Matrix C,                               // the C of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_int64.c b/Source/Generated/GB_AxB__any_isle_int64.c
new file mode 100644
index 0000000000..2d164c10b0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_int64
+// A'*B function (dot3):     GB_Adot3B__any_isle_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x <= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for int64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of type int64_t from Hx [i] to Cx [p]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_ISLE_INT64 || GxB_NO_ANY_ISLE_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_isle_int64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_int64
+(
+    GrB_Matrix C,                               // the C of C=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_isle_int64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_int64
+(
+    GrB_Matrix C,                               // the C of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_isle_int64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_int64
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"     // included at file scope
+
+GrB_Info GB_Asaxpy3B__any_isle_int64
+(
+    GrB_Matrix C,                               // the C of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_int8.c b/Source/Generated/GB_AxB__any_isle_int8.c
new file mode 100644
index 0000000000..3268c644f0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_int8
+// A'*B function (dot3):     GB_Adot3B__any_isle_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (x <= y)
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (8 bits set here)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of type int8_t from Hx [i] to Cx [p]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_ISLE_INT8 || GxB_NO_ANY_ISLE_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_isle_int8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_int8
+(
+    GrB_Matrix C,                               // the C of C=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // function body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_isle_int8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_int8
+(
+    GrB_Matrix C,                               // the C of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_isle_int8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_int8
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"     // included at file scope
+
+GrB_Info GB_Asaxpy3B__any_isle_int8
+(
+    GrB_Matrix C,                               // the C of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif  // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_uint16.c b/Source/Generated/GB_AxB__any_isle_uint16.c
new file mode 100644
index 0000000000..d3c969fc4d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_uint16
+// A'*B function (dot3):     GB_Adot3B__any_isle_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: same as the multiply here, since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_ISLE_UINT16 || GxB_NO_ANY_ISLE_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // dot2 handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body: shared dot2 meta template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body: shared dot3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body: shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body: shared saxpy3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_uint32.c b/Source/Generated/GB_AxB__any_isle_uint32.c
new file mode 100644
index 0000000000..210baaa96f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_uint32
+// A'*B function (dot3):     GB_Adot3B__any_isle_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: same as the multiply here, since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_ISLE_UINT32 || GxB_NO_ANY_ISLE_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // dot2 handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body: shared dot2 meta template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body: shared dot3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body: shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body: shared saxpy3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_uint64.c b/Source/Generated/GB_AxB__any_isle_uint64.c
new file mode 100644
index 0000000000..f0fb35c37c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_uint64
+// A'*B function (dot3):     GB_Adot3B__any_isle_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: same as the multiply here, since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for this type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_ISLE_UINT64 || GxB_NO_ANY_ISLE_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // dot2 handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body: shared dot2 meta template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body: shared dot3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body: shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body: shared saxpy3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isle_uint8.c b/Source/Generated/GB_AxB__any_isle_uint8.c
new file mode 100644
index 0000000000..b8afed32db
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isle_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isle_uint8
+// A'*B function (dot3):     GB_Adot3B__any_isle_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isle_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isle_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLE): z is 1 if x <= y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: same as the multiply here, since the add is cij = z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLE || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_ISLE_UINT8 || GxB_NO_ANY_ISLE_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // dot2 handles C=A'*B and C<!M>=A'*B only; C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body: shared dot2 meta template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body: shared dot3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body: shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body: shared saxpy3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_fp32.c b/Source/Generated/GB_AxB__any_islt_fp32.c
new file mode 100644
index 0000000000..d8054458cc
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_fp32
+// A'*B function (dot3):     GB_Adot3B__any_islt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z is 1 if x < y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_ISLT_FP32 || GxB_NO_ANY_ISLT_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot2_meta.c.
+GrB_Info GB_Adot2B__any_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot3_template.c.
+GrB_Info GB_Adot3B__any_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot4_template.c.
+GrB_Info GB_Adot4B__any_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_saxpy3_template.c.
+GrB_Info GB_Asaxpy3B__any_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_fp64.c b/Source/Generated/GB_AxB__any_islt_fp64.c
new file mode 100644
index 0000000000..3d708c722b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_fp64
+// A'*B function (dot3):     GB_Adot3B__any_islt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z is 1 if x < y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_ISLT_FP64 || GxB_NO_ANY_ISLT_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot2_meta.c.
+GrB_Info GB_Adot2B__any_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot3_template.c.
+GrB_Info GB_Adot3B__any_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot4_template.c.
+GrB_Info GB_Adot4B__any_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_saxpy3_template.c.
+GrB_Info GB_Asaxpy3B__any_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_int16.c b/Source/Generated/GB_AxB__any_islt_int16.c
new file mode 100644
index 0000000000..9eff1c9d38
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_int16
+// A'*B function (dot3):     GB_Adot3B__any_islt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z is 1 if x < y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_ISLT_INT16 || GxB_NO_ANY_ISLT_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot2_meta.c.
+GrB_Info GB_Adot2B__any_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot3_template.c.
+GrB_Info GB_Adot3B__any_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot4_template.c.
+GrB_Info GB_Adot4B__any_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_saxpy3_template.c.
+GrB_Info GB_Asaxpy3B__any_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_int32.c b/Source/Generated/GB_AxB__any_islt_int32.c
new file mode 100644
index 0000000000..a0ce804428
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_int32
+// A'*B function (dot3):     GB_Adot3B__any_islt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z is 1 if x < y, and 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only); here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_ISLT_INT32 || GxB_NO_ANY_ISLT_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot2_meta.c.
+GrB_Info GB_Adot2B__any_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot3_template.c.
+GrB_Info GB_Adot3B__any_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_dot4_template.c.
+GrB_Info GB_Adot4B__any_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE; else runs GB_AxB_saxpy3_template.c.
+GrB_Info GB_Asaxpy3B__any_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_int64.c b/Source/Generated/GB_AxB__any_islt_int64.c
new file mode 100644
index 0000000000..a02e380067
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_int64
+// A'*B function (dot3):     GB_Adot3B__any_islt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: for the ANY monoid this just overwrites z with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): for ANY, always break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is C(i,j) = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which is y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for int64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_ISLT_INT64 || GxB_NO_ANY_ISLT_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B is handled by dot3 (GB_Adot3B__any_islt_int64), not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); body is GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel; body is GB_AxB_dot4_template.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), via saxpy3 template
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_int8.c b/Source/Generated/GB_AxB__any_islt_int8.c
new file mode 100644
index 0000000000..6a057fb445
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_int8
+// A'*B function (dot3):     GB_Adot3B__any_islt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: for the ANY monoid this just overwrites z with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): for ANY, always break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is C(i,j) = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which is y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_ISLT_INT8 || GxB_NO_ANY_ISLT_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B is handled by dot3 (GB_Adot3B__any_islt_int8), not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); body is GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel; body is GB_AxB_dot4_template.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), via saxpy3 template
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_uint16.c b/Source/Generated/GB_AxB__any_islt_uint16.c
new file mode 100644
index 0000000000..8669e14236
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_uint16
+// A'*B function (dot3):     GB_Adot3B__any_islt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: for the ANY monoid this just overwrites z with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): for ANY, always break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is C(i,j) = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which is y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_ISLT_UINT16 || GxB_NO_ANY_ISLT_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B is handled by dot3 (GB_Adot3B__any_islt_uint16), not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); body is GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel; body is GB_AxB_dot4_template.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), via saxpy3 template
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_uint32.c b/Source/Generated/GB_AxB__any_islt_uint32.c
new file mode 100644
index 0000000000..b592668f08
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_uint32
+// A'*B function (dot3):     GB_Adot3B__any_islt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISLT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add: for the ANY monoid this just overwrites z with (x < y)
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): for ANY, always break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is C(i,j) = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which is y for the ANY monoid
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_ISLT_UINT32 || GxB_NO_ANY_ISLT_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B is handled by dot3 (GB_Adot3B__any_islt_uint32), not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); body is GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (dot4 kernel; body is GB_AxB_dot4_template.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), via saxpy3 template
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_uint64.c b/Source/Generated/GB_AxB__any_islt_uint64.c
new file mode 100644
index 0000000000..c829d36cd7
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_uint64
+// A'*B function (dot3):     GB_Adot3B__any_islt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply selects y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_ISLT_UINT64 || GxB_NO_ANY_ISLT_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of 2, GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; returns GrB_NO_VALUE if GB_DISABLE is true
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 (Gustavson+Hash): C=A*B, C<M>=A*B, C<!M>=A*B; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_islt_uint8.c b/Source/Generated/GB_AxB__any_islt_uint8.c
new file mode 100644
index 0000000000..aa6f74d73a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_islt_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_islt_uint8
+// A'*B function (dot3):     GB_Adot3B__any_islt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_islt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_islt_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply selects y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISLT || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_ISLT_UINT8 || GxB_NO_ANY_ISLT_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of 2, GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; returns GrB_NO_VALUE if GB_DISABLE is true
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 (Gustavson+Hash): C=A*B, C<M>=A*B, C<!M>=A*B; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_fp32.c b/Source/Generated/GB_AxB__any_isne_fp32.c
new file mode 100644
index 0000000000..6c20ee4d66
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_fp32
+// A'*B function (dot3):     GB_Adot3B__any_isne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply selects y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_ISNE_FP32 || GxB_NO_ANY_ISNE_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of 2, GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; returns GrB_NO_VALUE if GB_DISABLE is true
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 (Gustavson+Hash): C=A*B, C<M>=A*B, C<!M>=A*B; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_fp64.c b/Source/Generated/GB_AxB__any_isne_fp64.c
new file mode 100644
index 0000000000..bc97b8c20b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_fp64
+// A'*B function (dot3):     GB_Adot3B__any_isne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid simply selects y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_ISNE_FP64 || GxB_NO_ANY_ISNE_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2 of 2, GB_AxB_dot2_meta.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; returns GrB_NO_VALUE if GB_DISABLE is true
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// saxpy3 (Gustavson+Hash): C=A*B, C<M>=A*B, C<!M>=A*B; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_int16.c b/Source/Generated/GB_AxB__any_isne_int16.c
new file mode 100644
index 0000000000..2d5bef4980
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_int16
+// A'*B function (dot3):     GB_Adot3B__any_isne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// declare aik and load it: int16_t aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: int16_t bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: the ANY monoid just overwrites z with (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (16 bits set here)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_ISNE_INT16 || GxB_NO_ANY_ISNE_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_int16    // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles C=A'*B and C<!M>=A'*B only
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // textual body; sees GB_PHASE_2_OF_2
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_int16    // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_int16    // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_int16  // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_int32.c b/Source/Generated/GB_AxB__any_isne_int32.c
new file mode 100644
index 0000000000..9958f42e0e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_int32
+// A'*B function (dot3):     GB_Adot3B__any_isne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// declare aik and load it: int32_t aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: int32_t bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: the ANY monoid just overwrites z with (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (32 bits set here)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_ISNE_INT32 || GxB_NO_ANY_ISNE_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_int32    // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles C=A'*B and C<!M>=A'*B only
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // textual body; sees GB_PHASE_2_OF_2
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_int32    // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_int32    // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_int32  // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_int64.c b/Source/Generated/GB_AxB__any_isne_int64.c
new file mode 100644
index 0000000000..358cd44d74
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_int64
+// A'*B function (dot3):     GB_Adot3B__any_isne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// declare aik and load it: int64_t aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: int64_t bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: the ANY monoid just overwrites z with (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_ISNE_INT64 || GxB_NO_ANY_ISNE_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_int64    // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles C=A'*B and C<!M>=A'*B only
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // textual body; sees GB_PHASE_2_OF_2
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_int64    // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_int64    // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_int64  // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_int8.c b/Source/Generated/GB_AxB__any_isne_int8.c
new file mode 100644
index 0000000000..8d19f84005
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_int8
+// A'*B function (dot3):     GB_Adot3B__any_isne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare aik and load it: int8_t aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: int8_t bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: the ANY monoid just overwrites z with (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): an unconditional break here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (8 bits set here)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_ISNE_INT8 || GxB_NO_ANY_ISNE_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_int8     // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles C=A'*B and C<!M>=A'*B only
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // textual body; sees GB_PHASE_2_OF_2
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_int8     // C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_int8     // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_int8   // C=A*B, C<M>=A*B, or C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // textual body of this function
+    return (GrB_SUCCESS) ;      // reached after the included body completes
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_uint16.c b/Source/Generated/GB_AxB__any_isne_uint16.c
new file mode 100644
index 0000000000..9483bc06e6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_uint16
+// A'*B function (dot3):     GB_Adot3B__any_isne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z is 1 if x and y differ, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: same as the multiply, since the ANY monoid does not accumulate
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break at the terminal value (dot product only); here unconditional, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_ISNE_UINT16 || GxB_NO_ANY_ISNE_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_uint16 // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // defined only while the meta template is included
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_uint16 // C<M>=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_uint16 // C+=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_uint16   // saxpy3 method; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_uint32.c b/Source/Generated/GB_AxB__any_isne_uint32.c
new file mode 100644
index 0000000000..7844e4fedb
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_uint32
+// A'*B function (dot3):     GB_Adot3B__any_isne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z is 1 if x and y differ, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: same as the multiply, since the ANY monoid does not accumulate
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break at the terminal value (dot product only); here unconditional, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_ISNE_UINT32 || GxB_NO_ANY_ISNE_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_uint32 // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // defined only while the meta template is included
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_uint32 // C<M>=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_uint32 // C+=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_uint32   // saxpy3 method; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_uint64.c b/Source/Generated/GB_AxB__any_isne_uint64.c
new file mode 100644
index 0000000000..5813204965
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_uint64
+// A'*B function (dot3):     GB_Adot3B__any_isne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z is 1 if x and y differ, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: same as the multiply, since the ANY monoid does not accumulate
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break at the terminal value (dot product only); here unconditional, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for this 64-bit type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_ISNE_UINT64 || GxB_NO_ANY_ISNE_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_uint64 // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // defined only while the meta template is included
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_uint64 // C<M>=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_uint64 // C+=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_uint64   // saxpy3 method; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_isne_uint8.c b/Source/Generated/GB_AxB__any_isne_uint8.c
new file mode 100644
index 0000000000..2810401d25
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_isne_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_isne_uint8
+// A'*B function (dot3):     GB_Adot3B__any_isne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_isne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_isne_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (ISNE): z is 1 if x and y differ, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: same as the multiply, since the ANY monoid does not accumulate
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break at the terminal value (dot product only); here unconditional, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_ISNE || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_ISNE_UINT8 || GxB_NO_ANY_ISNE_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_isne_uint8  // GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #define GB_PHASE_2_OF_2             // defined only while the meta template is included
+    #include "GB_AxB_dot2_meta.c"       // the function body is this textually included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_isne_uint8  // C<M>=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_isne_uint8  // C+=A'*B; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_isne_uint8    // saxpy3 method; GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE
+(                                   // parameters are not referenced below, only (if at all) by the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled (see GB_DISABLE): generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this textually included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_bool.c b/Source/Generated/GB_AxB__any_land_bool.c
new file mode 100644
index 0000000000..5a139b6bc9
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_bool
+// A'*B function (dot3):     GB_Adot3B__any_land_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik && bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik && bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x && y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x && y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_LAND_BOOL || GxB_NO_ANY_LAND_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // template supplies the function body
+    #undef GB_PHASE_2_OF_2              // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot3_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot4_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_saxpy3_template.c" // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_fp32.c b/Source/Generated/GB_AxB__any_land_fp32.c
new file mode 100644
index 0000000000..c3f45d2213
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_fp32
+// A'*B function (dot3):     GB_Adot3B__any_land_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = ((aik != 0) && (bkj != 0))
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_LAND_FP32 || GxB_NO_ANY_LAND_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // template supplies the function body
+    #undef GB_PHASE_2_OF_2              // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot3_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot4_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_saxpy3_template.c" // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_fp64.c b/Source/Generated/GB_AxB__any_land_fp64.c
new file mode 100644
index 0000000000..4d8d8a8629
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_fp64
+// A'*B function (dot3):     GB_Adot3B__any_land_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = ((aik != 0) && (bkj != 0))
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_LAND_FP64 || GxB_NO_ANY_LAND_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // template supplies the function body
+    #undef GB_PHASE_2_OF_2              // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot3_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot4_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_saxpy3_template.c" // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_int16.c b/Source/Generated/GB_AxB__any_land_int16.c
new file mode 100644
index 0000000000..8b56733f14
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_int16
+// A'*B function (dot3):     GB_Adot3B__any_land_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_LAND_INT16 || GxB_NO_ANY_LAND_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // template supplies the function body
+    #undef GB_PHASE_2_OF_2              // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot3_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_dot4_template.c"   // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; see GB_DISABLE above
+    #else
+    #include "GB_AxB_saxpy3_template.c" // template supplies the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_int32.c b/Source/Generated/GB_AxB__any_land_int32.c
new file mode 100644
index 0000000000..e866de0bc3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_int32
+// A'*B function (dot3):     GB_Adot3B__any_land_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z   (the new value simply replaces cij)
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;   (unconditional, see GB_DOT_TERMINAL below)
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: logical AND of the two operands (result is 0 or 1)
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add: the product is computed, then assigned to z (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain assignment below
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain assignment below
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts int32_t entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_LAND_INT32 || GxB_NO_ANY_LAND_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_land_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_int32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_land_int32)
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2         // defined only for the include that follows
+    #include "GB_AxB_dot2_meta.c"   // function body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_land_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_int32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_land_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_int32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), any_land_int32
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_int32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_int64.c b/Source/Generated/GB_AxB__any_land_int64.c
new file mode 100644
index 0000000000..b993c736a9
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_int64
+// A'*B function (dot3):     GB_Adot3B__any_land_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z   (the new value simply replaces cij)
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;   (unconditional, see GB_DOT_TERMINAL below)
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: logical AND of the two operands (result is 0 or 1)
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add: the product is computed, then assigned to z (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain assignment below
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for int64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain assignment below
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts int64_t entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_LAND_INT64 || GxB_NO_ANY_LAND_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_land_int64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_int64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_land_int64)
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2         // defined only for the include that follows
+    #include "GB_AxB_dot2_meta.c"   // function body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_land_int64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_int64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_land_int64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_int64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), any_land_int64
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_int64    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_int8.c b/Source/Generated/GB_AxB__any_land_int8.c
new file mode 100644
index 0000000000..8d215d56f4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_int8
+// A'*B function (dot3):     GB_Adot3B__any_land_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z   (the new value simply replaces cij)
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;   (unconditional, see GB_DOT_TERMINAL below)
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: logical AND of the two operands (result is 0 or 1)
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add: the product is computed, then assigned to z (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain assignment below
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain assignment below
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts int8_t entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_LAND_INT8 || GxB_NO_ANY_LAND_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_land_int8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_int8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_land_int8)
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2         // defined only for the include that follows
+    #include "GB_AxB_dot2_meta.c"   // function body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_land_int8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_int8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_land_int8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_int8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), any_land_int8
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_int8     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_uint16.c b/Source/Generated/GB_AxB__any_land_uint16.c
new file mode 100644
index 0000000000..f41aa50c77
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_uint16
+// A'*B function (dot3):     GB_Adot3B__any_land_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z   (the new value simply replaces cij)
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;   (unconditional, see GB_DOT_TERMINAL below)
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: logical AND of the two operands (result is 0 or 1)
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add: the product is computed, then assigned to z (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain assignment below
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain assignment below
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts uint16_t entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_LAND_UINT16 || GxB_NO_ANY_LAND_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_land_uint16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_uint16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (see GB_Adot3B__any_land_uint16)
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2         // defined only for the include that follows
+    #include "GB_AxB_dot2_meta.c"   // function body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_land_uint16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_uint16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, hard-coded for the any_land_uint16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_uint16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), any_land_uint16
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_uint16   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(   // no parameter is referenced below; any use is inside the included file
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // GB_DISABLE is nonzero: use generic case
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is textually included
+    return (GrB_SUCCESS) ;          // success path when the kernel is enabled
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_uint32.c b/Source/Generated/GB_AxB__any_land_uint32.c
new file mode 100644
index 0000000000..ee12d3801f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_uint32
+// A'*B function (dot3):     GB_Adot3B__any_land_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_LAND_UINT32 || GxB_NO_ANY_LAND_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // kernel body is #include'd from GB_AxB_dot2_meta.c unless GB_DISABLE != 0
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot4 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // kernel body is #include'd from the saxpy3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_uint64.c b/Source/Generated/GB_AxB__any_land_uint64.c
new file mode 100644
index 0000000000..22e365881d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_uint64
+// A'*B function (dot3):     GB_Adot3B__any_land_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_LAND_UINT64 || GxB_NO_ANY_LAND_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // kernel body is #include'd from GB_AxB_dot2_meta.c unless GB_DISABLE != 0
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot4 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // kernel body is #include'd from the saxpy3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_land_uint8.c b/Source/Generated/GB_AxB__any_land_uint8.c
new file mode 100644
index 0000000000..a8334a1ff6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_land_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_land_uint8
+// A'*B function (dot3):     GB_Adot3B__any_land_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_land_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_land_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = ((aik != 0) && (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LAND || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_LAND_UINT8 || GxB_NO_ANY_LAND_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // kernel body is #include'd from GB_AxB_dot2_meta.c unless GB_DISABLE != 0
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot4 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // kernel body is #include'd from the saxpy3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_bool.c b/Source/Generated/GB_AxB__any_le_bool.c
new file mode 100644
index 0000000000..04300e1add
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_bool
+// A'*B function (dot3):     GB_Adot3B__any_le_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_LE_BOOL || GxB_NO_ANY_LE_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // kernel body is #include'd from GB_AxB_dot2_meta.c unless GB_DISABLE != 0
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // kernel body is #include'd from the dot4 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // kernel body is #include'd from the saxpy3 template unless GB_DISABLE != 0
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_fp32.c b/Source/Generated/GB_AxB__any_le_fp32.c
new file mode 100644
index 0000000000..61b83985e4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_fp32
+// A'*B function (dot3):     GB_Adot3B__any_le_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_fp32
+
+// C type:   bool
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    bool
+
+// declare the scalar aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare the scalar bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x <= y), the LE comparison
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is overwritten and its old value unused
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: expands to an unconditional break; cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of type bool (same as GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is the plain assignment Cx [p] = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the monoid add function, which here evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, so these are no-ops (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is the plain assignment Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (bool) bytes from Hx+(i) to Cx+(p)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_FP32 || GxB_NO_ANY_BOOL || GxB_NO_LE_FP32 || GxB_NO_ANY_LE_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_LE_FP32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_fp32     // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask of the C<!M> case
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,           // presumably how B is partitioned
+    int64_t *GB_RESTRICT *C_counts,         // NOTE(review): role not shown here
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is set only for this include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_LE_FP32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_fp32     // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_LE_FP32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_fp32     // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), ANY_LE_FP32
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_fp32   // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nfine,                    // presumably # of fine-grain tasks
+    const int nthreads,                             // presumably # of threads
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_fp64.c b/Source/Generated/GB_AxB__any_le_fp64.c
new file mode 100644
index 0000000000..e1c8d94595
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_fp64
+// A'*B function (dot3):     GB_Adot3B__any_le_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_fp64
+
+// C type:   bool
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    bool
+
+// declare the scalar aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare the scalar bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x <= y), the LE comparison
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is overwritten and its old value unused
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: expands to an unconditional break; cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of type bool (same as GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is the plain assignment Cx [p] = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the monoid add function, which here evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, so these are no-ops (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is the plain assignment Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (bool) bytes from Hx+(i) to Cx+(p)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_FP64 || GxB_NO_ANY_BOOL || GxB_NO_LE_FP64 || GxB_NO_ANY_LE_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_LE_FP64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_fp64     // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask of the C<!M> case
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,           // presumably how B is partitioned
+    int64_t *GB_RESTRICT *C_counts,         // NOTE(review): role not shown here
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is set only for this include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_LE_FP64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_fp64     // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_LE_FP64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_fp64     // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), ANY_LE_FP64
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_fp64   // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nfine,                    // presumably # of fine-grain tasks
+    const int nthreads,                             // presumably # of threads
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_int16.c b/Source/Generated/GB_AxB__any_le_int16.c
new file mode 100644
index 0000000000..b8413af877
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_int16
+// A'*B function (dot3):     GB_Adot3B__any_le_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_int16
+
+// C type:   bool
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    bool
+
+// declare the scalar aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare the scalar bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x <= y), the LE comparison
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is overwritten and its old value unused
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: expands to an unconditional break; cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of type bool (same as GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is the plain assignment Cx [p] = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the monoid add function, which here evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, so these are no-ops (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is the plain assignment Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (bool) bytes from Hx+(i) to Cx+(p)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_INT16 || GxB_NO_ANY_BOOL || GxB_NO_LE_INT16 || GxB_NO_ANY_LE_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_LE_INT16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_int16    // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask of the C<!M> case
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,           // presumably how B is partitioned
+    int64_t *GB_RESTRICT *C_counts,         // NOTE(review): role not shown here
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is set only for this include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_LE_INT16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_int16    // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_LE_INT16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_int16    // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), ANY_LE_INT16
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_int16  // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nfine,                    // presumably # of fine-grain tasks
+    const int nthreads,                             // presumably # of threads
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_int32.c b/Source/Generated/GB_AxB__any_le_int32.c
new file mode 100644
index 0000000000..c47ead8375
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_int32
+// A'*B function (dot3):     GB_Adot3B__any_le_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_int32
+
+// C type:   bool
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    bool
+
+// declare the scalar aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare the scalar bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x <= y), the LE comparison
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid, z is overwritten and its old value unused
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: expands to an unconditional break; cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of type bool (same as GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is the plain assignment Cx [p] = t for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the monoid add function, which here evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: result is purely symbolic, so these are no-ops (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is the plain assignment Hx [i] = t for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy of (len) * sizeof (bool) bytes from Hx+(i) to Cx+(p)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_INT32 || GxB_NO_ANY_BOOL || GxB_NO_LE_INT32 || GxB_NO_ANY_LE_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_LE_INT32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_int32    // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask of the C<!M> case
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,           // presumably how B is partitioned
+    int64_t *GB_RESTRICT *C_counts,         // NOTE(review): role not shown here
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is set only for this include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_LE_INT32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_int32    // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_LE_INT32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_int32    // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads                              // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash), ANY_LE_INT32
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_int32  // GrB_NO_VALUE if GB_DISABLE holds
+(
+    GrB_Matrix C,                                   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks tasks
+    const int ntasks,                               // presumably # of tasks
+    const int nfine,                    // presumably # of fine-grain tasks
+    const int nthreads,                             // presumably # of threads
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_int64.c b/Source/Generated/GB_AxB__any_le_int64.c
new file mode 100644
index 0000000000..636f0612ea
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_int64
+// A'*B function (dot3):     GB_Adot3B__any_le_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_int64
+
+// C type:   bool
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LE): z is true iff x <= y
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid the new product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only); unconditional here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING  // 0 in this file, so the #else branch below is the one compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries of type bool
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_INT64 || GxB_NO_ANY_BOOL || GxB_NO_LE_INT64 || GxB_NO_ANY_LE_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_int64  // dot2 kernel (C=A'*B or C<!M>=A'*B) for the any_le_int64 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2  // defined only around the include below; presumably selects the template's phase 2 -- confirm
+    #include "GB_AxB_dot2_meta.c"  // the computation itself comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_int64  // dot3 kernel (masked C<M>=A'*B) for the any_le_int64 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_int64  // dot4 kernel (C+=A'*B, dense dot product) for the any_le_int64 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_int64  // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_le_int64 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_int8.c b/Source/Generated/GB_AxB__any_le_int8.c
new file mode 100644
index 0000000000..d85b6d8b68
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_int8
+// A'*B function (dot3):     GB_Adot3B__any_le_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_int8
+
+// C type:   bool
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LE): z is true iff x <= y
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid the new product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only); unconditional here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING  // 0 in this file, so the #else branch below is the one compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries of type bool
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_INT8 || GxB_NO_ANY_BOOL || GxB_NO_LE_INT8 || GxB_NO_ANY_LE_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_int8  // dot2 kernel (C=A'*B or C<!M>=A'*B) for the any_le_int8 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2  // defined only around the include below; presumably selects the template's phase 2 -- confirm
+    #include "GB_AxB_dot2_meta.c"  // the computation itself comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_int8  // dot3 kernel (masked C<M>=A'*B) for the any_le_int8 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_int8  // dot4 kernel (C+=A'*B, dense dot product) for the any_le_int8 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_int8  // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_le_int8 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_uint16.c b/Source/Generated/GB_AxB__any_le_uint16.c
new file mode 100644
index 0000000000..5a2c3cece1
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_uint16
+// A'*B function (dot3):     GB_Adot3B__any_le_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_uint16
+
+// C type:   bool
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LE): z is true iff x <= y
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid the new product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only); unconditional here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING  // 0 in this file, so the #else branch below is the one compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries of type bool
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_UINT16 || GxB_NO_ANY_BOOL || GxB_NO_LE_UINT16 || GxB_NO_ANY_LE_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_uint16  // dot2 kernel (C=A'*B or C<!M>=A'*B) for the any_le_uint16 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2  // defined only around the include below; presumably selects the template's phase 2 -- confirm
+    #include "GB_AxB_dot2_meta.c"  // the computation itself comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_uint16  // dot3 kernel (masked C<M>=A'*B) for the any_le_uint16 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_uint16  // dot4 kernel (C+=A'*B, dense dot product) for the any_le_uint16 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_uint16  // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_le_uint16 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_uint32.c b/Source/Generated/GB_AxB__any_le_uint32.c
new file mode 100644
index 0000000000..d84e30ad30
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_uint32
+// A'*B function (dot3):     GB_Adot3B__any_le_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_uint32
+
+// C type:   bool
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LE): z is true iff x <= y
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add: with the ANY monoid the new product simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only); unconditional here
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain overwrite
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING  // 0 in this file, so the #else branch below is the one compiled
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain overwrite
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries of type bool
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_UINT32 || GxB_NO_ANY_BOOL || GxB_NO_LE_UINT32 || GxB_NO_ANY_LE_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_uint32  // dot2 kernel (C=A'*B or C<!M>=A'*B) for the any_le_uint32 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2  // defined only around the include below; presumably selects the template's phase 2 -- confirm
+    #include "GB_AxB_dot2_meta.c"  // the computation itself comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_uint32  // dot3 kernel (masked C<M>=A'*B) for the any_le_uint32 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_uint32  // dot4 kernel (C+=A'*B, dense dot product) for the any_le_uint32 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_uint32  // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_le_uint32 semiring
+(  // returns GrB_SUCCESS, or GrB_NO_VALUE if GB_DISABLE is true at compile time
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context  // no parameter is referenced here; presumably all are used by the included template
+)
+{ 
+    #if GB_DISABLE  // kernel compiled out: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // the computation itself comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_uint64.c b/Source/Generated/GB_AxB__any_le_uint64.c
new file mode 100644
index 0000000000..d025da22eb
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_uint64
+// A'*B function (dot3):     GB_Adot3B__any_le_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_uint64
+
+// C type:   bool
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t    // type of the entries of A
+
+#define GB_BTYPE \
+    uint64_t    // type of the entries of B
+
+#define GB_CTYPE \
+    bool        // type of the entries of C
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x <= y)
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add; with the ANY monoid the add step just keeps the new product
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (of type GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is used here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring (use the generic case) if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_UINT64 || GxB_NO_ANY_BOOL || GxB_NO_LE_UINT64 || GxB_NO_ANY_LE_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product, dot2 method (phase 2): any_le_uint64
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product, dot3 method (phase 2): any_le_uint64
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, dot4 method: any_le_uint64
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash): any_le_uint64
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_le_uint8.c b/Source/Generated/GB_AxB__any_le_uint8.c
new file mode 100644
index 0000000000..c13ec3d3d3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_le_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_le_uint8
+// A'*B function (dot3):     GB_Adot3B__any_le_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_le_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_le_uint8
+
+// C type:   bool
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik <= bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik <= bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t     // type of the entries of A
+
+#define GB_BTYPE \
+    uint8_t     // type of the entries of B
+
+#define GB_CTYPE \
+    bool        // type of the entries of C
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x <= y)
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
+
+// multiply-add; with the ANY monoid the add step just keeps the new product
+#define GB_MULTADD(z, x, y) \
+    z = (x <= y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (of type GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is used here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring (use the generic case) if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LE || GxB_NO_UINT8 || GxB_NO_ANY_BOOL || GxB_NO_LE_UINT8 || GxB_NO_ANY_LE_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product, dot2 method (phase 2): any_le_uint8
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product, dot3 method (phase 2): any_le_uint8
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, dot4 method: any_le_uint8
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash): any_le_uint8
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_bool.c b/Source/Generated/GB_AxB__any_lor_bool.c
new file mode 100644
index 0000000000..22f8180aab
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_bool
+// A'*B function (dot3):     GB_Adot3B__any_lor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik || bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik || bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool        // type of the entries of A
+
+#define GB_BTYPE \
+    bool        // type of the entries of B
+
+#define GB_CTYPE \
+    bool        // type of the entries of C
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x || y)
+#define GB_MULT(z, x, y) \
+    z = (x || y)
+
+// multiply-add; with the ANY monoid the add step just keeps the new product
+#define GB_MULTADD(z, x, y) \
+    z = (x || y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (of type GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is used here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring (use the generic case) if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_LOR_BOOL || GxB_NO_ANY_LOR_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product, dot2 method (phase 2): any_lor_bool
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product, dot3 method (phase 2): any_lor_bool
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, dot4 method: any_lor_bool
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash): any_lor_bool
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_fp32.c b/Source/Generated/GB_AxB__any_lor_fp32.c
new file mode 100644
index 0000000000..a3b846c3d2
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_fp32
+// A'*B function (dot3):     GB_Adot3B__any_lor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = ((aik != 0) || (bkj != 0))
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float       // type of the entries of A
+
+#define GB_BTYPE \
+    float       // type of the entries of B
+
+#define GB_CTYPE \
+    float       // type of the entries of C
+
+// declare aik and load it from A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it from B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: logical OR of (x != 0) and (y != 0), stored as 0 or 1
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
+
+// multiply-add; with the ANY monoid the add step just keeps the new product
+#define GB_MULTADD(z, x, y) \
+    z = ((x != 0) || (y != 0))
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): the ANY monoid always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (of type GB_CTYPE)
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is used here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring (use the generic case) if any of these is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_LOR_FP32 || GxB_NO_ANY_LOR_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product, dot2 method (phase 2): any_lor_fp32
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product, dot3 method (phase 2): any_lor_fp32
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, dot4 method: any_lor_fp32
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash): any_lor_fp32
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // this hard-coded kernel is disabled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_fp64.c b/Source/Generated/GB_AxB__any_lor_fp64.c
new file mode 100644
index 0000000000..d6d73db28d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_fp64
+// A'*B function (dot3):     GB_Adot3B__any_lor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = ((aik != 0) || (bkj != 0))
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double                      // the A type of this semiring
+
+#define GB_BTYPE \
+    double                      // the B type of this semiring
+
+#define GB_CTYPE \
+    double                      // the C type of this semiring
+
+// declare aik and load it from the values of A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it from the values of B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]         // the p-th entry of the Cx array
+
+// multiply operator: z = x LOR y; x and y are parenthesized so any expression works
+#define GB_MULT(z, x, y) \
+    z = (((x) != 0) || ((y) != 0))
+
+// multiply-add: for this monoid, overwrites z with x LOR y (the old z is not read)
+#define GB_MULTADD(z, x, y) \
+    z = (((x) != 0) || ((y) != 0))
+
+// identity value of the additive monoid (0 for this semiring)
+#define GB_IDENTITY \
+    0
+
+// dot product only: early exit, here an unconditional break (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for the dot-product loop: none here, just an empty statement
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization: the same as GB_PRAGMA_SIMD
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of the C type
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// load C(i,j): cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// store C(i,j): Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// write C(i,j): Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites the entry: Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y (x is ignored)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-, 16-, and 32-bit integers (0 here: double is none)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if the monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if the monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: purely symbolic result; these expand to nothing (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (each sizeof (double)) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_LOR_FP64 || GxB_NO_ANY_LOR_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_fp64       // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is not done here; it now uses dot3
+    #if !(GB_DISABLE)
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_fp64       // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot3_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_fp64       // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot4_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_fp64     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_saxpy3_template.c" // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_int16.c b/Source/Generated/GB_AxB__any_lor_int16.c
new file mode 100644
index 0000000000..923fb4550b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_int16
+// A'*B function (dot3):     GB_Adot3B__any_lor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t                     // the A type of this semiring
+
+#define GB_BTYPE \
+    int16_t                     // the B type of this semiring
+
+#define GB_CTYPE \
+    int16_t                     // the C type of this semiring
+
+// declare aik and load it from the values of A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it from the values of B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]         // the p-th entry of the Cx array
+
+// multiply operator: z = x LOR y; x and y are parenthesized so any expression works
+#define GB_MULT(z, x, y) \
+    z = (((x) != 0) || ((y) != 0))
+
+// multiply-add: overwrites z with x LOR y; declares x_op_y (needs its own scope)
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (((x) != 0) || ((y) != 0)) ; z = x_op_y
+
+// identity value of the additive monoid (0 for this semiring)
+#define GB_IDENTITY \
+    0
+
+// dot product only: early exit, here an unconditional break (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for the dot-product loop: none here, just an empty statement
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization: the same as GB_PRAGMA_SIMD
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of the C type
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// load C(i,j): cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// store C(i,j): Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// write C(i,j): Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites the entry: Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y (x is ignored)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-, 16-, and 32-bit integers (16 one-bits for int16_t)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if the monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if the monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: purely symbolic result; these expand to nothing (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (each sizeof (int16_t)) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_LOR_INT16 || GxB_NO_ANY_LOR_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_int16      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is not done here; it now uses dot3
+    #if !(GB_DISABLE)
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_int16      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot3_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_int16      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot4_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_int16    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_saxpy3_template.c" // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_int32.c b/Source/Generated/GB_AxB__any_lor_int32.c
new file mode 100644
index 0000000000..164ae7b199
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_int32
+// A'*B function (dot3):     GB_Adot3B__any_lor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t                     // the A type of this semiring
+
+#define GB_BTYPE \
+    int32_t                     // the B type of this semiring
+
+#define GB_CTYPE \
+    int32_t                     // the C type of this semiring
+
+// declare aik and load it from the values of A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it from the values of B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]         // the p-th entry of the Cx array
+
+// multiply operator: z = x LOR y; x and y are parenthesized so any expression works
+#define GB_MULT(z, x, y) \
+    z = (((x) != 0) || ((y) != 0))
+
+// multiply-add: overwrites z with x LOR y; declares x_op_y (needs its own scope)
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (((x) != 0) || ((y) != 0)) ; z = x_op_y
+
+// identity value of the additive monoid (0 for this semiring)
+#define GB_IDENTITY \
+    0
+
+// dot product only: early exit, here an unconditional break (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for the dot-product loop: none here, just an empty statement
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization: the same as GB_PRAGMA_SIMD
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of the C type
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// load C(i,j): cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// store C(i,j): Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// write C(i,j): Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites the entry: Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y (x is ignored)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-, 16-, and 32-bit integers (32 one-bits for int32_t)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if the monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if the monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: purely symbolic result; these expand to nothing (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (each sizeof (int32_t)) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_LOR_INT32 || GxB_NO_ANY_LOR_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_int32      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is not done here; it now uses dot3
+    #if !(GB_DISABLE)
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_int32      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot3_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_int32      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot4_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_int32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_saxpy3_template.c" // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_int64.c b/Source/Generated/GB_AxB__any_lor_int64.c
new file mode 100644
index 0000000000..cbec7e7e11
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_int64
+// A'*B function (dot3):     GB_Adot3B__any_lor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t                     // the A type of this semiring
+
+#define GB_BTYPE \
+    int64_t                     // the B type of this semiring
+
+#define GB_CTYPE \
+    int64_t                     // the C type of this semiring
+
+// declare aik and load it from the values of A: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it from the values of B: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]         // the p-th entry of the Cx array
+
+// multiply operator: z = x LOR y; x and y are parenthesized so any expression works
+#define GB_MULT(z, x, y) \
+    z = (((x) != 0) || ((y) != 0))
+
+// multiply-add: overwrites z with x LOR y; declares x_op_y (needs its own scope)
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (((x) != 0) || ((y) != 0)) ; z = x_op_y
+
+// identity value of the additive monoid (0 for this semiring)
+#define GB_IDENTITY \
+    0
+
+// dot product only: early exit, here an unconditional break (cij is unused)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for the dot-product loop: none here, just an empty statement
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization: the same as GB_PRAGMA_SIMD
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar, of the C type
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// load C(i,j): cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// store C(i,j): Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// write C(i,j): Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid just overwrites the entry: Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for this monoid is simply y (x is ignored)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-, 16-, and 32-bit integers (0 here: int64_t is none)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if the monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if the monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR only: purely symbolic result; these expand to nothing (no Hx)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid just overwrites: Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (each sizeof (int64_t)) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_LOR_INT64 || GxB_NO_ANY_LOR_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_int64      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{
+    // the masked case C<M>=A'*B is not done here; it now uses dot3
+    #if !(GB_DISABLE)
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_int64      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot3_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_int64      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_dot4_template.c"   // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_int64    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{
+    #if !(GB_DISABLE)
+    #include "GB_AxB_saxpy3_template.c" // the function body is the shared template
+    return (GrB_SUCCESS) ;
+    #else
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_int8.c b/Source/Generated/GB_AxB__any_lor_int8.c
new file mode 100644
index 0000000000..df942cb9f4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_int8
+// A'*B function (dot3):     GB_Adot3B__any_lor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is 1 if either x or y is nonzero, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
+
+// multiply-add: z is overwritten with the product x_op_y (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) || (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where the monoid "add" here just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the monoid "add" here just evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // taken here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where the monoid "add" here just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_LOR_INT8 || GxB_NO_ANY_LOR_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_int8    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2             // defined only for the include below
+    #include "GB_AxB_dot2_meta.c"       // function body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_int8    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_int8    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_int8  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_uint16.c b/Source/Generated/GB_AxB__any_lor_uint16.c
new file mode 100644
index 0000000000..9f2e41e2b5
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_uint16
+// A'*B function (dot3):     GB_Adot3B__any_lor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is 1 if either x or y is nonzero, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
+
+// multiply-add: z is overwritten with the product x_op_y (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where the monoid "add" here just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the monoid "add" here just evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // taken here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where the monoid "add" here just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_LOR_UINT16 || GxB_NO_ANY_LOR_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_uint16  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2             // defined only for the include below
+    #include "GB_AxB_dot2_meta.c"       // function body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_uint16  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_uint16  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_uint16    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_uint32.c b/Source/Generated/GB_AxB__any_lor_uint32.c
new file mode 100644
index 0000000000..9022a464b0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_uint32
+// A'*B function (dot3):     GB_Adot3B__any_lor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is 1 if either x or y is nonzero, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
+
+// multiply-add: z is overwritten with the product x_op_y (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where the monoid "add" here just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the monoid "add" here just evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // taken here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where the monoid "add" here just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_LOR_UINT32 || GxB_NO_ANY_LOR_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_uint32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2             // defined only for the include below
+    #include "GB_AxB_dot2_meta.c"       // function body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_uint32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_uint32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_uint32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_uint64.c b/Source/Generated/GB_AxB__any_lor_uint64.c
new file mode 100644
index 0000000000..3c01df97e2
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_uint64
+// A'*B function (dot3):     GB_Adot3B__any_lor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is 1 if either x or y is nonzero, 0 otherwise
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
+
+// multiply-add: z is overwritten with the product x_op_y (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where the monoid "add" here just overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the monoid "add" here just evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for this 64-bit type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // taken here, since GB_IS_ANY_PAIR_SEMIRING is 0
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where the monoid "add" here just overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_LOR_UINT64 || GxB_NO_ANY_LOR_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_uint64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2             // defined only for the include below
+    #include "GB_AxB_dot2_meta.c"       // function body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_uint64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_uint64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_uint64    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // kernel compiled out by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lor_uint8.c b/Source/Generated/GB_AxB__any_lor_uint8.c
new file mode 100644
index 0000000000..aa5b3ecd7c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lor_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lor_uint8
+// A'*B function (dot3):     GB_Adot3B__any_lor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_lor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lor_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = ((aik != 0) || (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LOR || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_LOR_UINT8 || GxB_NO_ANY_LOR_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"   // body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_bool.c b/Source/Generated/GB_AxB__any_lt_bool.c
new file mode 100644
index 0000000000..7ae44382bd
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_bool
+// A'*B function (dot3):     GB_Adot3B__any_lt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_LT_BOOL || GxB_NO_ANY_LT_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"   // body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_fp32.c b/Source/Generated/GB_AxB__any_lt_fp32.c
new file mode 100644
index 0000000000..2d9b076cc5
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_fp32
+// A'*B function (dot3):     GB_Adot3B__any_lt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_fp32
+
+// C type:   bool
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_FP32 || GxB_NO_ANY_BOOL || GxB_NO_LT_FP32 || GxB_NO_ANY_LT_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"   // body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_fp64.c b/Source/Generated/GB_AxB__any_lt_fp64.c
new file mode 100644
index 0000000000..c6b5920974
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_fp64
+// A'*B function (dot3):     GB_Adot3B__any_lt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_fp64
+
+// C type:   bool
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is just Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_FP64 || GxB_NO_ANY_BOOL || GxB_NO_LT_FP64 || GxB_NO_ANY_LT_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"   // body comes from this template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_int16.c b/Source/Generated/GB_AxB__any_lt_int16.c
new file mode 100644
index 0000000000..c64fb5a1cc
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_int16
+// A'*B function (dot3):     GB_Adot3B__any_lt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_int16
+
+// C type:   bool
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_INT16 || GxB_NO_ANY_BOOL || GxB_NO_LT_INT16 || GxB_NO_ANY_LT_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_int16        // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_lt_int16), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_int16        // dot3: C<M>=A'*B (masked)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_int16        // dot4: C+=A'*B (dense)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_int16      // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_int32.c b/Source/Generated/GB_AxB__any_lt_int32.c
new file mode 100644
index 0000000000..3ff83521d2
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_int32
+// A'*B function (dot3):     GB_Adot3B__any_lt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_int32
+
+// C type:   bool
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_INT32 || GxB_NO_ANY_BOOL || GxB_NO_LT_INT32 || GxB_NO_ANY_LT_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_int32        // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_lt_int32), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_int32        // dot3: C<M>=A'*B (masked)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_int32        // dot4: C+=A'*B (dense)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_int32      // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_int64.c b/Source/Generated/GB_AxB__any_lt_int64.c
new file mode 100644
index 0000000000..7421260563
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_int64
+// A'*B function (dot3):     GB_Adot3B__any_lt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_int64
+
+// C type:   bool
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_INT64 || GxB_NO_ANY_BOOL || GxB_NO_LT_INT64 || GxB_NO_ANY_LT_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_int64        // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_lt_int64), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_int64        // dot3: C<M>=A'*B (masked)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_int64        // dot4: C+=A'*B (dense)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_int64      // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_int8.c b/Source/Generated/GB_AxB__any_lt_int8.c
new file mode 100644
index 0000000000..f35a47449b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_int8
+// A'*B function (dot3):     GB_Adot3B__any_lt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_int8
+
+// C type:   bool
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_INT8 || GxB_NO_ANY_BOOL || GxB_NO_LT_INT8 || GxB_NO_ANY_LT_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_int8         // dot2: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_lt_int8), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_int8         // dot3: C<M>=A'*B (masked)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_int8         // dot4: C+=A'*B (dense)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_int8       // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;    // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;     // enabled path, after the template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_uint16.c b/Source/Generated/GB_AxB__any_lt_uint16.c
new file mode 100644
index 0000000000..27011dbeac
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_uint16
+// A'*B function (dot3):     GB_Adot3B__any_lt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_uint16
+
+// C type:   bool
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add; same as GB_MULT because the ANY monoid overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): ANY always breaks, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_UINT16 || GxB_NO_ANY_BOOL || GxB_NO_LT_UINT16 || GxB_NO_ANY_LT_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_uint16   // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #define GB_PHASE_2_OF_2             // seen by the template included next
+    #include "GB_AxB_dot2_meta.c"       // kernel body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_uint16   // C<M>=A'*B, masked dot product (dot3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_uint16   // C+=A'*B, dense dot product (dot4)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_uint16 // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_uint32.c b/Source/Generated/GB_AxB__any_lt_uint32.c
new file mode 100644
index 0000000000..f19a553f8d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_uint32
+// A'*B function (dot3):     GB_Adot3B__any_lt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_uint32
+
+// C type:   bool
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add; same as GB_MULT because the ANY monoid overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): ANY always breaks, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_UINT32 || GxB_NO_ANY_BOOL || GxB_NO_LT_UINT32 || GxB_NO_ANY_LT_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_uint32   // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #define GB_PHASE_2_OF_2             // seen by the template included next
+    #include "GB_AxB_dot2_meta.c"       // kernel body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_uint32   // C<M>=A'*B, masked dot product (dot3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_uint32   // C+=A'*B, dense dot product (dot4)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_uint32 // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_uint64.c b/Source/Generated/GB_AxB__any_lt_uint64.c
new file mode 100644
index 0000000000..559ad6fa88
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_uint64
+// A'*B function (dot3):     GB_Adot3B__any_lt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_uint64
+
+// C type:   bool
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add; same as GB_MULT because the ANY monoid overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): ANY always breaks, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_UINT64 || GxB_NO_ANY_BOOL || GxB_NO_LT_UINT64 || GxB_NO_ANY_LT_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_uint64   // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #define GB_PHASE_2_OF_2             // seen by the template included next
+    #include "GB_AxB_dot2_meta.c"       // kernel body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_uint64   // C<M>=A'*B, masked dot product (dot3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_uint64   // C+=A'*B, dense dot product (dot4)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_uint64 // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lt_uint8.c b/Source/Generated/GB_AxB__any_lt_uint8.c
new file mode 100644
index 0000000000..a992aa5458
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lt_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lt_uint8
+// A'*B function (dot3):     GB_Adot3B__any_lt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_lt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lt_uint8
+
+// C type:   bool
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik < bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik < bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LT): z = (x < y)
+#define GB_MULT(z, x, y) \
+    z = (x < y)
+
+// multiply-add; same as GB_MULT because the ANY monoid overwrites z
+#define GB_MULTADD(z, x, y) \
+    z = (x < y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): ANY always breaks, cij is not tested
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LT || GxB_NO_UINT8 || GxB_NO_ANY_BOOL || GxB_NO_LT_UINT8 || GxB_NO_ANY_LT_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lt_uint8    // C=A'*B or C<!M>=A'*B, dot2 (phase 2)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #define GB_PHASE_2_OF_2             // seen by the template included next
+    #include "GB_AxB_dot2_meta.c"       // kernel body comes from the template
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lt_uint8    // C<M>=A'*B, masked dot product (dot3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lt_uint8    // C+=A'*B, dense dot product (dot4)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lt_uint8  // C=A*B, C<M>=A*B, or C<!M>=A*B (saxpy3)
+(                                   // any use of the arguments is in the template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: use generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_bool.c b/Source/Generated/GB_AxB__any_lxor_bool.c
new file mode 100644
index 0000000000..f1a29ae013
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_bool
+// A'*B function (dot3):     GB_Adot3B__any_lxor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    bool aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break, cij is not tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y for the monoid; for the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_ANY_LXOR_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot2B__any_lxor_bool
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<!M> above
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably slicing of B
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot3B__any_lxor_bool
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<M> above
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot4B__any_lxor_bool
+(
+    GrB_Matrix C,                                   // updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Asaxpy3B__any_lxor_bool
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_fp32.c b/Source/Generated/GB_AxB__any_lxor_fp32.c
new file mode 100644
index 0000000000..cfa2048274
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_fp32
+// A'*B function (dot3):     GB_Adot3B__any_lxor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = ((aik != 0) != (bkj != 0))
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z = ((x != 0) != (y != 0))
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break, cij is not tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y for the monoid; for the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_LXOR_FP32 || GxB_NO_ANY_LXOR_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot2B__any_lxor_fp32
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<!M> above
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably slicing of B
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot3B__any_lxor_fp32
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<M> above
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot4B__any_lxor_fp32
+(
+    GrB_Matrix C,                                   // updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Asaxpy3B__any_lxor_fp32
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_fp64.c b/Source/Generated/GB_AxB__any_lxor_fp64.c
new file mode 100644
index 0000000000..c276282b2c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_fp64
+// A'*B function (dot3):     GB_Adot3B__any_lxor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = ((aik != 0) != (bkj != 0))
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z = ((x != 0) != (y != 0))
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break, cij is not tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y for the monoid; for the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_LXOR_FP64 || GxB_NO_ANY_LXOR_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot2B__any_lxor_fp64
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<!M> above
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably slicing of B
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot3B__any_lxor_fp64
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<M> above
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot4B__any_lxor_fp64
+(
+    GrB_Matrix C,                                   // updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Asaxpy3B__any_lxor_fp64
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_int16.c b/Source/Generated/GB_AxB__any_lxor_int16.c
new file mode 100644
index 0000000000..f9f8e33e53
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_int16
+// A'*B function (dot3):     GB_Adot3B__any_lxor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z = ((x != 0) != (y != 0))
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add; declares a local x_op_y, so expand at most once per scope
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break, cij is not tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y for the monoid; for the ANY monoid this evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes) from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_LXOR_INT16 || GxB_NO_ANY_LXOR_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot2B__any_lxor_int16
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<!M> above
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably slicing of B
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is handled by the dot3 kernel, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot3B__any_lxor_int16
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask, as in C<M> above
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Adot4B__any_lxor_int16
+(
+    GrB_Matrix C,                                   // updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably slicing of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably slicing of B
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero; otherwise runs the template.
+GrB_Info GB_Asaxpy3B__any_lxor_int16
+(
+    GrB_Matrix C,                                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached if the included code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_int32.c b/Source/Generated/GB_AxB__any_lxor_int32.c
new file mode 100644
index 0000000000..4331792f3f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_int32
+// A'*B function (dot3):     GB_Adot3B__any_lxor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z is 1 if exactly one of x and y is nonzero, else 0
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add: with the ANY monoid, the product x_op_y simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (type int32_t, not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 32 bits set)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is LXOR)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_LXOR_INT32 || GxB_NO_ANY_LXOR_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lxor_int32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // optional mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"   // shared template is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lxor_int32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand of A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lxor_int32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // input/output matrix of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lxor_int32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // optional mask M
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A*B
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A*B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_int64.c b/Source/Generated/GB_AxB__any_lxor_int64.c
new file mode 100644
index 0000000000..cd845c5079
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_int64
+// A'*B function (dot3):     GB_Adot3B__any_lxor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z is 1 if exactly one of x and y is nonzero, else 0
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add: with the ANY monoid, the product x_op_y simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (type int64_t, not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: C is 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is LXOR)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_LXOR_INT64 || GxB_NO_ANY_LXOR_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lxor_int64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // optional mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"   // shared template is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lxor_int64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand of A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lxor_int64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // input/output matrix of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lxor_int64    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // optional mask M
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A*B
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A*B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_int8.c b/Source/Generated/GB_AxB__any_lxor_int8.c
new file mode 100644
index 0000000000..3feae576d4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_int8
+// A'*B function (dot3):     GB_Adot3B__any_lxor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z is 1 if exactly one of x and y is nonzero, else 0
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add: with the ANY monoid, the product x_op_y simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (type int8_t, not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 8 bits set)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is LXOR)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_LXOR_INT8 || GxB_NO_ANY_LXOR_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lxor_int8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // optional mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"   // shared template is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lxor_int8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand of A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lxor_int8   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // input/output matrix of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lxor_int8     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // optional mask M
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A*B
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A*B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_uint16.c b/Source/Generated/GB_AxB__any_lxor_uint16.c
new file mode 100644
index 0000000000..4f28e1ca70
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_uint16
+// A'*B function (dot3):     GB_Adot3B__any_lxor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (LXOR): z is 1 if exactly one of x and y is nonzero, else 0
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add: with the ANY monoid, the product x_op_y simply overwrites z
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here the break is unconditional
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none here: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar (type uint16_t, not initialized)
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid this is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the result is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 16 bits set)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is LXOR)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid this is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_LXOR_UINT16 || GxB_NO_ANY_LXOR_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lxor_uint16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // optional mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are handled here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"   // shared template is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lxor_uint16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand of A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // right operand of A'*B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lxor_uint16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // input/output matrix of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A'*B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lxor_uint16   // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // optional mask M
+    const GrB_Matrix A, bool A_is_pattern,      // left operand of A*B
+    const GrB_Matrix B, bool B_is_pattern,      // right operand of A*B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this semiring is disabled at compile time
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // shared template is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_uint32.c b/Source/Generated/GB_AxB__any_lxor_uint32.c
new file mode 100644
index 0000000000..73564805aa
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_uint32
+// A'*B function (dot3):     GB_Adot3B__any_lxor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_LXOR_UINT32 || GxB_NO_ANY_LXOR_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lxor_uint32     // dot2 kernel (phase 2) for the any_lxor_uint32 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to GrB_Matrix, presumably one matrix per slice of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; *_is_pattern presumably means values unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice,   // NOTE(review): looks like slice boundaries of B -- confirm in template
+    int64_t *GB_RESTRICT *C_counts, // NOTE(review): consumed by the template; meaning not visible here
+    int nthreads, int naslice, int nbslice  // presumably thread count and slice counts for A and B -- TODO confirm
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #define GB_PHASE_2_OF_2 // defined only around the include below; removed by the #undef
+    #include "GB_AxB_dot2_meta.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;  // reached only if the included body did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lxor_uint32     // dot3 (masked dot product) kernel for the any_lxor_uint32 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors, read-only through this pointer
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lxor_uint32     // dot4 (dense dot product) kernel for the any_lxor_uint32 semiring
+(
+    GrB_Matrix C,   // matrix accumulated into (the C of the banner above)
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like naslice slice boundaries for A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): as A_slice/naslice, but for B -- confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lxor_uint32   // saxpy3 (Gustavson + Hash) kernel for the any_lxor_uint32 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask M; Mask_comp presumably selects the complemented mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // task descriptors; not const here, unlike the dot3 task list
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,    // NOTE(review): presumably the count of fine-grained tasks -- confirm in template
+    const int nthreads, // presumably the number of threads to use -- TODO confirm
+    GB_Context Context  // NOTE(review): opaque context consumed by the template; usage not visible here
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_uint64.c b/Source/Generated/GB_AxB__any_lxor_uint64.c
new file mode 100644
index 0000000000..5a8d510952
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_uint64
+// A'*B function (dot3):     GB_Adot3B__any_lxor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_LXOR_UINT64 || GxB_NO_ANY_LXOR_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lxor_uint64     // dot2 kernel (phase 2) for the any_lxor_uint64 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to GrB_Matrix, presumably one matrix per slice of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; *_is_pattern presumably means values unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice,   // NOTE(review): looks like slice boundaries of B -- confirm in template
+    int64_t *GB_RESTRICT *C_counts, // NOTE(review): consumed by the template; meaning not visible here
+    int nthreads, int naslice, int nbslice  // presumably thread count and slice counts for A and B -- TODO confirm
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #define GB_PHASE_2_OF_2 // defined only around the include below; removed by the #undef
+    #include "GB_AxB_dot2_meta.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;  // reached only if the included body did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lxor_uint64     // dot3 (masked dot product) kernel for the any_lxor_uint64 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors, read-only through this pointer
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lxor_uint64     // dot4 (dense dot product) kernel for the any_lxor_uint64 semiring
+(
+    GrB_Matrix C,   // matrix accumulated into (the C of the banner above)
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like naslice slice boundaries for A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): as A_slice/naslice, but for B -- confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lxor_uint64   // saxpy3 (Gustavson + Hash) kernel for the any_lxor_uint64 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask M; Mask_comp presumably selects the complemented mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // task descriptors; not const here, unlike the dot3 task list
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,    // NOTE(review): presumably the count of fine-grained tasks -- confirm in template
+    const int nthreads, // presumably the number of threads to use -- TODO confirm
+    GB_Context Context  // NOTE(review): opaque context consumed by the template; usage not visible here
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_lxor_uint8.c b/Source/Generated/GB_AxB__any_lxor_uint8.c
new file mode 100644
index 0000000000..f4ab81ba6a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_lxor_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_lxor_uint8
+// A'*B function (dot3):     GB_Adot3B__any_lxor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_lxor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_lxor_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = ((aik != 0) != (bkj != 0))
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_LXOR_UINT8 || GxB_NO_ANY_LXOR_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_lxor_uint8      // dot2 kernel (phase 2) for the any_lxor_uint8 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to GrB_Matrix, presumably one matrix per slice of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; *_is_pattern presumably means values unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice,   // NOTE(review): looks like slice boundaries of B -- confirm in template
+    int64_t *GB_RESTRICT *C_counts, // NOTE(review): consumed by the template; meaning not visible here
+    int nthreads, int naslice, int nbslice  // presumably thread count and slice counts for A and B -- TODO confirm
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #define GB_PHASE_2_OF_2 // defined only around the include below; removed by the #undef
+    #include "GB_AxB_dot2_meta.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;  // reached only if the included body did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_lxor_uint8      // dot3 (masked dot product) kernel for the any_lxor_uint8 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors, read-only through this pointer
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_lxor_uint8      // dot4 (dense dot product) kernel for the any_lxor_uint8 semiring
+(
+    GrB_Matrix C,   // matrix accumulated into (the C of the banner above)
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like naslice slice boundaries for A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): as A_slice/naslice, but for B -- confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_lxor_uint8    // saxpy3 (Gustavson + Hash) kernel for the any_lxor_uint8 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask M; Mask_comp presumably selects the complemented mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // task descriptors; not const here, unlike the dot3 task list
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,    // NOTE(review): presumably the count of fine-grained tasks -- confirm in template
+    const int nthreads, // presumably the number of threads to use -- TODO confirm
+    GB_Context Context  // NOTE(review): opaque context consumed by the template; usage not visible here
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_fp32.c b/Source/Generated/GB_AxB__any_max_fp32.c
new file mode 100644
index 0000000000..ad1d80f02e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_fp32
+// A'*B function (dot3):     GB_Adot3B__any_max_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = fmaxf (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = fmaxf (aik, bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = fmaxf (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (x, y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_MAX_FP32 || GxB_NO_ANY_MAX_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_fp32        // dot2 kernel (phase 2) for the any_max_fp32 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to GrB_Matrix, presumably one matrix per slice of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; *_is_pattern presumably means values unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice,   // NOTE(review): looks like slice boundaries of B -- confirm in template
+    int64_t *GB_RESTRICT *C_counts, // NOTE(review): consumed by the template; meaning not visible here
+    int nthreads, int naslice, int nbslice  // presumably thread count and slice counts for A and B -- TODO confirm
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #define GB_PHASE_2_OF_2 // defined only around the include below; removed by the #undef
+    #include "GB_AxB_dot2_meta.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;  // reached only if the included body did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_fp32        // dot3 (masked dot product) kernel for the any_max_fp32 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask M; Mask_struct presumably means structure-only mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors, read-only through this pointer
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_fp32        // dot4 (dense dot product) kernel for the any_max_fp32 semiring
+(
+    GrB_Matrix C,   // matrix accumulated into (the C of the banner above)
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like naslice slice boundaries for A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): as A_slice/naslice, but for B -- confirm
+    const int nthreads  // presumably the number of threads to use -- TODO confirm
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_fp32      // saxpy3 (Gustavson + Hash) kernel for the any_max_fp32 semiring
+(
+    GrB_Matrix C,   // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask M; Mask_comp presumably selects the complemented mask -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,  // first operand; flag presumably means values of A are unused -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // second operand; same flag convention as A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // task descriptors; not const here, unlike the dot3 task list
+    const int ntasks,   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,    // NOTE(review): presumably the count of fine-grained tasks -- confirm in template
+    const int nthreads, // presumably the number of threads to use -- TODO confirm
+    GB_Context Context  // NOTE(review): opaque context consumed by the template; usage not visible here
+)
+{ 
+    #if GB_DISABLE  // compile-time opt-out, see the GB_DISABLE definition above
+    return (GrB_NO_VALUE) ; // kernel compiled out: no work is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // shared body, specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached only if the template did not return earlier
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_fp64.c b/Source/Generated/GB_AxB__any_max_fp64.c
new file mode 100644
index 0000000000..a5a3966df8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_fp64
+// A'*B function (dot3):     GB_Adot3B__any_max_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = fmax (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = fmax (aik, bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = max (x,y), computed with fmax
+#define GB_MULT(z, x, y) \
+    z = fmax (x, y)
+
+// multiply-add: same as the multiply, since the ANY monoid just assigns
+#define GB_MULTADD(z, x, y) \
+    z = fmax (x, y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the "+" of the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_MAX_FP64 || GxB_NO_ANY_MAX_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_fp64
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, const bool Mask_struct, // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,               // presumably how B is sliced
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // request phase 2 from the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2              // do not leak into later includes
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_fp64
+(
+    GrB_Matrix C,                               // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct, // the mask M
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (GrB_NO_VALUE is returned if GB_DISABLE is set)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_fp64
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_fp64
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nfine,            // presumably # of fine tasks; see template
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_int16.c b/Source/Generated/GB_AxB__any_max_int16.c
new file mode 100644
index 0000000000..fa4fae4d85
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_int16
+// A'*B function (dot3):     GB_Adot3B__any_max_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = max (x,y), computed with GB_IMAX
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add: the product goes into the temporary x_op_y, then into z
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the "+" of the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_MAX_INT16 || GxB_NO_ANY_MAX_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_int16
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, const bool Mask_struct, // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,               // presumably how B is sliced
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // request phase 2 from the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2              // do not leak into later includes
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_int16
+(
+    GrB_Matrix C,                               // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct, // the mask M
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (GrB_NO_VALUE is returned if GB_DISABLE is set)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_int16
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_int16
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nfine,            // presumably # of fine tasks; see template
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_int32.c b/Source/Generated/GB_AxB__any_max_int32.c
new file mode 100644
index 0000000000..30dc2fc8eb
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_int32
+// A'*B function (dot3):     GB_Adot3B__any_max_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = max (x,y), computed with GB_IMAX
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add: the product goes into the temporary x_op_y, then into z
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the "+" of the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_MAX_INT32 || GxB_NO_ANY_MAX_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_int32
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, const bool Mask_struct, // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,               // presumably how B is sliced
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // request phase 2 from the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2              // do not leak into later includes
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_int32
+(
+    GrB_Matrix C,                               // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct, // the mask M
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (GrB_NO_VALUE is returned if GB_DISABLE is set)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_int32
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_int32
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nfine,            // presumably # of fine tasks; see template
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_int64.c b/Source/Generated/GB_AxB__any_max_int64.c
new file mode 100644
index 0000000000..7c1284edeb
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_int64
+// A'*B function (dot3):     GB_Adot3B__any_max_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = max (x,y), computed with GB_IMAX
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add: the product goes into the temporary x_op_y, then into z
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): here it is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where the "+" of the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_MAX_INT64 || GxB_NO_ANY_MAX_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_int64
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, const bool Mask_struct, // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,               // presumably how B is sliced
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // request phase 2 from the meta file
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2              // do not leak into later includes
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_int64
+(
+    GrB_Matrix C,                               // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct, // the mask M
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product (GrB_NO_VALUE is returned if GB_DISABLE is set)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_int64
+(
+    GrB_Matrix C,                               // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced
+    const int nthreads                  // presumably # of threads; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_int64
+(
+    GrB_Matrix C,                               // result of the multiply
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably ntasks entries
+    const int ntasks,
+    const int nfine,            // presumably # of fine tasks; see template
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is supplied by the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_int8.c b/Source/Generated/GB_AxB__any_max_int8.c
new file mode 100644
index 0000000000..d5df101074
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_int8
+// A'*B function (dot3):     GB_Adot3B__any_max_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]: declares a new int8_t local named aik, initialized from Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares a new int8_t local named bkj, initialized from Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: the MAX of this semiring, via GB_IMAX (defined outside this file)
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): for the ANY monoid this is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization: an empty statement here (no pragma)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the ANY monoid's add, which here evaluates to its second argument
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here 0xff: the low 8 bits set)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (0 here: the multiplier is MAX, not PAIR)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.  (Branch not taken here: the flag above is 0.)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, scaled to bytes by sizeof
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if any one of these GxB_NO_* flags is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_MAX_INT8 || GxB_NO_ANY_MAX_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_int8    // dot2 kernel (C=A'*B or C<!M>=A'*B, phase 2) for the any_max_int8 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot2 template, specialized by the macros above
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined; it is undefined again right after
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_int8    // dot3 kernel (masked C<M>=A'*B) for the any_max_int8 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot3 template, specialized by the macros above
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_int8    // dot4 kernel (C+=A'*B, dense dot product) for the any_max_int8 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot4 template, specialized by the macros above
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_int8  // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_max_int8 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared saxpy3 template, specialized by the macros above
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_uint16.c b/Source/Generated/GB_AxB__any_max_uint16.c
new file mode 100644
index 0000000000..578fc5112b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_uint16
+// A'*B function (dot3):     GB_Adot3B__any_max_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]: declares a new uint16_t local named aik, initialized from Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares a new uint16_t local named bkj, initialized from Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: the MAX of this semiring, via GB_IMAX (defined outside this file)
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): for the ANY monoid this is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization: an empty statement here (no pragma)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the ANY monoid's add, which here evaluates to its second argument
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here 0xffff: the low 16 bits set)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (0 here: the multiplier is MAX, not PAIR)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.  (Branch not taken here: the flag above is 0.)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, scaled to bytes by sizeof
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if any one of these GxB_NO_* flags is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_ANY_MAX_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_uint16  // dot2 kernel (C=A'*B or C<!M>=A'*B, phase 2) for the any_max_uint16 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot2 template, specialized by the macros above
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined; it is undefined again right after
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_uint16  // dot3 kernel (masked C<M>=A'*B) for the any_max_uint16 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot3 template, specialized by the macros above
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_uint16  // dot4 kernel (C+=A'*B, dense dot product) for the any_max_uint16 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot4 template, specialized by the macros above
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_uint16    // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_max_uint16 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared saxpy3 template, specialized by the macros above
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_uint32.c b/Source/Generated/GB_AxB__any_max_uint32.c
new file mode 100644
index 0000000000..d5093b23fd
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_uint32
+// A'*B function (dot3):     GB_Adot3B__any_max_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]: declares a new uint32_t local named aik, initialized from Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares a new uint32_t local named bkj, initialized from Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: the MAX of this semiring, via GB_IMAX (defined outside this file)
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): for the ANY monoid this is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization: an empty statement here (no pragma)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the ANY monoid's add, which here evaluates to its second argument
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here 0xffffffff: the low 32 bits set)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (0 here: the multiplier is MAX, not PAIR)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.  (Branch not taken here: the flag above is 0.)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, scaled to bytes by sizeof
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if any one of these GxB_NO_* flags is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_ANY_MAX_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_uint32  // dot2 kernel (C=A'*B or C<!M>=A'*B, phase 2) for the any_max_uint32 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot2 template, specialized by the macros above
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined; it is undefined again right after
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_uint32  // dot3 kernel (masked C<M>=A'*B) for the any_max_uint32 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot3 template, specialized by the macros above
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_uint32  // dot4 kernel (C+=A'*B, dense dot product) for the any_max_uint32 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot4 template, specialized by the macros above
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_uint32    // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_max_uint32 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared saxpy3 template, specialized by the macros above
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_uint64.c b/Source/Generated/GB_AxB__any_max_uint64.c
new file mode 100644
index 0000000000..85333599a0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_uint64
+// A'*B function (dot3):     GB_Adot3B__any_max_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]: declares a new uint64_t local named aik, initialized from Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares a new uint64_t local named bkj, initialized from Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: the MAX of this semiring, via GB_IMAX (defined outside this file)
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by the product
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal check (dot product only): for the ANY monoid this is an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization: an empty statement here (no pragma)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the ANY monoid's add, which here evaluates to its second argument
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for this 64-bit type; presumably unused -- TODO confirm)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings (0 here: the multiplier is MAX, not PAIR)
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.  (Branch not taken here: the flag above is 0.)
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t: a plain overwrite here, since the ANY monoid's add just takes the new value
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len is an entry count, scaled to bytes by sizeof
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if any one of these GxB_NO_* flags is nonzero
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_ANY_MAX_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_uint64  // dot2 kernel (C=A'*B or C<!M>=A'*B, phase 2) for the any_max_uint64 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot2 template, specialized by the macros above
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined; it is undefined again right after
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_uint64  // dot3 kernel (masked C<M>=A'*B) for the any_max_uint64 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot3 template, specialized by the macros above
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_uint64  // dot4 kernel (C+=A'*B, dense dot product) for the any_max_uint64 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared dot4 template, specialized by the macros above
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_uint64    // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B) for the any_max_uint64 semiring
+(   // parameters are not referenced in the visible body; any use is inside the #include'd template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE      // kernel compiled out: return GrB_NO_VALUE so the generic case is used instead
+    return (GrB_NO_VALUE) ;
+    #else               // kernel enabled: the body is the shared saxpy3 template, specialized by the macros above
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached once the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_max_uint8.c b/Source/Generated/GB_AxB__any_max_uint8.c
new file mode 100644
index 0000000000..93b21ecec1
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_max_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_max_uint8
+// A'*B function (dot3):     GB_Adot3B__any_max_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_max_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_max_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = GB_IMAX (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = GB_IMAX (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMAX (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (monoid add); the ANY monoid here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MAX || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_ANY_MAX_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_max_uint8, defined below)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"       // function body is this template
+    #undef GB_PHASE_2_OF_2      // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_fp32.c b/Source/Generated/GB_AxB__any_min_fp32.c
new file mode 100644
index 0000000000..888d538970
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_fp32
+// A'*B function (dot3):     GB_Adot3B__any_min_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = fminf (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = fminf (aik, bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = fminf (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = fminf (x, y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (monoid add); the ANY monoid here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_MIN_FP32 || GxB_NO_ANY_MIN_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_min_fp32, defined below)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"       // function body is this template
+    #undef GB_PHASE_2_OF_2      // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_fp64.c b/Source/Generated/GB_AxB__any_min_fp64.c
new file mode 100644
index 0000000000..23c7ed88ee
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_fp64
+// A'*B function (dot3):     GB_Adot3B__any_min_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = fmin (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = fmin (aik, bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = fmin (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = fmin (x, y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (monoid add); the ANY monoid here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_MIN_FP64 || GxB_NO_ANY_MIN_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_min_fp64, defined below)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"       // function body is this template
+    #undef GB_PHASE_2_OF_2      // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_int16.c b/Source/Generated/GB_AxB__any_min_int16.c
new file mode 100644
index 0000000000..9e1f8d40fd
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_int16
+// A'*B function (dot3):     GB_Adot3B__any_min_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (monoid add); the ANY monoid here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_MIN_INT16 || GxB_NO_ANY_MIN_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_min_int16, defined below)
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"       // function body is this template
+    #undef GB_PHASE_2_OF_2      // flag is scoped to the include above
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE nonzero: kernel not compiled
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;
+    #endif                      // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_int32.c b/Source/Generated/GB_AxB__any_min_int32.c
new file mode 100644
index 0000000000..2d471ff347
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_int32
+// A'*B function (dot3):     GB_Adot3B__any_min_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the "sum" is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_MIN_INT32 || GxB_NO_ANY_MIN_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_int32       // C=A'*B or C<!M>=A'*B dot-product (dot2) kernel, ANY_MIN_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    // C<M>=A'*B (non-complemented mask) is no longer handled here; it now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta template -- confirm in GB_AxB_dot2_meta.c
+    #include "GB_AxB_dot2_meta.c"       // function body; presumably specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from staying defined past this function
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_int32       // C<M>=A'*B masked dot-product (dot3) kernel, ANY_MIN_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_int32       // C+=A'*B dense dot-product (dot4) kernel, ANY_MIN_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_int32     // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B), ANY_MIN_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_int64.c b/Source/Generated/GB_AxB__any_min_int64.c
new file mode 100644
index 0000000000..3862a541a4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_int64
+// A'*B function (dot3):     GB_Adot3B__any_min_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the "sum" is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_MIN_INT64 || GxB_NO_ANY_MIN_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_int64       // C=A'*B or C<!M>=A'*B dot-product (dot2) kernel, ANY_MIN_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    // C<M>=A'*B (non-complemented mask) is no longer handled here; it now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta template -- confirm in GB_AxB_dot2_meta.c
+    #include "GB_AxB_dot2_meta.c"       // function body; presumably specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from staying defined past this function
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_int64       // C<M>=A'*B masked dot-product (dot3) kernel, ANY_MIN_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_int64       // C+=A'*B dense dot-product (dot4) kernel, ANY_MIN_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_int64     // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B), ANY_MIN_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_int8.c b/Source/Generated/GB_AxB__any_min_int8.c
new file mode 100644
index 0000000000..b592d2f6c0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_int8
+// A'*B function (dot3):     GB_Adot3B__any_min_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the "sum" is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_MIN_INT8 || GxB_NO_ANY_MIN_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_int8        // C=A'*B or C<!M>=A'*B dot-product (dot2) kernel, ANY_MIN_INT8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    // C<M>=A'*B (non-complemented mask) is no longer handled here; it now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta template -- confirm in GB_AxB_dot2_meta.c
+    #include "GB_AxB_dot2_meta.c"       // function body; presumably specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from staying defined past this function
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_int8        // C<M>=A'*B masked dot-product (dot3) kernel, ANY_MIN_INT8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_int8        // C+=A'*B dense dot-product (dot4) kernel, ANY_MIN_INT8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_int8      // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B), ANY_MIN_INT8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_uint16.c b/Source/Generated/GB_AxB__any_min_uint16.c
new file mode 100644
index 0000000000..8732307239
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_uint16
+// A'*B function (dot3):     GB_Adot3B__any_min_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; with the ANY monoid the "sum" is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update is the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_ANY_MIN_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_uint16      // C=A'*B or C<!M>=A'*B dot-product (dot2) kernel, ANY_MIN_UINT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    // C<M>=A'*B (non-complemented mask) is no longer handled here; it now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2             // presumably selects the phase-2 path of the meta template -- confirm in GB_AxB_dot2_meta.c
+    #include "GB_AxB_dot2_meta.c"       // function body; presumably specialized by the GB_* macros defined earlier in this file
+    #undef GB_PHASE_2_OF_2              // keep the phase flag from staying defined past this function
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_uint16      // C<M>=A'*B masked dot-product (dot3) kernel, ANY_MIN_UINT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_uint16      // C+=A'*B dense dot-product (dot4) kernel, ANY_MIN_UINT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_uint16    // saxpy3 kernel (C=A*B, C<M>=A*B, C<!M>=A*B), ANY_MIN_UINT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // NOTE(review): arguments are not referenced directly here; presumably consumed by the included template
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled at compile time: no work done, the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;              // reached once the template body has run to its end
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_uint32.c b/Source/Generated/GB_AxB__any_min_uint32.c
new file mode 100644
index 0000000000..cf3dedc76e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_uint32
+// A'*B function (dot3):     GB_Adot3B__any_min_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_ANY_MIN_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_uint32
+(
+    GrB_Matrix C,                       // output: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_min_uint32), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around this include
+    #include "GB_AxB_dot2_meta.c"       // kernel body is included as text
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_uint32
+(
+    GrB_Matrix C,                       // output: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_uint32
+(
+    GrB_Matrix C,                       // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_uint32
+(
+    GrB_Matrix C,                       // output: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_uint64.c b/Source/Generated/GB_AxB__any_min_uint64.c
new file mode 100644
index 0000000000..ab371a0c61
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_uint64
+// A'*B function (dot3):     GB_Adot3B__any_min_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_ANY_MIN_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_uint64
+(
+    GrB_Matrix C,                       // output: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_min_uint64), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around this include
+    #include "GB_AxB_dot2_meta.c"       // kernel body is included as text
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_uint64
+(
+    GrB_Matrix C,                       // output: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_uint64
+(
+    GrB_Matrix C,                       // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_uint64
+(
+    GrB_Matrix C,                       // output: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_min_uint8.c b/Source/Generated/GB_AxB__any_min_uint8.c
new file mode 100644
index 0000000000..99844d5f09
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_min_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_min_uint8
+// A'*B function (dot3):     GB_Adot3B__any_min_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_min_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_min_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = GB_IMIN (aik, bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = GB_IMIN (aik, bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMIN (x, y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MIN || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_ANY_MIN_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_min_uint8
+(
+    GrB_Matrix C,                       // output: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_min_uint8), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around this include
+    #include "GB_AxB_dot2_meta.c"       // kernel body is included as text
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_min_uint8
+(
+    GrB_Matrix C,                       // output: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_min_uint8
+(
+    GrB_Matrix C,                       // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_min_uint8
+(
+    GrB_Matrix C,                       // output: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_fp32.c b/Source/Generated/GB_AxB__any_minus_fp32.c
new file mode 100644
index 0000000000..ea50fe4ca6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_fp32
+// A'*B function (dot3):     GB_Adot3B__any_minus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik - bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x - y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this overwrites Cx [p] with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this overwrites Hx [i] with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_MINUS_FP32 || GxB_NO_ANY_MINUS_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_fp32
+(
+    GrB_Matrix C,                       // output: C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (GB_Adot3B__any_minus_fp32), not this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around this include
+    #include "GB_AxB_dot2_meta.c"       // kernel body is included as text
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_fp32
+(
+    GrB_Matrix C,                       // output: C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_fp32
+(
+    GrB_Matrix C,                       // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_fp32
+(
+    GrB_Matrix C,                       // output: C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE nonzero: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is included as text
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_fp64.c b/Source/Generated/GB_AxB__any_minus_fp64.c
new file mode 100644
index 0000000000..735eba8e09
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_fp64
+// A'*B function (dot3):     GB_Adot3B__any_minus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik - bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (x - y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update just overwrites C(i,j)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update just overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_MINUS_FP64 || GxB_NO_ANY_MINUS_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_fp64   // dot2 kernel, ANY_MINUS_FP64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // any use of the arguments happens inside the included meta-template
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; sees GB_PHASE_2_OF_2 as defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_fp64   // C<M>=A'*B kernel, ANY_MINUS_FP64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_fp64   // C+=A'*B kernel, ANY_MINUS_FP64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_fp64 // saxpy3 kernel, ANY_MINUS_FP64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_int16.c b/Source/Generated/GB_AxB__any_minus_int16.c
new file mode 100644
index 0000000000..c93cac1e5a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_int16
+// A'*B function (dot3):     GB_Adot3B__any_minus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update just overwrites C(i,j)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update just overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_MINUS_INT16 || GxB_NO_ANY_MINUS_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_int16  // dot2 kernel, ANY_MINUS_INT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // any use of the arguments happens inside the included meta-template
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; sees GB_PHASE_2_OF_2 as defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_int16  // C<M>=A'*B kernel, ANY_MINUS_INT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_int16  // C+=A'*B kernel, ANY_MINUS_INT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_int16 // saxpy3 kernel, ANY_MINUS_INT16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_int32.c b/Source/Generated/GB_AxB__any_minus_int32.c
new file mode 100644
index 0000000000..cd2ab565df
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_int32
+// A'*B function (dot3):     GB_Adot3B__any_minus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update just overwrites C(i,j)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update just overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_MINUS_INT32 || GxB_NO_ANY_MINUS_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_int32  // dot2 kernel, ANY_MINUS_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // any use of the arguments happens inside the included meta-template
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; sees GB_PHASE_2_OF_2 as defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_int32  // C<M>=A'*B kernel, ANY_MINUS_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_int32  // C+=A'*B kernel, ANY_MINUS_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_int32 // saxpy3 kernel, ANY_MINUS_INT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_int64.c b/Source/Generated/GB_AxB__any_minus_int64.c
new file mode 100644
index 0000000000..88d588d685
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_int64
+// A'*B function (dot3):     GB_Adot3B__any_minus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; with the ANY monoid the update just overwrites C(i,j)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; with the ANY monoid the update just overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_MINUS_INT64 || GxB_NO_ANY_MINUS_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_int64  // dot2 kernel, ANY_MINUS_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{   // any use of the arguments happens inside the included meta-template
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body; sees GB_PHASE_2_OF_2 as defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_int64  // C<M>=A'*B kernel, ANY_MINUS_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_int64  // C+=A'*B kernel, ANY_MINUS_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_int64 // saxpy3 kernel, ANY_MINUS_INT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{   // any use of the arguments happens inside the included template
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded semiring is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // supplies the entire function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_int8.c b/Source/Generated/GB_AxB__any_minus_int8.c
new file mode 100644
index 0000000000..c1ae4211b3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_int8
+// A'*B function (dot3):     GB_Adot3B__any_minus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_MINUS_INT8 || GxB_NO_ANY_MINUS_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_int8    // hard-coded C=A'*B or C<!M>=A'*B (dot2)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // the computation comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_int8    // hard-coded C<M>=A'*B (dot3)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_int8    // hard-coded C+=A'*B (dot4)
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_int8    // hard-coded C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: left-hand input
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_uint16.c b/Source/Generated/GB_AxB__any_minus_uint16.c
new file mode 100644
index 0000000000..325f086511
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_uint16
+// A'*B function (dot3):     GB_Adot3B__any_minus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_MINUS_UINT16 || GxB_NO_ANY_MINUS_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_uint16    // hard-coded C=A'*B or C<!M>=A'*B (dot2)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // the computation comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_uint16    // hard-coded C<M>=A'*B (dot3)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_uint16    // hard-coded C+=A'*B (dot4)
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_uint16    // hard-coded C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: left-hand input
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_uint32.c b/Source/Generated/GB_AxB__any_minus_uint32.c
new file mode 100644
index 0000000000..25460a15c3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_uint32
+// A'*B function (dot3):     GB_Adot3B__any_minus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_MINUS_UINT32 || GxB_NO_ANY_MINUS_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_uint32    // hard-coded C=A'*B or C<!M>=A'*B (dot2)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // the computation comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_uint32    // hard-coded C<M>=A'*B (dot3)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_uint32    // hard-coded C+=A'*B (dot4)
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_uint32    // hard-coded C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: left-hand input
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_uint64.c b/Source/Generated/GB_AxB__any_minus_uint64.c
new file mode 100644
index 0000000000..df9dd1d37d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_uint64
+// A'*B function (dot3):     GB_Adot3B__any_minus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update is a plain assignment
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y; the ANY monoid simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8, 16, and 32-bit integers (0 here: 64-bit type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update is a plain assignment
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_MINUS_UINT64 || GxB_NO_ANY_MINUS_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_uint64    // hard-coded C=A'*B or C<!M>=A'*B (dot2)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the template is included
+    #include "GB_AxB_dot2_meta.c"       // the computation comes from this file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_uint64    // hard-coded C<M>=A'*B (dot3)
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct,     // M: the mask in C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_uint64    // hard-coded C+=A'*B (dot4)
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // A: used as A' in A'*B
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_uint64    // hard-coded C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,                       // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // M: the mask
+    const GrB_Matrix A, bool A_is_pattern,          // A: left-hand input
+    const GrB_Matrix B, bool B_is_pattern,          // B: right-hand input
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this hard-coded kernel is compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the computation comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_minus_uint8.c b/Source/Generated/GB_AxB__any_minus_uint8.c
new file mode 100644
index 0000000000..fb511045c8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_minus_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_minus_uint8
+// A'*B function (dot3):     GB_Adot3B__any_minus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_minus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_minus_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik - bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = (aik - bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x - y
+#define GB_MULT(z, x, y) \
+    z = (x - y)
+
+// multiply-add: the add step is a plain assignment of the product to z
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x - y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: breaks unconditionally (the cij argument is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, expressed here as the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the add function, which here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is the one compiled: GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, expressed here as the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of size sizeof(uint8_t) from Hx+i to Cx+p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_MINUS || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_MINUS_UINT8 || GxB_NO_ANY_MINUS_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_minus_uint8     // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta-template is included
+    #include "GB_AxB_dot2_meta.c"       // the template supplies the computation
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_minus_uint8     // dot3 kernel: C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_minus_uint8     // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_minus_uint8   // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_fp32.c b/Source/Generated/GB_AxB__any_ne_fp32.c
new file mode 100644
index 0000000000..84bde7c2b0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_fp32
+// A'*B function (dot3):     GB_Adot3B__any_ne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_fp32
+
+// C type:   bool
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: the add step is a plain assignment of the product to z
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: breaks unconditionally (the cij argument is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, expressed here as the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the add function, which here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is the one compiled: GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, expressed here as the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of size sizeof(bool) from Hx+i to Cx+p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_FP32 || GxB_NO_ANY_BOOL || GxB_NO_NE_FP32 || GxB_NO_ANY_NE_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_fp32         // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta-template is included
+    #include "GB_AxB_dot2_meta.c"       // the template supplies the computation
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_fp32         // dot3 kernel: C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_fp32         // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_fp32       // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_fp64.c b/Source/Generated/GB_AxB__any_ne_fp64.c
new file mode 100644
index 0000000000..ccefe19357
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_fp64
+// A'*B function (dot3):     GB_Adot3B__any_ne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_fp64
+
+// C type:   bool
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: the add step is a plain assignment of the product to z
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: breaks unconditionally (the cij argument is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, expressed here as the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the add function, which here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is the one compiled: GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, expressed here as the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of size sizeof(bool) from Hx+i to Cx+p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_FP64 || GxB_NO_ANY_BOOL || GxB_NO_NE_FP64 || GxB_NO_ANY_NE_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_fp64         // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta-template is included
+    #include "GB_AxB_dot2_meta.c"       // the template supplies the computation
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_fp64         // dot3 kernel: C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_fp64         // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_fp64       // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_int16.c b/Source/Generated/GB_AxB__any_ne_int16.c
new file mode 100644
index 0000000000..45383a033e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_int16
+// A'*B function (dot3):     GB_Adot3B__any_ne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_int16
+
+// C type:   bool
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: the add step is a plain assignment of the product to z
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: breaks unconditionally (the cij argument is not tested)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, expressed here as the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y: the add function, which here simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // this branch is the one compiled: GB_IS_ANY_PAIR_SEMIRING is 0 above
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, expressed here as the plain assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries of size sizeof(bool) from Hx+i to Cx+p
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_INT16 || GxB_NO_ANY_BOOL || GxB_NO_NE_INT16 || GxB_NO_ANY_NE_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_int16        // dot2 kernel: C=A'*B or C<!M>=A'*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2     // defined only while the meta-template is included
+    #include "GB_AxB_dot2_meta.c"       // the template supplies the computation
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_int16        // dot3 kernel: C<M>=A'*B, masked dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_int16        // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_int16      // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no parameter is referenced in the visible body; any use is in the template
+{ 
+    #if GB_DISABLE              // nonzero if any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the template supplies the computation
+    return (GrB_SUCCESS) ;      // reached when the template code falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_int32.c b/Source/Generated/GB_AxB__any_ne_int32.c
new file mode 100644
index 0000000000..31f0eb160c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_int32
+// A'*B function (dot3):     GB_Adot3B__any_ne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_int32
+
+// C type:   bool
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: for the ANY monoid, z is overwritten by the product (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break unconditionally; no terminal test of cij is made
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_INT32 || GxB_NO_ANY_BOOL || GxB_NO_NE_INT32 || GxB_NO_ANY_NE_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel expands the dot2 meta-template, phase 2
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_int64.c b/Source/Generated/GB_AxB__any_ne_int64.c
new file mode 100644
index 0000000000..931a59a221
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_int64
+// A'*B function (dot3):     GB_Adot3B__any_ne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_int64
+
+// C type:   bool
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: for the ANY monoid, z is overwritten by the product (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break unconditionally; no terminal test of cij is made
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_INT64 || GxB_NO_ANY_BOOL || GxB_NO_NE_INT64 || GxB_NO_ANY_NE_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel expands the dot2 meta-template, phase 2
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_int8.c b/Source/Generated/GB_AxB__any_ne_int8.c
new file mode 100644
index 0000000000..db645390fb
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_int8
+// A'*B function (dot3):     GB_Adot3B__any_ne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_int8
+
+// C type:   bool
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: for the ANY monoid, z is overwritten by the product (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break unconditionally; no terminal test of cij is made
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_INT8 || GxB_NO_ANY_BOOL || GxB_NO_NE_INT8 || GxB_NO_ANY_NE_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel expands the dot2 meta-template, phase 2
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_uint16.c b/Source/Generated/GB_AxB__any_ne_uint16.c
new file mode 100644
index 0000000000..2adae8e1f8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_uint16
+// A'*B function (dot3):     GB_Adot3B__any_ne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_uint16
+
+// C type:   bool
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NE): z = (x != y)
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: for the ANY monoid, z is overwritten by the product (x != y)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// dot product only: break unconditionally; no terminal test of cij is made
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_UINT16 || GxB_NO_ANY_BOOL || GxB_NO_NE_UINT16 || GxB_NO_ANY_NE_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel expands the dot2 meta-template, phase 2
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_uint32.c b/Source/Generated/GB_AxB__any_ne_uint32.c
new file mode 100644
index 0000000000..307c54f238
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_uint32
+// A'*B function (dot3):     GB_Adot3B__any_ne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_uint32
+
+// C type:   bool
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NE): z is true if x and y differ
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: same expansion as GB_MULT (the ANY monoid add is cij = z)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): always break, with no test of cij
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_UINT32 || GxB_NO_ANY_BOOL || GxB_NO_NE_UINT32 || GxB_NO_ANY_NE_UINT32)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B__any_ne_uint32: C=A'*B or C<!M>=A'*B, dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the non-complemented masked case, C<M>=A'*B, now uses dot3
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2  // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"  // meta file expands here as the body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B__any_ne_uint32: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B__any_ne_uint32: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_uint64.c b/Source/Generated/GB_AxB__any_ne_uint64.c
new file mode 100644
index 0000000000..8ea4d01b22
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_uint64
+// A'*B function (dot3):     GB_Adot3B__any_ne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_uint64
+
+// C type:   bool
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NE): z is true if x and y differ
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: same expansion as GB_MULT (the ANY monoid add is cij = z)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): always break, with no test of cij
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_UINT64 || GxB_NO_ANY_BOOL || GxB_NO_NE_UINT64 || GxB_NO_ANY_NE_UINT64)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B__any_ne_uint64: C=A'*B or C<!M>=A'*B, dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the non-complemented masked case, C<M>=A'*B, now uses dot3
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2  // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"  // meta file expands here as the body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B__any_ne_uint64: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B__any_ne_uint64: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_ne_uint8.c b/Source/Generated/GB_AxB__any_ne_uint8.c
new file mode 100644
index 0000000000..ebf249410c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_ne_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_ne_uint8
+// A'*B function (dot3):     GB_Adot3B__any_ne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_ne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_ne_uint8
+
+// C type:   bool
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik != bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik != bkj)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    bool
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NE): z is true if x and y differ
+#define GB_MULT(z, x, y) \
+    z = (x != y)
+
+// multiply-add: same expansion as GB_MULT (the ANY monoid add is cij = z)
+#define GB_MULTADD(z, x, y) \
+    z = (x != y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// terminal condition (dot product only): always break, with no test of cij
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_NE || GxB_NO_UINT8 || GxB_NO_ANY_BOOL || GxB_NO_NE_UINT8 || GxB_NO_ANY_NE_UINT8)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B__any_ne_uint8: C=A'*B or C<!M>=A'*B, dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the non-complemented masked case, C<M>=A'*B, now uses dot3
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2  // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"  // meta file expands here as the body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B__any_ne_uint8: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B__any_ne_uint8: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_bool.c b/Source/Generated/GB_AxB__any_pair_bool.c
new file mode 100644
index 0000000000..2ef16b642f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_bool
+// A'*B function (dot3):     GB_Adot3B__any_pair_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA] (no load here: expands to an empty statement)
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB] (no load here: expands to an empty statement)
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (PAIR): z is always 1, whatever x and y are
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add: same expansion as GB_MULT (the ANY monoid add is cij = z)
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): always break, with no test of cij
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_PAIR_BOOL || GxB_NO_ANY_PAIR_BOOL)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B__any_pair_bool: C=A'*B or C<!M>=A'*B, dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the non-complemented masked case, C<M>=A'*B, now uses dot3
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2  // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"  // meta file expands here as the body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B__any_pair_bool: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B__any_pair_bool: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_fp32.c b/Source/Generated/GB_AxB__any_pair_fp32.c
new file mode 100644
index 0000000000..515f1b4383
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_fp32
+// A'*B function (dot3):     GB_Adot3B__any_pair_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA] (no load here: expands to an empty statement)
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB] (no load here: expands to an empty statement)
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (PAIR): z is always 1, whatever x and y are
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add: same expansion as GB_MULT (the ANY monoid add is cij = z)
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): always break, with no test of cij
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is simply y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the size of GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if the monoid update is EQ, 0 otherwise
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if any of these flags is set
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_PAIR_FP32 || GxB_NO_ANY_PAIR_FP32)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B__any_pair_fp32: C=A'*B or C<!M>=A'*B, dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the non-complemented masked case, C<M>=A'*B, now uses dot3
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2  // defined only while the meta file is included
+    #include "GB_AxB_dot2_meta.c"  // meta file expands here as the body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B__any_pair_fp32: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B__any_pair_fp32: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE  // true if this hard-coded semiring is disabled
+    return (GrB_NO_VALUE) ;  // disabled: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template expands here as the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_fp64.c b/Source/Generated/GB_AxB__any_pair_fp64.c
new file mode 100644
index 0000000000..45711634a5
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_fp64
+// A'*B function (dot3):     GB_Adot3B__any_pair_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for this 64-bit type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_PAIR_FP64 || GxB_NO_ANY_PAIR_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_fp64
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably B's partition
+    int64_t *GB_RESTRICT *C_counts,                 // array of int64_t arrays
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3 instead
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body: meta template with phase 2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_fp64
+(
+    GrB_Matrix C,                                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // read-only task list
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nthreads                  // presumably # of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_fp64
+(
+    GrB_Matrix C,                               // updated in place: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A's partition
+    const GrB_Matrix B, bool B_is_pattern,      // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B's partition
+    const int nthreads                          // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_fp64
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // M or !M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // task list (non-const)
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nfine,                    // presumably # of fine tasks; confirm
+    const int nthreads,                 // presumably # of threads to use
+    GB_Context Context                  // in scope for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_int16.c b/Source/Generated/GB_AxB__any_pair_int16.c
new file mode 100644
index 0000000000..55e5d2817b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_int16
+// A'*B function (dot3):     GB_Adot3B__any_pair_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_PAIR_INT16 || GxB_NO_ANY_PAIR_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_int16
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably B's partition
+    int64_t *GB_RESTRICT *C_counts,                 // array of int64_t arrays
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3 instead
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body: meta template with phase 2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_int16
+(
+    GrB_Matrix C,                                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // read-only task list
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nthreads                  // presumably # of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_int16
+(
+    GrB_Matrix C,                               // updated in place: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A's partition
+    const GrB_Matrix B, bool B_is_pattern,      // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B's partition
+    const int nthreads                          // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_int16
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // M or !M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // task list (non-const)
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nfine,                    // presumably # of fine tasks; confirm
+    const int nthreads,                 // presumably # of threads to use
+    GB_Context Context                  // in scope for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_int32.c b/Source/Generated/GB_AxB__any_pair_int32.c
new file mode 100644
index 0000000000..4acfcbad35
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_int32
+// A'*B function (dot3):     GB_Adot3B__any_pair_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_PAIR_INT32 || GxB_NO_ANY_PAIR_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_int32
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably B's partition
+    int64_t *GB_RESTRICT *C_counts,                 // array of int64_t arrays
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3 instead
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body: meta template with phase 2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_int32
+(
+    GrB_Matrix C,                                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // read-only task list
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nthreads                  // presumably # of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_int32
+(
+    GrB_Matrix C,                               // updated in place: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A's partition
+    const GrB_Matrix B, bool B_is_pattern,      // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B's partition
+    const int nthreads                          // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_int32
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // M or !M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // task list (non-const)
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nfine,                    // presumably # of fine tasks; confirm
+    const int nthreads,                 // presumably # of threads to use
+    GB_Context Context                  // in scope for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_int64.c b/Source/Generated/GB_AxB__any_pair_int64.c
new file mode 100644
index 0000000000..be1557f563
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_int64
+// A'*B function (dot3):     GB_Adot3B__any_pair_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for this 64-bit type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_PAIR_INT64 || GxB_NO_ANY_PAIR_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_int64
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably B's partition
+    int64_t *GB_RESTRICT *C_counts,                 // array of int64_t arrays
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3 instead
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body: meta template with phase 2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_int64
+(
+    GrB_Matrix C,                                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // read-only task list
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nthreads                  // presumably # of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_int64
+(
+    GrB_Matrix C,                               // updated in place: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A's partition
+    const GrB_Matrix B, bool B_is_pattern,      // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B's partition
+    const int nthreads                          // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_int64
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // M or !M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // task list (non-const)
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nfine,                    // presumably # of fine tasks; confirm
+    const int nthreads,                 // presumably # of threads to use
+    GB_Context Context                  // in scope for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_int8.c b/Source/Generated/GB_AxB__any_pair_int8.c
new file mode 100644
index 0000000000..ec21dfe698
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_int8
+// A'*B function (dot3):     GB_Adot3B__any_pair_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_PAIR_INT8 || GxB_NO_ANY_PAIR_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_int8
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, const bool Mask_struct,     // mask, for C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    int64_t *GB_RESTRICT B_slice,                   // presumably B's partition
+    int64_t *GB_RESTRICT *C_counts,                 // array of int64_t arrays
+    int nthreads, int naslice, int nbslice  // presumably thread/slice counts
+)
+{ 
+    // C<M>=A'*B (mask not complemented) now uses dot3 instead
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // body: meta template with phase 2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_int8
+(
+    GrB_Matrix C,                                   // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,     // mask M of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // left operand, as A'
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    const GB_task_struct *GB_RESTRICT TaskList,     // read-only task list
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nthreads                  // presumably # of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_int8
+(
+    GrB_Matrix C,                               // updated in place: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // left operand, used as A'
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A's partition
+    const GrB_Matrix B, bool B_is_pattern,      // right operand
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B's partition
+    const int nthreads                          // presumably # of threads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_int8
+(
+    GrB_Matrix C,                                   // result of the product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // M or !M
+    const GrB_Matrix A, bool A_is_pattern,          // left operand
+    const GrB_Matrix B, bool B_is_pattern,          // right operand
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // task list (non-const)
+    const int ntasks,                   // presumably # of tasks in TaskList
+    const int nfine,                    // presumably # of fine tasks; confirm
+    const int nthreads,                 // presumably # of threads to use
+    GB_Context Context                  // in scope for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body, specialized by GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_uint16.c b/Source/Generated/GB_AxB__any_pair_uint16.c
new file mode 100644
index 0000000000..20cc4313fc
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_uint16
+// A'*B function (dot3):     GB_Adot3B__any_pair_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]; expands to an empty statement here, so nothing is loaded
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]; expands to an empty statement here, so nothing is loaded
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is set to 1; x and y are not used
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add: same expansion as GB_MULT in this file
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition for the dot product; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization; an empty statement here
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; here the update is a plain overwrite of Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (the monoid add); here it evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; here the update is a plain overwrite of Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_PAIR_UINT16 || GxB_NO_ANY_PAIR_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_uint16     // dot2 (phase 2), any_pair_uint16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // NOTE(review): presumably one matrix per slice of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_uint16     // masked dot product (dot3), any_pair_uint16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct, // M is the mask in C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_uint16     // C+=A'*B (dot4), any_pair_uint16 semiring
+(
+    GrB_Matrix C,                       // matrix being updated (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_uint16   // saxpy3 kernel, any_pair_uint16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_uint32.c b/Source/Generated/GB_AxB__any_pair_uint32.c
new file mode 100644
index 0000000000..3947c13478
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_uint32
+// A'*B function (dot3):     GB_Adot3B__any_pair_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]; expands to an empty statement here, so nothing is loaded
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]; expands to an empty statement here, so nothing is loaded
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is set to 1; x and y are not used
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add: same expansion as GB_MULT in this file
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition for the dot product; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization; an empty statement here
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; here the update is a plain overwrite of Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (the monoid add); here it evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; here the update is a plain overwrite of Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_PAIR_UINT32 || GxB_NO_ANY_PAIR_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_uint32     // dot2 (phase 2), any_pair_uint32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // NOTE(review): presumably one matrix per slice of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_uint32     // masked dot product (dot3), any_pair_uint32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct, // M is the mask in C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_uint32     // C+=A'*B (dot4), any_pair_uint32 semiring
+(
+    GrB_Matrix C,                       // matrix being updated (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_uint32   // saxpy3 kernel, any_pair_uint32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_uint64.c b/Source/Generated/GB_AxB__any_pair_uint64.c
new file mode 100644
index 0000000000..5c430bf5b7
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_uint64
+// A'*B function (dot3):     GB_Adot3B__any_pair_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]; expands to an empty statement here, so nothing is loaded
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]; expands to an empty statement here, so nothing is loaded
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is set to 1; x and y are not used
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add: same expansion as GB_MULT in this file
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition for the dot product; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization; an empty statement here
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; here the update is a plain overwrite of Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (the monoid add); here it evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; here the update is a plain overwrite of Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_PAIR_UINT64 || GxB_NO_ANY_PAIR_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_uint64     // dot2 (phase 2), any_pair_uint64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // NOTE(review): presumably one matrix per slice of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_uint64     // masked dot product (dot3), any_pair_uint64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct, // M is the mask in C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_uint64     // C+=A'*B (dot4), any_pair_uint64 semiring
+(
+    GrB_Matrix C,                       // matrix being updated (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_uint64   // saxpy3 kernel, any_pair_uint64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_pair_uint8.c b/Source/Generated/GB_AxB__any_pair_uint8.c
new file mode 100644
index 0000000000..8b8bae7fae
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_pair_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_pair_uint8
+// A'*B function (dot3):     GB_Adot3B__any_pair_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_pair_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_pair_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = 1
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = 1
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]; expands to an empty statement here, so nothing is loaded
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]; expands to an empty statement here, so nothing is loaded
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z is set to 1; x and y are not used
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add: same expansion as GB_MULT in this file
+#define GB_MULTADD(z, x, y) \
+    z = 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition for the dot product; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization; an empty statement here
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; here the update is a plain overwrite of Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (the monoid add); here it evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    1
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; here the update is a plain overwrite of Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PAIR || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_PAIR_UINT8 || GxB_NO_ANY_PAIR_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_pair_uint8      // dot2 (phase 2), any_pair_uint8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // NOTE(review): presumably one matrix per slice of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_pair_uint8      // masked dot product (dot3), any_pair_uint8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct, // M is the mask in C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_pair_uint8      // C+=A'*B (dot4), any_pair_uint8 semiring
+(
+    GrB_Matrix C,                       // matrix being updated (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_pair_uint8    // saxpy3 kernel, any_pair_uint8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_fp32.c b/Source/Generated/GB_AxB__any_plus_fp32.c
new file mode 100644
index 0000000000..4abe48e2f2
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_fp32
+// A'*B function (dot3):     GB_Adot3B__any_plus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik + bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: same expansion as GB_MULT in this file
+#define GB_MULTADD(z, x, y) \
+    z = (x + y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition for the dot product; here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization; an empty statement here
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; here the update is a plain overwrite of Cx [p]
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (the monoid add); here it evaluates to y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; here the update is a plain overwrite of Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_ANY_PLUS_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_fp32       // dot2 (phase 2), any_plus_fp32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // NOTE(review): presumably one matrix per slice of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // the function body is this included file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_fp32       // masked dot product (dot3), any_plus_fp32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct, // M is the mask in C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_fp32       // C+=A'*B (dot4), any_plus_fp32 semiring
+(
+    GrB_Matrix C,                       // matrix being updated (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_fp32     // saxpy3 kernel, any_plus_fp32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // taken when this semiring is disabled (see GB_DISABLE)
+    return (GrB_NO_VALUE) ;             // nothing was computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_fp64.c b/Source/Generated/GB_AxB__any_plus_fp64.c
new file mode 100644
index 0000000000..d32bce57d7
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_fp64
+// A'*B function (dot3):     GB_Adot3B__any_plus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik + bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: z is overwritten with x + y (no accumulation into z)
+#define GB_MULTADD(z, x, y) \
+    z = (x + y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break: no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the monoid: this definition simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_ANY_PLUS_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3.  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_int16.c b/Source/Generated/GB_AxB__any_plus_int16.c
new file mode 100644
index 0000000000..7678dcea40
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_int16
+// A'*B function (dot3):     GB_Adot3B__any_plus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: declares x_op_y = x + y, then overwrites z (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break: no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the monoid: this definition simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_ANY_PLUS_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3.  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_int32.c b/Source/Generated/GB_AxB__any_plus_int32.c
new file mode 100644
index 0000000000..c7f8b97224
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_int32
+// A'*B function (dot3):     GB_Adot3B__any_plus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: declares x_op_y = x + y, then overwrites z (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break: no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the monoid: this definition simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_ANY_PLUS_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3.  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_int64.c b/Source/Generated/GB_AxB__any_plus_int64.c
new file mode 100644
index 0000000000..eaa63d6d6a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_int64
+// A'*B function (dot3):     GB_Adot3B__any_plus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: declares x_op_y = x + y, then overwrites z (no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// unconditional break: no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid is the plain overwrite Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the monoid: this definition simply yields y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid is the plain overwrite Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_ANY_PLUS_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3.  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product (phase 2); GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; returns GrB_NO_VALUE if GB_DISABLE is nonzero
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash); GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_int8.c b/Source/Generated/GB_AxB__any_plus_int8.c
new file mode 100644
index 0000000000..20540aa4a7
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_int8
+// A'*B function (dot3):     GB_Adot3B__any_plus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t      // type of the entries of A
+
+#define GB_BTYPE \
+    int8_t      // type of the entries of B
+
+#define GB_CTYPE \
+    int8_t      // type of the entries of C
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator of this semiring (PLUS): z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: z is overwritten with x + y (ANY monoid: no accumulation)
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally, ignoring cij (ANY monoid; dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the ANY monoid: Cx [p] is overwritten with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not selected here (GB_IS_ANY_PAIR_SEMIRING is 0).  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the ANY monoid: Hx [i] is overwritten with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_ANY_PLUS_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_int8   // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the file is included
+    #include "GB_AxB_dot2_meta.c"   // the included file is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_int8   // C<M>=A'*B, masked dot product method
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M for C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_int8   // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_int8 // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_uint16.c b/Source/Generated/GB_AxB__any_plus_uint16.c
new file mode 100644
index 0000000000..9ff51e9cde
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_uint16
+// A'*B function (dot3):     GB_Adot3B__any_plus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t    // type of the entries of A
+
+#define GB_BTYPE \
+    uint16_t    // type of the entries of B
+
+#define GB_CTYPE \
+    uint16_t    // type of the entries of C
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator of this semiring (PLUS): z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: z is overwritten with x + y (ANY monoid: no accumulation)
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally, ignoring cij (ANY monoid; dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the ANY monoid: Cx [p] is overwritten with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not selected here (GB_IS_ANY_PAIR_SEMIRING is 0).  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the ANY monoid: Hx [i] is overwritten with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_ANY_PLUS_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_uint16 // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the file is included
+    #include "GB_AxB_dot2_meta.c"   // the included file is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_uint16 // C<M>=A'*B, masked dot product method
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M for C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_uint16 // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_uint16   // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_uint32.c b/Source/Generated/GB_AxB__any_plus_uint32.c
new file mode 100644
index 0000000000..74fd0ba97a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_uint32
+// A'*B function (dot3):     GB_Adot3B__any_plus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t    // type of the entries of A
+
+#define GB_BTYPE \
+    uint32_t    // type of the entries of B
+
+#define GB_CTYPE \
+    uint32_t    // type of the entries of C
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator of this semiring (PLUS): z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: z is overwritten with x + y (ANY monoid: no accumulation)
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally, ignoring cij (ANY monoid; dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the ANY monoid: Cx [p] is overwritten with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not selected here (GB_IS_ANY_PAIR_SEMIRING is 0).  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the ANY monoid: Hx [i] is overwritten with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_ANY_PLUS_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_uint32 // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the file is included
+    #include "GB_AxB_dot2_meta.c"   // the included file is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_uint32 // C<M>=A'*B, masked dot product method
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M for C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_uint32 // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_uint32   // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_uint64.c b/Source/Generated/GB_AxB__any_plus_uint64.c
new file mode 100644
index 0000000000..897a023e1c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_uint64
+// A'*B function (dot3):     GB_Adot3B__any_plus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t    // type of the entries of A
+
+#define GB_BTYPE \
+    uint64_t    // type of the entries of B
+
+#define GB_CTYPE \
+    uint64_t    // type of the entries of C
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator of this semiring (PLUS): z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: z is overwritten with x + y (ANY monoid: no accumulation)
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally, ignoring cij (ANY monoid; dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the ANY monoid: Cx [p] is overwritten with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, where "+" is the ANY monoid: the result is y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not selected here (GB_IS_ANY_PAIR_SEMIRING is 0).  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the ANY monoid: Hx [i] is overwritten with t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_ANY_PLUS_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_uint64 // C=A'*B or C<!M>=A'*B, dot product
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2         // defined only while the file is included
+    #include "GB_AxB_dot2_meta.c"   // the included file is the function body
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_uint64 // C<M>=A'*B, masked dot product method
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, const bool Mask_struct,     // mask M for C<M>
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_uint64 // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                   // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"   // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_uint64   // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // mask M
+    const GrB_Matrix A, bool A_is_pattern,          // input matrix A
+    const GrB_Matrix B, bool B_is_pattern,          // input matrix B
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the included file is the function body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_plus_uint8.c b/Source/Generated/GB_AxB__any_plus_uint8.c
new file mode 100644
index 0000000000..b3d20d710d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_plus_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_plus_uint8
+// A'*B function (dot3):     GB_Adot3B__any_plus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_plus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_plus_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik + bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = (aik + bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]: declares aik and loads the entry of Ax at position pA
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads the entry of Bx at position pB
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (PLUS): z = x + y
+#define GB_MULT(z, x, y) \
+    z = (x + y)
+
+// multiply-add: z = x + y (ANY monoid: z is overwritten, not accumulated)
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x + y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// monoid add of (x,y); for the ANY monoid this expands to just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_PLUS || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_ANY_PLUS_UINT8)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B: C=A'*B or C<!M>=A'*B, dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_plus_uint8
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) is handled by dot3, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_plus_uint8
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_plus_uint8
+(
+    GrB_Matrix C,                               // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slices of A (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slices of B (confirm)
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Asaxpy3B: C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_plus_uint8
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_fp32.c b/Source/Generated/GB_AxB__any_rdiv_fp32.c
new file mode 100644
index 0000000000..d913c64885
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_fp32
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (bkj / aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (bkj / aik)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]: declares aik and loads the entry of Ax at position pA
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads the entry of Bx at position pB
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RDIV): z = y / x, with the operands reversed
+#define GB_MULT(z, x, y) \
+    z = (y / x)
+
+// multiply-add: z = y / x (ANY monoid: z is overwritten, not accumulated)
+#define GB_MULTADD(z, x, y) \
+    z = (y / x)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// monoid add of (x,y); for the ANY monoid this expands to just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_RDIV_FP32 || GxB_NO_ANY_RDIV_FP32)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B: C=A'*B or C<!M>=A'*B, dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_fp32
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) is handled by dot3, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_fp32
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_fp32
+(
+    GrB_Matrix C,                               // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slices of A (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slices of B (confirm)
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Asaxpy3B: C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_fp32
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_fp64.c b/Source/Generated/GB_AxB__any_rdiv_fp64.c
new file mode 100644
index 0000000000..8d5c771a1a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_fp64
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (bkj / aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (bkj / aik)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]: declares aik and loads the entry of Ax at position pA
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads the entry of Bx at position pB
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RDIV): z = y / x, with the operands reversed
+#define GB_MULT(z, x, y) \
+    z = (y / x)
+
+// multiply-add: z = y / x (ANY monoid: z is overwritten, not accumulated)
+#define GB_MULTADD(z, x, y) \
+    z = (y / x)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// monoid add of (x,y); for the ANY monoid this expands to just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_RDIV_FP64 || GxB_NO_ANY_RDIV_FP64)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B: C=A'*B or C<!M>=A'*B, dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_fp64
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) is handled by dot3, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_fp64
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_fp64
+(
+    GrB_Matrix C,                               // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slices of A (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slices of B (confirm)
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Asaxpy3B: C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_fp64
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_int16.c b/Source/Generated/GB_AxB__any_rdiv_int16.c
new file mode 100644
index 0000000000..67ea10fff6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_int16
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = GB_IDIV_SIGNED (bkj, aik, 16)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 16) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]: declares aik and loads the entry of Ax at position pA
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]: declares bkj and loads the entry of Bx at position pB
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RDIV): z = GB_IDIV_SIGNED (y, x, 16), operands reversed
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 16)
+
+// multiply-add: same divide (ANY monoid: z is overwritten, not accumulated)
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition (dot product only): here an unconditional break
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid this is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// monoid add of (x,y); for the ANY monoid this expands to just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid this is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_RDIV_INT16 || GxB_NO_ANY_RDIV_INT16)
+
+//------------------------------------------------------------------------------
+// GB_Adot2B: C=A'*B or C<!M>=A'*B, dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_int16
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<!M>
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // presumably slices of A
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B (mask not complemented) is handled by dot3, not here
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot3B: C<M>=A'*B, masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_int16
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, const bool Mask_struct, // mask, used as C<M>
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Adot4B: C+=A'*B, dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_int16
+(
+    GrB_Matrix C,                               // input/output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slices of A (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slices of B (confirm)
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Asaxpy3B: C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int16
+(
+    GrB_Matrix C,                               // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,           // NOTE(review): presumably size of TaskList
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // kernel body is the included template
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_int32.c b/Source/Generated/GB_AxB__any_rdiv_int32.c
new file mode 100644
index 0000000000..56ff30e04e
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_int32
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = GB_IDIV_SIGNED (bkj, aik, 32)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 32) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (rdiv: x and y are passed to GB_IDIV_SIGNED reversed)
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 32)
+
+// multiply-add (expands to two statements and declares x_op_y)
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break if cij is terminal (unconditional break here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for this monoid the update is a plain overwrite)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for this monoid the add function yields y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for this monoid the update is a plain overwrite)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_RDIV_INT32 || GxB_NO_ANY_RDIV_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_int32      // dot2 kernel: C=A'*B or C<!M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_int32      // dot3 kernel: C<M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_int32      // dot4 kernel: C+=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int32    // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_int64.c b/Source/Generated/GB_AxB__any_rdiv_int64.c
new file mode 100644
index 0000000000..96f8fb3b71
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_int64
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = GB_IDIV_SIGNED (bkj, aik, 64)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 64) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (rdiv: x and y are passed to GB_IDIV_SIGNED reversed)
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 64)
+
+// multiply-add (expands to two statements and declares x_op_y)
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break if cij is terminal (unconditional break here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for this monoid the update is a plain overwrite)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for this monoid the add function yields y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for this monoid the update is a plain overwrite)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_RDIV_INT64 || GxB_NO_ANY_RDIV_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_int64      // dot2 kernel: C=A'*B or C<!M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_int64      // dot3 kernel: C<M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_int64      // dot4 kernel: C+=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int64    // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_int8.c b/Source/Generated/GB_AxB__any_rdiv_int8.c
new file mode 100644
index 0000000000..96c3123c4a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_int8
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = GB_IDIV_SIGNED (bkj, aik, 8)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 8) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (rdiv: x and y are passed to GB_IDIV_SIGNED reversed)
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 8)
+
+// multiply-add (expands to two statements and declares x_op_y)
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break if cij is terminal (unconditional break here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for this monoid the update is a plain overwrite)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for this monoid the add function yields y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for this monoid the update is a plain overwrite)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_RDIV_INT8 || GxB_NO_ANY_RDIV_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_int8       // dot2 kernel: C=A'*B or C<!M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_int8       // dot3 kernel: C<M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_int8       // dot4 kernel: C+=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int8     // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_uint16.c b/Source/Generated/GB_AxB__any_rdiv_uint16.c
new file mode 100644
index 0000000000..bc95a3fce6
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_uint16
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 16)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 16) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (rdiv: x and y are passed to GB_IDIV_UNSIGNED reversed)
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 16)
+
+// multiply-add (expands to two statements and declares x_op_y)
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break if cij is terminal (unconditional break here)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for this monoid the update is a plain overwrite)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (for this monoid the add function yields y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for this monoid the update is a plain overwrite)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_RDIV_UINT16 || GxB_NO_ANY_RDIV_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_uint16     // dot2 kernel: C=A'*B or C<!M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // body is textually included
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_uint16     // dot3 kernel: C<M>=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_uint16     // dot4 kernel: C+=A'*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint16   // saxpy3: C=A*B, C<M>=A*B, C<!M>=A*B
+(   // NOTE(review): names are presumably used by the included template
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // GB_DISABLE is true: nothing is computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_uint32.c b/Source/Generated/GB_AxB__any_rdiv_uint32.c
new file mode 100644
index 0000000000..4d8212c26f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_uint32
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 32)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 32) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RDIV): x and y are passed to GB_IDIV_UNSIGNED swapped
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 32)
+
+// multiply-add: the prior value of z is not read; z is just the product
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// ANY monoid: always break, whatever the value of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len entries of size sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_RDIV_UINT32 || GxB_NO_ANY_RDIV_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_RDIV_UINT32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so that case is not handled by this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_RDIV_UINT32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_RDIV_UINT32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// ANY_RDIV_UINT32 saxpy3 (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_uint64.c b/Source/Generated/GB_AxB__any_rdiv_uint64.c
new file mode 100644
index 0000000000..71ed660460
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_uint64
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 64)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 64) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RDIV): x and y are passed to GB_IDIV_UNSIGNED swapped
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 64)
+
+// multiply-add: the prior value of z is not read; z is just the product
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// ANY monoid: always break, whatever the value of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for uint64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len entries of size sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_RDIV_UINT64 || GxB_NO_ANY_RDIV_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_RDIV_UINT64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so that case is not handled by this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_RDIV_UINT64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_RDIV_UINT64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// ANY_RDIV_UINT64 saxpy3 (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rdiv_uint8.c b/Source/Generated/GB_AxB__any_rdiv_uint8.c
new file mode 100644
index 0000000000..3f21b9e9c4
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rdiv_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rdiv_uint8
+// A'*B function (dot3):     GB_Adot3B__any_rdiv_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_rdiv_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rdiv_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 8)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 8) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RDIV): x and y are passed to GB_IDIV_UNSIGNED swapped
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 8)
+
+// multiply-add: the prior value of z is not read; z is just the product
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// ANY monoid: always break, whatever the value of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len entries of size sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RDIV || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_RDIV_UINT8 || GxB_NO_ANY_RDIV_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_RDIV_UINT8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so that case is not handled by this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_RDIV_UINT8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_RDIV_UINT8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// ANY_RDIV_UINT8 saxpy3 (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_fp32.c b/Source/Generated/GB_AxB__any_rminus_fp32.c
new file mode 100644
index 0000000000..785853af71
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_fp32
+// A'*B function (dot3):     GB_Adot3B__any_rminus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (bkj - aik)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RMINUS): z = y - x, with the operands reversed
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add: the prior value of z is not read; z is just y - x
+#define GB_MULTADD(z, x, y) \
+    z = (y - x)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// ANY monoid: always break, whatever the value of cij (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for float)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len entries of size sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_RMINUS_FP32 || GxB_NO_ANY_RMINUS_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), ANY_RMINUS_FP32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so that case is not handled by this function
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), ANY_RMINUS_FP32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, ANY_RMINUS_FP32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// ANY_RMINUS_FP32 saxpy3 (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_fp64.c b/Source/Generated/GB_AxB__any_rminus_fp64.c
new file mode 100644
index 0000000000..f39d1e55f3
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_fp64
+// A'*B function (dot3):     GB_Adot3B__any_rminus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (bkj - aik)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = (y - x)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update simply overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_RMINUS_FP64 || GxB_NO_ANY_RMINUS_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_fp64     // dot2 kernel, any_rminus_fp64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"        // pasted with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_fp64     // C<M>=A'*B (dot3), any_rminus_fp64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_fp64     // C+=A'*B (dot4), any_rminus_fp64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_fp64   // saxpy3 kernel, any_rminus_fp64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_int16.c b/Source/Generated/GB_AxB__any_rminus_int16.c
new file mode 100644
index 0000000000..adb7d823dd
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_int16
+// A'*B function (dot3):     GB_Adot3B__any_rminus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update simply overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_RMINUS_INT16 || GxB_NO_ANY_RMINUS_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_int16    // dot2 kernel, any_rminus_int16
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"        // pasted with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_int16    // C<M>=A'*B (dot3), any_rminus_int16
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_int16    // C+=A'*B (dot4), any_rminus_int16
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_int16  // saxpy3 kernel, any_rminus_int16
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_int32.c b/Source/Generated/GB_AxB__any_rminus_int32.c
new file mode 100644
index 0000000000..d42ca58ab7
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_int32
+// A'*B function (dot3):     GB_Adot3B__any_rminus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update simply overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_RMINUS_INT32 || GxB_NO_ANY_RMINUS_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_int32    // dot2 kernel, any_rminus_int32
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"        // pasted with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_int32    // C<M>=A'*B (dot3), any_rminus_int32
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_int32    // C+=A'*B (dot4), any_rminus_int32
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_int32  // saxpy3 kernel, any_rminus_int32
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_int64.c b/Source/Generated/GB_AxB__any_rminus_int64.c
new file mode 100644
index 0000000000..e34a8839ca
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_int64
+// A'*B function (dot3):     GB_Adot3B__any_rminus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t; for the ANY monoid the update simply overwrites C(i,j) with t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t; for the ANY monoid the update simply overwrites Hx [i]
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_RMINUS_INT64 || GxB_NO_ANY_RMINUS_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_int64    // dot2 kernel, any_rminus_int64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"        // pasted with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_int64    // C<M>=A'*B (dot3), any_rminus_int64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot3_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_int64    // C+=A'*B (dot4), any_rminus_int64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_dot4_template.c"    // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_int64  // saxpy3 kernel, any_rminus_int64
+(                                       // returns GrB_SUCCESS or GrB_NO_VALUE
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel compiled out
+    #else
+    #include "GB_AxB_saxpy3_template.c"  // template text becomes the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_int8.c b/Source/Generated/GB_AxB__any_rminus_int8.c
new file mode 100644
index 0000000000..2859719498
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_int8
+// A'*B function (dot3):     GB_Adot3B__any_rminus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare aik and set aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and set bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RMINUS): z = y - x
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (y - x)
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij is terminal (dot product only); unconditional for ANY
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is defined here as y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (8 bits here)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is RMINUS)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not used here (ANY_PAIR only): purely symbolic, no numeric work, no Hx
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_RMINUS_INT8 || GxB_NO_ANY_RMINUS_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_uint16.c b/Source/Generated/GB_AxB__any_rminus_uint16.c
new file mode 100644
index 0000000000..51729c9293
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_uint16
+// A'*B function (dot3):     GB_Adot3B__any_rminus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare aik and set aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and set bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RMINUS): z = y - x
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (y - x)
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij is terminal (dot product only); unconditional for ANY
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is defined here as y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (16 bits here)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is RMINUS)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not used here (ANY_PAIR only): purely symbolic, no numeric work, no Hx
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_RMINUS_UINT16 || GxB_NO_ANY_RMINUS_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_uint32.c b/Source/Generated/GB_AxB__any_rminus_uint32.c
new file mode 100644
index 0000000000..d976e1c408
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_uint32
+// A'*B function (dot3):     GB_Adot3B__any_rminus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// declare aik and set aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and set bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RMINUS): z = y - x
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (y - x)
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij is terminal (dot product only); unconditional for ANY
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is defined here as y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (32 bits here)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is RMINUS)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not used here (ANY_PAIR only): purely symbolic, no numeric work, no Hx
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_RMINUS_UINT32 || GxB_NO_ANY_RMINUS_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_uint64.c b/Source/Generated/GB_AxB__any_rminus_uint64.c
new file mode 100644
index 0000000000..d0555ac94a
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_uint64
+// A'*B function (dot3):     GB_Adot3B__any_rminus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// declare aik and set aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// declare bkj and set bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (RMINUS): z = y - x
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add: with the ANY monoid, z is simply overwritten by (y - x)
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij is terminal (dot product only); unconditional for ANY
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (none: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j): Cx [p] = cij
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is the plain assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is defined here as y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator (0 here: the multiplier is RMINUS)
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // not used here (ANY_PAIR only): purely symbolic, no numeric work, no Hx
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries from Hx [i...] to Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if any of these conditions holds
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_RMINUS_UINT64 || GxB_NO_ANY_RMINUS_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), via GB_AxB_dot2_meta.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; GB_PHASE_2_OF_2 is defined only for the include
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), GB_AxB_dot3_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, via GB_AxB_dot4_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: kernel not compiled in
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_rminus_uint8.c b/Source/Generated/GB_AxB__any_rminus_uint8.c
new file mode 100644
index 0000000000..f8c195eebd
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_rminus_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_rminus_uint8
+// A'*B function (dot3):     GB_Adot3B__any_rminus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_rminus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_rminus_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (bkj - aik)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = (bkj - aik) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (y - x)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (y - x) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_RMINUS || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_RMINUS_UINT8 || GxB_NO_ANY_RMINUS_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this covers only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson+Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_bool.c b/Source/Generated/GB_AxB__any_second_bool.c
new file mode 100644
index 0000000000..eacf336181
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_bool
+// A'*B function (dot3):     GB_Adot3B__any_second_bool
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]; a no-op here since the SECOND multiplier ignores aik
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    bool bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_BOOL || GxB_NO_ANY_BOOL || GxB_NO_SECOND_BOOL || GxB_NO_ANY_SECOND_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this covers only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson+Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_fp32.c b/Source/Generated/GB_AxB__any_second_fp32.c
new file mode 100644
index 0000000000..df0e577de9
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_fp32
+// A'*B function (dot3):     GB_Adot3B__any_second_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]; a no-op here since the SECOND multiplier ignores aik
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_SECOND_FP32 || GxB_NO_ANY_SECOND_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this covers only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson+Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_fp64.c b/Source/Generated/GB_AxB__any_second_fp64.c
new file mode 100644
index 0000000000..7a64253e2b
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_fp64
+// A'*B function (dot3):     GB_Adot3B__any_second_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]; a no-op here since the SECOND multiplier ignores aik
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the ANY monoid is just C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the ANY monoid is just Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_SECOND_FP64 || GxB_NO_ANY_SECOND_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this covers only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product; GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson+Hash); GrB_NO_VALUE if disabled
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_int16.c b/Source/Generated/GB_AxB__any_second_int16.c
new file mode 100644
index 0000000000..305a550153
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_int16
+// A'*B function (dot3):     GB_Adot3B__any_second_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites C(i,j) with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites Hx [i] with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_SECOND_INT16 || GxB_NO_ANY_SECOND_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_int16    // dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B, any_second_int16 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // array of matrices, presumably the naslice slices of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern presumably means B's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT B_slice,    // presumably partitions B into nbslice slices (TODO confirm)
+    int64_t *GB_RESTRICT *C_counts,    // available to the included template; meaning not visible here
+    int nthreads, int naslice, int nbslice    // presumably thread count and slice counts for A and B (TODO confirm)
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2    // defined only around the include below, to select phase 2 in the meta-template
+    #include "GB_AxB_dot2_meta.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_int16    // dot3 kernel: masked C<M>=A'*B, any_second_int16 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    const GB_task_struct *GB_RESTRICT TaskList,    // task descriptors (read-only), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_int16    // dot4 kernel: C+=A'*B (dense dot product), any_second_int16 semiring
+(
+    GrB_Matrix C,    // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably A_slice partitions A into naslice slices (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    int64_t *GB_RESTRICT B_slice, int nbslice,    // presumably B_slice partitions B into nbslice slices (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_int16    // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B, any_second_int16 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the complemented form C<!M> (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // saxpy3 task descriptors (non-const), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nfine,    // presumably the number of fine-grain tasks (TODO confirm)
+    const int nthreads,    // presumably the number of threads to use (TODO confirm)
+    GB_Context Context    // available to the included template; its use is not visible here
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_int32.c b/Source/Generated/GB_AxB__any_second_int32.c
new file mode 100644
index 0000000000..7d3b71b7c5
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_int32
+// A'*B function (dot3):     GB_Adot3B__any_second_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites C(i,j) with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites Hx [i] with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_SECOND_INT32 || GxB_NO_ANY_SECOND_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_int32    // dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B, any_second_int32 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // array of matrices, presumably the naslice slices of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern presumably means B's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT B_slice,    // presumably partitions B into nbslice slices (TODO confirm)
+    int64_t *GB_RESTRICT *C_counts,    // available to the included template; meaning not visible here
+    int nthreads, int naslice, int nbslice    // presumably thread count and slice counts for A and B (TODO confirm)
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2    // defined only around the include below, to select phase 2 in the meta-template
+    #include "GB_AxB_dot2_meta.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_int32    // dot3 kernel: masked C<M>=A'*B, any_second_int32 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    const GB_task_struct *GB_RESTRICT TaskList,    // task descriptors (read-only), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_int32    // dot4 kernel: C+=A'*B (dense dot product), any_second_int32 semiring
+(
+    GrB_Matrix C,    // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably A_slice partitions A into naslice slices (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    int64_t *GB_RESTRICT B_slice, int nbslice,    // presumably B_slice partitions B into nbslice slices (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_int32    // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B, any_second_int32 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the complemented form C<!M> (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // saxpy3 task descriptors (non-const), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nfine,    // presumably the number of fine-grain tasks (TODO confirm)
+    const int nthreads,    // presumably the number of threads to use (TODO confirm)
+    GB_Context Context    // available to the included template; its use is not visible here
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_int64.c b/Source/Generated/GB_AxB__any_second_int64.c
new file mode 100644
index 0000000000..710e7924b0
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_int64
+// A'*B function (dot3):     GB_Adot3B__any_second_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites C(i,j) with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites Hx [i] with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_SECOND_INT64 || GxB_NO_ANY_SECOND_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_int64    // dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B, any_second_int64 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // array of matrices, presumably the naslice slices of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern presumably means B's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT B_slice,    // presumably partitions B into nbslice slices (TODO confirm)
+    int64_t *GB_RESTRICT *C_counts,    // available to the included template; meaning not visible here
+    int nthreads, int naslice, int nbslice    // presumably thread count and slice counts for A and B (TODO confirm)
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2    // defined only around the include below, to select phase 2 in the meta-template
+    #include "GB_AxB_dot2_meta.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_int64    // dot3 kernel: masked C<M>=A'*B, any_second_int64 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    const GB_task_struct *GB_RESTRICT TaskList,    // task descriptors (read-only), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_int64    // dot4 kernel: C+=A'*B (dense dot product), any_second_int64 semiring
+(
+    GrB_Matrix C,    // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably A_slice partitions A into naslice slices (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    int64_t *GB_RESTRICT B_slice, int nbslice,    // presumably B_slice partitions B into nbslice slices (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_int64    // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B, any_second_int64 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the complemented form C<!M> (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // saxpy3 task descriptors (non-const), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nfine,    // presumably the number of fine-grain tasks (TODO confirm)
+    const int nthreads,    // presumably the number of threads to use (TODO confirm)
+    GB_Context Context    // available to the included template; its use is not visible here
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_int8.c b/Source/Generated/GB_AxB__any_second_int8.c
new file mode 100644
index 0000000000..ea047fd20c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_int8
+// A'*B function (dot3):     GB_Adot3B__any_second_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites C(i,j) with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites Hx [i] with t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_SECOND_INT8 || GxB_NO_ANY_SECOND_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_int8    // dot2 kernel (phase 2): C=A'*B or C<!M>=A'*B, any_second_int8 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // array of matrices, presumably the naslice slices of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern presumably means B's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT B_slice,    // presumably partitions B into nbslice slices (TODO confirm)
+    int64_t *GB_RESTRICT *C_counts,    // available to the included template; meaning not visible here
+    int nthreads, int naslice, int nbslice    // presumably thread count and slice counts for A and B (TODO confirm)
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #define GB_PHASE_2_OF_2    // defined only around the include below, to select phase 2 in the meta-template
+    #include "GB_AxB_dot2_meta.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_int8    // dot3 kernel: masked C<M>=A'*B, any_second_int8 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, const bool Mask_struct,    // mask M; Mask_struct presumably requests a structural mask (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    const GB_task_struct *GB_RESTRICT TaskList,    // task descriptors (read-only), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_int8    // dot4 kernel: C+=A'*B (dense dot product), any_second_int8 semiring
+(
+    GrB_Matrix C,    // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably A_slice partitions A into naslice slices (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    int64_t *GB_RESTRICT B_slice, int nbslice,    // presumably B_slice partitions B into nbslice slices (TODO confirm)
+    const int nthreads    // presumably the number of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_int8    // saxpy3 kernel: C=A*B, C<M>=A*B, C<!M>=A*B, any_second_int8 semiring
+(
+    GrB_Matrix C,    // result matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // mask M; Mask_comp presumably selects the complemented form C<!M> (TODO confirm)
+    const GrB_Matrix A, bool A_is_pattern,    // left matrix; A_is_pattern presumably means A's values are not read (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,    // right matrix; B_is_pattern: same question as for A
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // saxpy3 task descriptors (non-const), available to the included template
+    const int ntasks,    // presumably the number of entries in TaskList (TODO confirm)
+    const int nfine,    // presumably the number of fine-grain tasks (TODO confirm)
+    const int nthreads,    // presumably the number of threads to use (TODO confirm)
+    GB_Context Context    // available to the included template; its use is not visible here
+)
+{ 
+    #if GB_DISABLE    // true when this semiring is compiled out (see GB_DISABLE above)
+    return (GrB_NO_VALUE) ;    // no hard-coded kernel ran; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_saxpy3_template.c"    // kernel body, textually included; presumably specialized via the GB_* macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_uint16.c b/Source/Generated/GB_AxB__any_second_uint16.c
new file mode 100644
index 0000000000..d526677bbc
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_uint16
+// A'*B function (dot3):     GB_Adot3B__any_second_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA] (an empty statement here: z = bkj never reads aik)
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally; no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid (cij = z) is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid (cij = z) is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_SECOND_UINT16 || GxB_NO_ANY_SECOND_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_uint16   // dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no argument is used in the visible body; only the meta file can use them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // all of the work is in this meta file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // follows the meta file's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_uint16   // masked dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_uint16   // dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_uint16 // saxpy3 kernel (Gustavson + Hash)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_uint32.c b/Source/Generated/GB_AxB__any_second_uint32.c
new file mode 100644
index 0000000000..b6c37426ed
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_uint32
+// A'*B function (dot3):     GB_Adot3B__any_second_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA] (an empty statement here: z = bkj never reads aik)
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally; no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid (cij = z) is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid (cij = z) is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_SECOND_UINT32 || GxB_NO_ANY_SECOND_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_uint32   // dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no argument is used in the visible body; only the meta file can use them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // all of the work is in this meta file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // follows the meta file's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_uint32   // masked dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_uint32   // dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_uint32 // saxpy3 kernel (Gustavson + Hash)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_uint64.c b/Source/Generated/GB_AxB__any_second_uint64.c
new file mode 100644
index 0000000000..e91023a301
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_uint64
+// A'*B function (dot3):     GB_Adot3B__any_second_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA] (an empty statement here: z = bkj never reads aik)
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally; no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid (cij = z) is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid (cij = z) is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_SECOND_UINT64 || GxB_NO_ANY_SECOND_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_uint64   // dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no argument is used in the visible body; only the meta file can use them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // all of the work is in this meta file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // follows the meta file's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_uint64   // masked dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_uint64   // dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_uint64 // saxpy3 kernel (Gustavson + Hash)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_second_uint8.c b/Source/Generated/GB_AxB__any_second_uint8.c
new file mode 100644
index 0000000000..049c45042c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_second_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_second_uint8
+// A'*B function (dot3):     GB_Adot3B__any_second_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_second_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_second_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = bkj
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = bkj
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA] (an empty statement here: z = bkj never reads aik)
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = y
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z = y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break unconditionally; no terminal value is tested (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for this monoid (cij = z) is the assignment C(i,j) = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for this monoid (cij = z) is the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_SECOND || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_SECOND_UINT8 || GxB_NO_ANY_SECOND_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_second_uint8    // dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)   // no argument is used in the visible body; only the meta file can use them
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #define GB_PHASE_2_OF_2             // defined only around the include below
+    #include "GB_AxB_dot2_meta.c"       // all of the work is in this meta file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // follows the meta file's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_second_uint8    // masked dot product kernel (phase 2)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot3_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_second_uint8    // dense dot product kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_second_uint8  // saxpy3 kernel (Gustavson + Hash)
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)   // no argument is used in the visible body; only the template can use them
+{ 
+    #if GB_DISABLE                      // see GB_DISABLE: generic case instead
+    return (GrB_NO_VALUE) ;             // kernel compiled out: nothing is done
+    #else
+    #include "GB_AxB_saxpy3_template.c" // all of the work is in this template
+    return (GrB_SUCCESS) ;              // follows the template's code
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_fp32.c b/Source/Generated/GB_AxB__any_times_fp32.c
new file mode 100644
index 0000000000..0ae6ac0c7d
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_fp32
+// A'*B function (dot3):     GB_Adot3B__any_times_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik * bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    float aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    float bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: same expansion as GB_MULT here, since no add step is emitted
+#define GB_MULTADD(z, x, y) \
+    z = (x * y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break at the terminal value; this expansion always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which this expansion performs as the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which this expansion evaluates as y alone
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which this expansion performs as the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_FP32 || GxB_NO_ANY_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_ANY_TIMES_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_times_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (this kernel is for C=A'*B and C<!M>=A'*B)
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"           // expanded with GB_PHASE_2_OF_2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_times_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot3_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_times_fp32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot4_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_times_fp32
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_fp64.c b/Source/Generated/GB_AxB__any_times_fp64.c
new file mode 100644
index 0000000000..55a13d2e32
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_fp64
+// A'*B function (dot3):     GB_Adot3B__any_times_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  cij = (aik * bkj)
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    double aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    double bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: same expansion as GB_MULT here, since no add step is emitted
+#define GB_MULTADD(z, x, y) \
+    z = (x * y)
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break at the terminal value; this expansion always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which this expansion performs as the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which this expansion evaluates as y alone
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which this expansion performs as the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_FP64 || GxB_NO_ANY_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_ANY_TIMES_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_times_fp64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (this kernel is for C=A'*B and C<!M>=A'*B)
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"           // expanded with GB_PHASE_2_OF_2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_times_fp64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot3_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_times_fp64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot4_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_times_fp64
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_int16.c b/Source/Generated/GB_AxB__any_times_int16.c
new file mode 100644
index 0000000000..89cbb16b7f
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_int16
+// A'*B function (dot3):     GB_Adot3B__any_times_int16
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int16_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: declares the temporary x_op_y, then assigns it to z
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break at the terminal value; this expansion always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which this expansion performs as the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which this expansion evaluates as y alone
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which this expansion performs as the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_INT16 || GxB_NO_ANY_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_ANY_TIMES_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_times_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (this kernel is for C=A'*B and C<!M>=A'*B)
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"           // expanded with GB_PHASE_2_OF_2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_times_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot3_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_times_int16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot4_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_times_int16
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_int32.c b/Source/Generated/GB_AxB__any_times_int32.c
new file mode 100644
index 0000000000..9a64a7f12c
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_int32
+// A'*B function (dot3):     GB_Adot3B__any_times_int32
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int32_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: declares the temporary x_op_y, then assigns it to z
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// dot product only: break at the terminal value; this expansion always breaks
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (an empty statement here)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which this expansion performs as the assignment Cx [p] = t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which this expansion evaluates as y alone
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which this expansion performs as the assignment Hx [i] = t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_INT32 || GxB_NO_ANY_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_ANY_TIMES_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), any_times_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3 (this kernel is for C=A'*B and C<!M>=A'*B)
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"           // expanded with GB_PHASE_2_OF_2 set
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), any_times_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot3_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, any_times_int32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_dot4_template.c"       // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash), any_times_int32
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                          // hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;                 // nothing is computed in this branch
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // the template supplies the body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_int64.c b/Source/Generated/GB_AxB__any_times_int64.c
new file mode 100644
index 0000000000..aa78347427
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_int64
+// A'*B function (dot3):     GB_Adot3B__any_times_int64
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int64_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int64_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x*y
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: for the ANY monoid, z is simply overwritten with x*y
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition: here an unconditional break (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_INT64 || GxB_NO_ANY_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_ANY_TIMES_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_int64
+(
+    GrB_Matrix C,                       // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_int64
+(
+    GrB_Matrix C,                       // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_int64
+(
+    GrB_Matrix C,                       // result of C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_int64
+(
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_int8.c b/Source/Generated/GB_AxB__any_times_int8.c
new file mode 100644
index 0000000000..798f219e75
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_int8
+// A'*B function (dot3):     GB_Adot3B__any_times_int8
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  int8_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    int8_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    int8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x*y
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: for the ANY monoid, z is simply overwritten with x*y
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition: here an unconditional break (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 8 bits)
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_INT8 || GxB_NO_ANY_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_ANY_TIMES_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_int8
+(
+    GrB_Matrix C,                       // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_int8
+(
+    GrB_Matrix C,                       // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_int8
+(
+    GrB_Matrix C,                       // result of C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_int8
+(
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_uint16.c b/Source/Generated/GB_AxB__any_times_uint16.c
new file mode 100644
index 0000000000..e1186a1816
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_uint16
+// A'*B function (dot3):     GB_Adot3B__any_times_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint16_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint16_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint16_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator (NOTE(review): x*y is done in int; can overflow - confirm)
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: for the ANY monoid, z is simply overwritten with x*y
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition: here an unconditional break (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 16 bits)
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_UINT16 || GxB_NO_ANY_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_ANY_TIMES_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_uint16
+(
+    GrB_Matrix C,                       // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_uint16
+(
+    GrB_Matrix C,                       // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_uint16
+(
+    GrB_Matrix C,                       // result of C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_uint16
+(
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_uint32.c b/Source/Generated/GB_AxB__any_times_uint32.c
new file mode 100644
index 0000000000..248fed40d8
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_uint32
+// A'*B function (dot3):     GB_Adot3B__any_times_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint32_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// declare aik and load it: aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint32_t aik = Ax [pA]
+
+// declare bkj and load it: bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint32_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator: z = x*y
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add: for the ANY monoid, z is simply overwritten with x*y
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// terminal condition: here an unconditional break (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization (here: an empty statement)
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which is a plain overwrite for the ANY monoid
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y, which for the ANY monoid is just y
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (here: 32 bits)
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which is a plain overwrite for the ANY monoid
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), with len counted in entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_UINT32 || GxB_NO_ANY_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_ANY_TIMES_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_uint32
+(
+    GrB_Matrix C,                       // result of C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this handles only C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_uint32
+(
+    GrB_Matrix C,                       // result of C<M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_uint32
+(
+    GrB_Matrix C,                       // result of C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_uint32
+(
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: nothing computed
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body is textually included
+    return (GrB_SUCCESS) ;              // kernel was compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_uint64.c b/Source/Generated/GB_AxB__any_times_uint64.c
new file mode 100644
index 0000000000..25c830efae
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_uint64
+// A'*B function (dot3):     GB_Adot3B__any_times_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint64_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint64_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint64_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites C(i,j) with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (the monoid "add"; for the ANY monoid the result is simply y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites Hx [i])
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_UINT64 || GxB_NO_ANY_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_ANY_TIMES_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // function body is the dot2 meta file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is the dot3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is the dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is the saxpy3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__any_times_uint8.c b/Source/Generated/GB_AxB__any_times_uint8.c
new file mode 100644
index 0000000000..c7f5f9a628
--- /dev/null
+++ b/Source/Generated/GB_AxB__any_times_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__any_times_uint8
+// A'*B function (dot3):     GB_Adot3B__any_times_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__any_times_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__any_times_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = (aik * bkj)
+// Add:      cij = z
+//           'any' monoid?  1
+//           atomic?        1
+//           OpenMP atomic? 0
+// MultAdd:  uint8_t x_op_y = (aik * bkj) ; cij = x_op_y
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    uint8_t aik = Ax [pA]
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    uint8_t bkj = Bx [pB]
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = (x * y)
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x * y) ; z = x_op_y
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    break ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    ;
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t (for the ANY monoid, the update just overwrites C(i,j) with t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = t
+
+// x + y (the monoid "add"; for the ANY monoid the result is simply y)
+#define GB_ADD_FUNCTION(x,y) \
+    y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    1
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t (for the ANY monoid, the update just overwrites Hx [i])
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_TIMES || GxB_NO_UINT8 || GxB_NO_ANY_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_ANY_TIMES_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__any_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // function body is the dot2 meta file
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__any_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body is the dot3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__any_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is the dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__any_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c" // function body is the saxpy3 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__eq_eq_bool.c b/Source/Generated/GB_AxB__eq_eq_bool.c
index ec37f99b70..60437e2de9 100644
--- a/Source/Generated/GB_AxB__eq_eq_bool.c
+++ b/Source/Generated/GB_AxB__eq_eq_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_bool
 // A'*B function (dot2):     GB_Adot2B__eq_eq_bool
 // A'*B function (dot3):     GB_Adot3B__eq_eq_bool
-// A*B function (heap):      GB_AheapB__eq_eq_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_EQ_EQ_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_bool
 GrB_Info GB_Adot2B__eq_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_bool
 GrB_Info GB_Adot3B__eq_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is the dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_fp32.c b/Source/Generated/GB_AxB__eq_eq_fp32.c
index d99d4e0e92..04acef49da 100644
--- a/Source/Generated/GB_AxB__eq_eq_fp32.c
+++ b/Source/Generated/GB_AxB__eq_eq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_fp32
 // A'*B function (dot2):     GB_Adot2B__eq_eq_fp32
 // A'*B function (dot3):     GB_Adot3B__eq_eq_fp32
-// A*B function (heap):      GB_AheapB__eq_eq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_fp32
 
 // C type:   bool
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_FP32 || GxB_NO_EQ_BOOL || GxB_NO_EQ_FP32 || GxB_NO_EQ_EQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_fp32
 GrB_Info GB_Adot2B__eq_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_fp32
 GrB_Info GB_Adot3B__eq_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is the dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_fp64.c b/Source/Generated/GB_AxB__eq_eq_fp64.c
index 14e050521f..89f2877018 100644
--- a/Source/Generated/GB_AxB__eq_eq_fp64.c
+++ b/Source/Generated/GB_AxB__eq_eq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_fp64
 // A'*B function (dot2):     GB_Adot2B__eq_eq_fp64
 // A'*B function (dot3):     GB_Adot3B__eq_eq_fp64
-// A*B function (heap):      GB_AheapB__eq_eq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_fp64
 
 // C type:   bool
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_FP64 || GxB_NO_EQ_BOOL || GxB_NO_EQ_FP64 || GxB_NO_EQ_EQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_fp64
 GrB_Info GB_Adot2B__eq_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_fp64
 GrB_Info GB_Adot3B__eq_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_int16.c b/Source/Generated/GB_AxB__eq_eq_int16.c
index c7ce48da26..350f557aca 100644
--- a/Source/Generated/GB_AxB__eq_eq_int16.c
+++ b/Source/Generated/GB_AxB__eq_eq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_int16
 // A'*B function (dot2):     GB_Adot2B__eq_eq_int16
 // A'*B function (dot3):     GB_Adot3B__eq_eq_int16
-// A*B function (heap):      GB_AheapB__eq_eq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT16 || GxB_NO_EQ_BOOL || GxB_NO_EQ_INT16 || GxB_NO_EQ_EQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_int16
 GrB_Info GB_Adot2B__eq_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_int16
 GrB_Info GB_Adot3B__eq_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_int32.c b/Source/Generated/GB_AxB__eq_eq_int32.c
index 3c57454d75..71876d8abb 100644
--- a/Source/Generated/GB_AxB__eq_eq_int32.c
+++ b/Source/Generated/GB_AxB__eq_eq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_int32
 // A'*B function (dot2):     GB_Adot2B__eq_eq_int32
 // A'*B function (dot3):     GB_Adot3B__eq_eq_int32
-// A*B function (heap):      GB_AheapB__eq_eq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT32 || GxB_NO_EQ_BOOL || GxB_NO_EQ_INT32 || GxB_NO_EQ_EQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_int32
 GrB_Info GB_Adot2B__eq_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_int32
 GrB_Info GB_Adot3B__eq_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_int64.c b/Source/Generated/GB_AxB__eq_eq_int64.c
index 376fa72684..d41b4620e2 100644
--- a/Source/Generated/GB_AxB__eq_eq_int64.c
+++ b/Source/Generated/GB_AxB__eq_eq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_int64
 // A'*B function (dot2):     GB_Adot2B__eq_eq_int64
 // A'*B function (dot3):     GB_Adot3B__eq_eq_int64
-// A*B function (heap):      GB_AheapB__eq_eq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT64 || GxB_NO_EQ_BOOL || GxB_NO_EQ_INT64 || GxB_NO_EQ_EQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_int64
 GrB_Info GB_Adot2B__eq_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_int64
 GrB_Info GB_Adot3B__eq_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_int8.c b/Source/Generated/GB_AxB__eq_eq_int8.c
index 5602dd1533..cd31bddd9c 100644
--- a/Source/Generated/GB_AxB__eq_eq_int8.c
+++ b/Source/Generated/GB_AxB__eq_eq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_int8
 // A'*B function (dot2):     GB_Adot2B__eq_eq_int8
 // A'*B function (dot3):     GB_Adot3B__eq_eq_int8
-// A*B function (heap):      GB_AheapB__eq_eq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT8 || GxB_NO_EQ_BOOL || GxB_NO_EQ_INT8 || GxB_NO_EQ_EQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_int8
 GrB_Info GB_Adot2B__eq_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_int8
 GrB_Info GB_Adot3B__eq_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_uint16.c b/Source/Generated/GB_AxB__eq_eq_uint16.c
index e78ae93cce..4fd0ffb373 100644
--- a/Source/Generated/GB_AxB__eq_eq_uint16.c
+++ b/Source/Generated/GB_AxB__eq_eq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_uint16
 // A'*B function (dot2):     GB_Adot2B__eq_eq_uint16
 // A'*B function (dot3):     GB_Adot3B__eq_eq_uint16
-// A*B function (heap):      GB_AheapB__eq_eq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT16 || GxB_NO_EQ_BOOL || GxB_NO_EQ_UINT16 || GxB_NO_EQ_EQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_uint16
 GrB_Info GB_Adot2B__eq_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_uint16
 GrB_Info GB_Adot3B__eq_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_uint32.c b/Source/Generated/GB_AxB__eq_eq_uint32.c
index a8b233c454..806c951fc4 100644
--- a/Source/Generated/GB_AxB__eq_eq_uint32.c
+++ b/Source/Generated/GB_AxB__eq_eq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_uint32
 // A'*B function (dot2):     GB_Adot2B__eq_eq_uint32
 // A'*B function (dot3):     GB_Adot3B__eq_eq_uint32
-// A*B function (heap):      GB_AheapB__eq_eq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT32 || GxB_NO_EQ_BOOL || GxB_NO_EQ_UINT32 || GxB_NO_EQ_EQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_uint32
 GrB_Info GB_Adot2B__eq_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_uint32
 GrB_Info GB_Adot3B__eq_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_uint64.c b/Source/Generated/GB_AxB__eq_eq_uint64.c
index 985ff673bc..d880810282 100644
--- a/Source/Generated/GB_AxB__eq_eq_uint64.c
+++ b/Source/Generated/GB_AxB__eq_eq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_uint64
 // A'*B function (dot2):     GB_Adot2B__eq_eq_uint64
 // A'*B function (dot3):     GB_Adot3B__eq_eq_uint64
-// A*B function (heap):      GB_AheapB__eq_eq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT64 || GxB_NO_EQ_BOOL || GxB_NO_EQ_UINT64 || GxB_NO_EQ_EQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_uint64
 GrB_Info GB_Adot2B__eq_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_uint64
 GrB_Info GB_Adot3B__eq_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_eq_uint8.c b/Source/Generated/GB_AxB__eq_eq_uint8.c
index 178ad9f03b..c9ade5286b 100644
--- a/Source/Generated/GB_AxB__eq_eq_uint8.c
+++ b/Source/Generated/GB_AxB__eq_eq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_eq_uint8
 // A'*B function (dot2):     GB_Adot2B__eq_eq_uint8
 // A'*B function (dot3):     GB_Adot3B__eq_eq_uint8
-// A*B function (heap):      GB_AheapB__eq_eq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_eq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_eq_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik == bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT8 || GxB_NO_EQ_BOOL || GxB_NO_EQ_UINT8 || GxB_NO_EQ_EQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_eq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_eq_uint8
 GrB_Info GB_Adot2B__eq_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_eq_uint8
 GrB_Info GB_Adot3B__eq_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_eq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_eq_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_first_bool.c b/Source/Generated/GB_AxB__eq_first_bool.c
index bffb8d9470..ccfed5b994 100644
--- a/Source/Generated/GB_AxB__eq_first_bool.c
+++ b/Source/Generated/GB_AxB__eq_first_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_first_bool
 // A'*B function (dot2):     GB_Adot2B__eq_first_bool
 // A'*B function (dot3):     GB_Adot3B__eq_first_bool
-// A*B function (heap):      GB_AheapB__eq_first_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_first_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_first_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == aik)
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_FIRST || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_FIRST_BOOL || GxB_NO_EQ_FIRST_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_first_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_first_bool
 GrB_Info GB_Adot2B__eq_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_first_bool
 GrB_Info GB_Adot3B__eq_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_first_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_first_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_bool.c b/Source/Generated/GB_AxB__eq_ge_bool.c
index 3bf00db361..d576d5b8f3 100644
--- a/Source/Generated/GB_AxB__eq_ge_bool.c
+++ b/Source/Generated/GB_AxB__eq_ge_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_bool
 // A'*B function (dot2):     GB_Adot2B__eq_ge_bool
 // A'*B function (dot3):     GB_Adot3B__eq_ge_bool
-// A*B function (heap):      GB_AheapB__eq_ge_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_GE_BOOL || GxB_NO_EQ_GE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_bool
 GrB_Info GB_Adot2B__eq_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_bool
 GrB_Info GB_Adot3B__eq_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_fp32.c b/Source/Generated/GB_AxB__eq_ge_fp32.c
index 02f168b8db..3343b96351 100644
--- a/Source/Generated/GB_AxB__eq_ge_fp32.c
+++ b/Source/Generated/GB_AxB__eq_ge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_fp32
 // A'*B function (dot2):     GB_Adot2B__eq_ge_fp32
 // A'*B function (dot3):     GB_Adot3B__eq_ge_fp32
-// A*B function (heap):      GB_AheapB__eq_ge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_fp32
 
 // C type:   bool
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_FP32 || GxB_NO_EQ_BOOL || GxB_NO_GE_FP32 || GxB_NO_EQ_GE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_fp32
 GrB_Info GB_Adot2B__eq_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_fp32
 GrB_Info GB_Adot3B__eq_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_fp64.c b/Source/Generated/GB_AxB__eq_ge_fp64.c
index d0f2e145b8..907c760efd 100644
--- a/Source/Generated/GB_AxB__eq_ge_fp64.c
+++ b/Source/Generated/GB_AxB__eq_ge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_fp64
 // A'*B function (dot2):     GB_Adot2B__eq_ge_fp64
 // A'*B function (dot3):     GB_Adot3B__eq_ge_fp64
-// A*B function (heap):      GB_AheapB__eq_ge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_fp64
 
 // C type:   bool
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_FP64 || GxB_NO_EQ_BOOL || GxB_NO_GE_FP64 || GxB_NO_EQ_GE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_fp64
 GrB_Info GB_Adot2B__eq_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_fp64
 GrB_Info GB_Adot3B__eq_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_int16.c b/Source/Generated/GB_AxB__eq_ge_int16.c
index f5ce269fad..6002474325 100644
--- a/Source/Generated/GB_AxB__eq_ge_int16.c
+++ b/Source/Generated/GB_AxB__eq_ge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_int16
 // A'*B function (dot2):     GB_Adot2B__eq_ge_int16
 // A'*B function (dot3):     GB_Adot3B__eq_ge_int16
-// A*B function (heap):      GB_AheapB__eq_ge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_INT16 || GxB_NO_EQ_BOOL || GxB_NO_GE_INT16 || GxB_NO_EQ_GE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_int16
 GrB_Info GB_Adot2B__eq_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_int16
 GrB_Info GB_Adot3B__eq_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_int32.c b/Source/Generated/GB_AxB__eq_ge_int32.c
index 70d87cc54d..1af38fdf59 100644
--- a/Source/Generated/GB_AxB__eq_ge_int32.c
+++ b/Source/Generated/GB_AxB__eq_ge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_int32
 // A'*B function (dot2):     GB_Adot2B__eq_ge_int32
 // A'*B function (dot3):     GB_Adot3B__eq_ge_int32
-// A*B function (heap):      GB_AheapB__eq_ge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_INT32 || GxB_NO_EQ_BOOL || GxB_NO_GE_INT32 || GxB_NO_EQ_GE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_int32
 GrB_Info GB_Adot2B__eq_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_int32
 GrB_Info GB_Adot3B__eq_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_int64.c b/Source/Generated/GB_AxB__eq_ge_int64.c
index a9b1e9f1fd..cecbee1059 100644
--- a/Source/Generated/GB_AxB__eq_ge_int64.c
+++ b/Source/Generated/GB_AxB__eq_ge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_int64
 // A'*B function (dot2):     GB_Adot2B__eq_ge_int64
 // A'*B function (dot3):     GB_Adot3B__eq_ge_int64
-// A*B function (heap):      GB_AheapB__eq_ge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_INT64 || GxB_NO_EQ_BOOL || GxB_NO_GE_INT64 || GxB_NO_EQ_GE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_int64
 GrB_Info GB_Adot2B__eq_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_int64
 GrB_Info GB_Adot3B__eq_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_int8.c b/Source/Generated/GB_AxB__eq_ge_int8.c
index 72b17e438f..901b62c9f0 100644
--- a/Source/Generated/GB_AxB__eq_ge_int8.c
+++ b/Source/Generated/GB_AxB__eq_ge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_int8
 // A'*B function (dot2):     GB_Adot2B__eq_ge_int8
 // A'*B function (dot3):     GB_Adot3B__eq_ge_int8
-// A*B function (heap):      GB_AheapB__eq_ge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_INT8 || GxB_NO_EQ_BOOL || GxB_NO_GE_INT8 || GxB_NO_EQ_GE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_int8
 GrB_Info GB_Adot2B__eq_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_int8
 GrB_Info GB_Adot3B__eq_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_uint16.c b/Source/Generated/GB_AxB__eq_ge_uint16.c
index 30dcd9446a..85bc4037bb 100644
--- a/Source/Generated/GB_AxB__eq_ge_uint16.c
+++ b/Source/Generated/GB_AxB__eq_ge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_uint16
 // A'*B function (dot2):     GB_Adot2B__eq_ge_uint16
 // A'*B function (dot3):     GB_Adot3B__eq_ge_uint16
-// A*B function (heap):      GB_AheapB__eq_ge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_UINT16 || GxB_NO_EQ_BOOL || GxB_NO_GE_UINT16 || GxB_NO_EQ_GE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_uint16
 GrB_Info GB_Adot2B__eq_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_uint16
 GrB_Info GB_Adot3B__eq_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_uint32.c b/Source/Generated/GB_AxB__eq_ge_uint32.c
index cae4984712..0149249dae 100644
--- a/Source/Generated/GB_AxB__eq_ge_uint32.c
+++ b/Source/Generated/GB_AxB__eq_ge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_uint32
 // A'*B function (dot2):     GB_Adot2B__eq_ge_uint32
 // A'*B function (dot3):     GB_Adot3B__eq_ge_uint32
-// A*B function (heap):      GB_AheapB__eq_ge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_UINT32 || GxB_NO_EQ_BOOL || GxB_NO_GE_UINT32 || GxB_NO_EQ_GE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_uint32
 GrB_Info GB_Adot2B__eq_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_uint32
 GrB_Info GB_Adot3B__eq_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_uint64.c b/Source/Generated/GB_AxB__eq_ge_uint64.c
index 557a377e37..ee31e19204 100644
--- a/Source/Generated/GB_AxB__eq_ge_uint64.c
+++ b/Source/Generated/GB_AxB__eq_ge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_uint64
 // A'*B function (dot2):     GB_Adot2B__eq_ge_uint64
 // A'*B function (dot3):     GB_Adot3B__eq_ge_uint64
-// A*B function (heap):      GB_AheapB__eq_ge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_UINT64 || GxB_NO_EQ_BOOL || GxB_NO_GE_UINT64 || GxB_NO_EQ_GE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_uint64
 GrB_Info GB_Adot2B__eq_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_uint64
 GrB_Info GB_Adot3B__eq_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ge_uint8.c b/Source/Generated/GB_AxB__eq_ge_uint8.c
index 18bc37574c..3cbd285af2 100644
--- a/Source/Generated/GB_AxB__eq_ge_uint8.c
+++ b/Source/Generated/GB_AxB__eq_ge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ge_uint8
 // A'*B function (dot2):     GB_Adot2B__eq_ge_uint8
 // A'*B function (dot3):     GB_Adot3B__eq_ge_uint8
-// A*B function (heap):      GB_AheapB__eq_ge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ge_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik >= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GE || GxB_NO_UINT8 || GxB_NO_EQ_BOOL || GxB_NO_GE_UINT8 || GxB_NO_EQ_GE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ge_uint8
 GrB_Info GB_Adot2B__eq_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ge_uint8
 GrB_Info GB_Adot3B__eq_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ge_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_bool.c b/Source/Generated/GB_AxB__eq_gt_bool.c
index 263e8236c8..7e79e690c8 100644
--- a/Source/Generated/GB_AxB__eq_gt_bool.c
+++ b/Source/Generated/GB_AxB__eq_gt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_bool
 // A'*B function (dot2):     GB_Adot2B__eq_gt_bool
 // A'*B function (dot3):     GB_Adot3B__eq_gt_bool
-// A*B function (heap):      GB_AheapB__eq_gt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_GT_BOOL || GxB_NO_EQ_GT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_bool
 GrB_Info GB_Adot2B__eq_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_bool
 GrB_Info GB_Adot3B__eq_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_fp32.c b/Source/Generated/GB_AxB__eq_gt_fp32.c
index 91b8a5f150..eb53fafc88 100644
--- a/Source/Generated/GB_AxB__eq_gt_fp32.c
+++ b/Source/Generated/GB_AxB__eq_gt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_fp32
 // A'*B function (dot2):     GB_Adot2B__eq_gt_fp32
 // A'*B function (dot3):     GB_Adot3B__eq_gt_fp32
-// A*B function (heap):      GB_AheapB__eq_gt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_fp32
 
 // C type:   bool
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_FP32 || GxB_NO_EQ_BOOL || GxB_NO_GT_FP32 || GxB_NO_EQ_GT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_fp32
 GrB_Info GB_Adot2B__eq_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_fp32
 GrB_Info GB_Adot3B__eq_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_fp64.c b/Source/Generated/GB_AxB__eq_gt_fp64.c
index e1be86f03c..4aeeefbe7a 100644
--- a/Source/Generated/GB_AxB__eq_gt_fp64.c
+++ b/Source/Generated/GB_AxB__eq_gt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_fp64
 // A'*B function (dot2):     GB_Adot2B__eq_gt_fp64
 // A'*B function (dot3):     GB_Adot3B__eq_gt_fp64
-// A*B function (heap):      GB_AheapB__eq_gt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_fp64
 
 // C type:   bool
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_FP64 || GxB_NO_EQ_BOOL || GxB_NO_GT_FP64 || GxB_NO_EQ_GT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_fp64
 GrB_Info GB_Adot2B__eq_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_fp64
 GrB_Info GB_Adot3B__eq_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_int16.c b/Source/Generated/GB_AxB__eq_gt_int16.c
index d986a4f5bc..4974ada0dc 100644
--- a/Source/Generated/GB_AxB__eq_gt_int16.c
+++ b/Source/Generated/GB_AxB__eq_gt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_int16
 // A'*B function (dot2):     GB_Adot2B__eq_gt_int16
 // A'*B function (dot3):     GB_Adot3B__eq_gt_int16
-// A*B function (heap):      GB_AheapB__eq_gt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_INT16 || GxB_NO_EQ_BOOL || GxB_NO_GT_INT16 || GxB_NO_EQ_GT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_int16
 GrB_Info GB_Adot2B__eq_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_int16
 GrB_Info GB_Adot3B__eq_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_int32.c b/Source/Generated/GB_AxB__eq_gt_int32.c
index 460629a280..c02705de23 100644
--- a/Source/Generated/GB_AxB__eq_gt_int32.c
+++ b/Source/Generated/GB_AxB__eq_gt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_int32
 // A'*B function (dot2):     GB_Adot2B__eq_gt_int32
 // A'*B function (dot3):     GB_Adot3B__eq_gt_int32
-// A*B function (heap):      GB_AheapB__eq_gt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_INT32 || GxB_NO_EQ_BOOL || GxB_NO_GT_INT32 || GxB_NO_EQ_GT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_int32
 GrB_Info GB_Adot2B__eq_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_int32
 GrB_Info GB_Adot3B__eq_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_int64.c b/Source/Generated/GB_AxB__eq_gt_int64.c
index 713cebac98..04158d6cc9 100644
--- a/Source/Generated/GB_AxB__eq_gt_int64.c
+++ b/Source/Generated/GB_AxB__eq_gt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_int64
 // A'*B function (dot2):     GB_Adot2B__eq_gt_int64
 // A'*B function (dot3):     GB_Adot3B__eq_gt_int64
-// A*B function (heap):      GB_AheapB__eq_gt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_INT64 || GxB_NO_EQ_BOOL || GxB_NO_GT_INT64 || GxB_NO_EQ_GT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_int64
 GrB_Info GB_Adot2B__eq_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_int64
 GrB_Info GB_Adot3B__eq_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_int8.c b/Source/Generated/GB_AxB__eq_gt_int8.c
index e8a7bbb69a..79feb69b30 100644
--- a/Source/Generated/GB_AxB__eq_gt_int8.c
+++ b/Source/Generated/GB_AxB__eq_gt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_int8
 // A'*B function (dot2):     GB_Adot2B__eq_gt_int8
 // A'*B function (dot3):     GB_Adot3B__eq_gt_int8
-// A*B function (heap):      GB_AheapB__eq_gt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_INT8 || GxB_NO_EQ_BOOL || GxB_NO_GT_INT8 || GxB_NO_EQ_GT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_int8
 GrB_Info GB_Adot2B__eq_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_int8
 GrB_Info GB_Adot3B__eq_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_uint16.c b/Source/Generated/GB_AxB__eq_gt_uint16.c
index 80448f0e0d..974f57f294 100644
--- a/Source/Generated/GB_AxB__eq_gt_uint16.c
+++ b/Source/Generated/GB_AxB__eq_gt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_uint16
 // A'*B function (dot2):     GB_Adot2B__eq_gt_uint16
 // A'*B function (dot3):     GB_Adot3B__eq_gt_uint16
-// A*B function (heap):      GB_AheapB__eq_gt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// a type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_UINT16 || GxB_NO_EQ_BOOL || GxB_NO_GT_UINT16 || GxB_NO_EQ_GT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_uint16
 GrB_Info GB_Adot2B__eq_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_uint16
 GrB_Info GB_Adot3B__eq_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_uint32.c b/Source/Generated/GB_AxB__eq_gt_uint32.c
index c75a6d65be..2b37cd39e4 100644
--- a/Source/Generated/GB_AxB__eq_gt_uint32.c
+++ b/Source/Generated/GB_AxB__eq_gt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_uint32
 // A'*B function (dot2):     GB_Adot2B__eq_gt_uint32
 // A'*B function (dot3):     GB_Adot3B__eq_gt_uint32
-// A*B function (heap):      GB_AheapB__eq_gt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// a type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_UINT32 || GxB_NO_EQ_BOOL || GxB_NO_GT_UINT32 || GxB_NO_EQ_GT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_uint32
 GrB_Info GB_Adot2B__eq_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_uint32
 GrB_Info GB_Adot3B__eq_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_uint64.c b/Source/Generated/GB_AxB__eq_gt_uint64.c
index 5a4b771724..5b6810aa04 100644
--- a/Source/Generated/GB_AxB__eq_gt_uint64.c
+++ b/Source/Generated/GB_AxB__eq_gt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_uint64
 // A'*B function (dot2):     GB_Adot2B__eq_gt_uint64
 // A'*B function (dot3):     GB_Adot3B__eq_gt_uint64
-// A*B function (heap):      GB_AheapB__eq_gt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// a type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_UINT64 || GxB_NO_EQ_BOOL || GxB_NO_GT_UINT64 || GxB_NO_EQ_GT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_uint64
 GrB_Info GB_Adot2B__eq_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_uint64
 GrB_Info GB_Adot3B__eq_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_gt_uint8.c b/Source/Generated/GB_AxB__eq_gt_uint8.c
index ed07fb3452..87be18b968 100644
--- a/Source/Generated/GB_AxB__eq_gt_uint8.c
+++ b/Source/Generated/GB_AxB__eq_gt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_gt_uint8
 // A'*B function (dot2):     GB_Adot2B__eq_gt_uint8
 // A'*B function (dot3):     GB_Adot3B__eq_gt_uint8
-// A*B function (heap):      GB_AheapB__eq_gt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_gt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_gt_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik > bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// a type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_GT || GxB_NO_UINT8 || GxB_NO_EQ_BOOL || GxB_NO_GT_UINT8 || GxB_NO_EQ_GT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_gt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_gt_uint8
 GrB_Info GB_Adot2B__eq_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_gt_uint8
 GrB_Info GB_Adot3B__eq_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_gt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_gt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_gt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_land_bool.c b/Source/Generated/GB_AxB__eq_land_bool.c
index 82c201f350..f23b49a046 100644
--- a/Source/Generated/GB_AxB__eq_land_bool.c
+++ b/Source/Generated/GB_AxB__eq_land_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_land_bool
 // A'*B function (dot2):     GB_Adot2B__eq_land_bool
 // A'*B function (dot3):     GB_Adot3B__eq_land_bool
-// A*B function (heap):      GB_AheapB__eq_land_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_land_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_land_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik && bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik && bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x && y) ;
+#define GB_MULT(z, x, y) \
+    z = (x && y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x && y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x && y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LAND || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LAND_BOOL || GxB_NO_EQ_LAND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_land_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_land_bool
 GrB_Info GB_Adot2B__eq_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_land_bool
 GrB_Info GB_Adot3B__eq_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_land_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_land_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_bool.c b/Source/Generated/GB_AxB__eq_le_bool.c
index 79947960be..43759d3adc 100644
--- a/Source/Generated/GB_AxB__eq_le_bool.c
+++ b/Source/Generated/GB_AxB__eq_le_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_bool
 // A'*B function (dot2):     GB_Adot2B__eq_le_bool
 // A'*B function (dot3):     GB_Adot3B__eq_le_bool
-// A*B function (heap):      GB_AheapB__eq_le_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LE_BOOL || GxB_NO_EQ_LE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_bool
 GrB_Info GB_Adot2B__eq_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_bool
 GrB_Info GB_Adot3B__eq_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_fp32.c b/Source/Generated/GB_AxB__eq_le_fp32.c
index 5008d1f439..ca4121c576 100644
--- a/Source/Generated/GB_AxB__eq_le_fp32.c
+++ b/Source/Generated/GB_AxB__eq_le_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_fp32
 // A'*B function (dot2):     GB_Adot2B__eq_le_fp32
 // A'*B function (dot3):     GB_Adot3B__eq_le_fp32
-// A*B function (heap):      GB_AheapB__eq_le_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_fp32
 
 // C type:   bool
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_FP32 || GxB_NO_EQ_BOOL || GxB_NO_LE_FP32 || GxB_NO_EQ_LE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_fp32
 GrB_Info GB_Adot2B__eq_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_fp32
 GrB_Info GB_Adot3B__eq_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_fp64.c b/Source/Generated/GB_AxB__eq_le_fp64.c
index 0a18f42010..4eb0290f35 100644
--- a/Source/Generated/GB_AxB__eq_le_fp64.c
+++ b/Source/Generated/GB_AxB__eq_le_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_fp64
 // A'*B function (dot2):     GB_Adot2B__eq_le_fp64
 // A'*B function (dot3):     GB_Adot3B__eq_le_fp64
-// A*B function (heap):      GB_AheapB__eq_le_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_fp64
 
 // C type:   bool
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_FP64 || GxB_NO_EQ_BOOL || GxB_NO_LE_FP64 || GxB_NO_EQ_LE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_fp64
 GrB_Info GB_Adot2B__eq_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_fp64
 GrB_Info GB_Adot3B__eq_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_int16.c b/Source/Generated/GB_AxB__eq_le_int16.c
index 34e2959054..74abfb8a80 100644
--- a/Source/Generated/GB_AxB__eq_le_int16.c
+++ b/Source/Generated/GB_AxB__eq_le_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_int16
 // A'*B function (dot2):     GB_Adot2B__eq_le_int16
 // A'*B function (dot3):     GB_Adot3B__eq_le_int16
-// A*B function (heap):      GB_AheapB__eq_le_int16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_INT16 || GxB_NO_EQ_BOOL || GxB_NO_LE_INT16 || GxB_NO_EQ_LE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_int16
 GrB_Info GB_Adot2B__eq_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_int16
 GrB_Info GB_Adot3B__eq_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_int32.c b/Source/Generated/GB_AxB__eq_le_int32.c
index cc5c312fbe..69b2473425 100644
--- a/Source/Generated/GB_AxB__eq_le_int32.c
+++ b/Source/Generated/GB_AxB__eq_le_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_int32
 // A'*B function (dot2):     GB_Adot2B__eq_le_int32
 // A'*B function (dot3):     GB_Adot3B__eq_le_int32
-// A*B function (heap):      GB_AheapB__eq_le_int32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_INT32 || GxB_NO_EQ_BOOL || GxB_NO_LE_INT32 || GxB_NO_EQ_LE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_int32
 GrB_Info GB_Adot2B__eq_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_int32
 GrB_Info GB_Adot3B__eq_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_int64.c b/Source/Generated/GB_AxB__eq_le_int64.c
index 8bc41db14f..9a04f0d9c1 100644
--- a/Source/Generated/GB_AxB__eq_le_int64.c
+++ b/Source/Generated/GB_AxB__eq_le_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_int64
 // A'*B function (dot2):     GB_Adot2B__eq_le_int64
 // A'*B function (dot3):     GB_Adot3B__eq_le_int64
-// A*B function (heap):      GB_AheapB__eq_le_int64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_INT64 || GxB_NO_EQ_BOOL || GxB_NO_LE_INT64 || GxB_NO_EQ_LE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_int64
 GrB_Info GB_Adot2B__eq_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_int64
 GrB_Info GB_Adot3B__eq_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_int8.c b/Source/Generated/GB_AxB__eq_le_int8.c
index 7143022c27..8df99bb715 100644
--- a/Source/Generated/GB_AxB__eq_le_int8.c
+++ b/Source/Generated/GB_AxB__eq_le_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_int8
 // A'*B function (dot2):     GB_Adot2B__eq_le_int8
 // A'*B function (dot3):     GB_Adot3B__eq_le_int8
-// A*B function (heap):      GB_AheapB__eq_le_int8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_INT8 || GxB_NO_EQ_BOOL || GxB_NO_LE_INT8 || GxB_NO_EQ_LE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_int8
 GrB_Info GB_Adot2B__eq_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_int8
 GrB_Info GB_Adot3B__eq_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_uint16.c b/Source/Generated/GB_AxB__eq_le_uint16.c
index 199370d941..f4b7d89a92 100644
--- a/Source/Generated/GB_AxB__eq_le_uint16.c
+++ b/Source/Generated/GB_AxB__eq_le_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_uint16
 // A'*B function (dot2):     GB_Adot2B__eq_le_uint16
 // A'*B function (dot3):     GB_Adot3B__eq_le_uint16
-// A*B function (heap):      GB_AheapB__eq_le_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_UINT16 || GxB_NO_EQ_BOOL || GxB_NO_LE_UINT16 || GxB_NO_EQ_LE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_uint16
 GrB_Info GB_Adot2B__eq_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_uint16
 GrB_Info GB_Adot3B__eq_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_uint32.c b/Source/Generated/GB_AxB__eq_le_uint32.c
index 1d76a1340a..d95b2fd1cc 100644
--- a/Source/Generated/GB_AxB__eq_le_uint32.c
+++ b/Source/Generated/GB_AxB__eq_le_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_uint32
 // A'*B function (dot2):     GB_Adot2B__eq_le_uint32
 // A'*B function (dot3):     GB_Adot3B__eq_le_uint32
-// A*B function (heap):      GB_AheapB__eq_le_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_UINT32 || GxB_NO_EQ_BOOL || GxB_NO_LE_UINT32 || GxB_NO_EQ_LE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_uint32
 GrB_Info GB_Adot2B__eq_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_uint32
 GrB_Info GB_Adot3B__eq_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_uint64.c b/Source/Generated/GB_AxB__eq_le_uint64.c
index 0b61e1ccd0..6ce9fa161a 100644
--- a/Source/Generated/GB_AxB__eq_le_uint64.c
+++ b/Source/Generated/GB_AxB__eq_le_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_uint64
 // A'*B function (dot2):     GB_Adot2B__eq_le_uint64
 // A'*B function (dot3):     GB_Adot3B__eq_le_uint64
-// A*B function (heap):      GB_AheapB__eq_le_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_UINT64 || GxB_NO_EQ_BOOL || GxB_NO_LE_UINT64 || GxB_NO_EQ_LE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_uint64
 GrB_Info GB_Adot2B__eq_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_uint64
 GrB_Info GB_Adot3B__eq_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_le_uint8.c b/Source/Generated/GB_AxB__eq_le_uint8.c
index 01f97ddbd3..99425eff87 100644
--- a/Source/Generated/GB_AxB__eq_le_uint8.c
+++ b/Source/Generated/GB_AxB__eq_le_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_le_uint8
 // A'*B function (dot2):     GB_Adot2B__eq_le_uint8
 // A'*B function (dot3):     GB_Adot3B__eq_le_uint8
-// A*B function (heap):      GB_AheapB__eq_le_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_le_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_le_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik <= bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LE || GxB_NO_UINT8 || GxB_NO_EQ_BOOL || GxB_NO_LE_UINT8 || GxB_NO_EQ_LE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_le_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_le_uint8
 GrB_Info GB_Adot2B__eq_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_le_uint8
 GrB_Info GB_Adot3B__eq_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_le_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_le_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lor_bool.c b/Source/Generated/GB_AxB__eq_lor_bool.c
index 4976358f6e..c22810551d 100644
--- a/Source/Generated/GB_AxB__eq_lor_bool.c
+++ b/Source/Generated/GB_AxB__eq_lor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lor_bool
 // A'*B function (dot2):     GB_Adot2B__eq_lor_bool
 // A'*B function (dot3):     GB_Adot3B__eq_lor_bool
-// A*B function (heap):      GB_AheapB__eq_lor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lor_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik || bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik || bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x || y) ;
+#define GB_MULT(z, x, y) \
+    z = (x || y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x || y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x || y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LOR || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LOR_BOOL || GxB_NO_EQ_LOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lor_bool
 GrB_Info GB_Adot2B__eq_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lor_bool
 GrB_Info GB_Adot3B__eq_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_bool.c b/Source/Generated/GB_AxB__eq_lt_bool.c
index 5bd7314401..20749a2092 100644
--- a/Source/Generated/GB_AxB__eq_lt_bool.c
+++ b/Source/Generated/GB_AxB__eq_lt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_bool
 // A'*B function (dot2):     GB_Adot2B__eq_lt_bool
 // A'*B function (dot3):     GB_Adot3B__eq_lt_bool
-// A*B function (heap):      GB_AheapB__eq_lt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LT_BOOL || GxB_NO_EQ_LT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_bool
 GrB_Info GB_Adot2B__eq_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_bool
 GrB_Info GB_Adot3B__eq_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_fp32.c b/Source/Generated/GB_AxB__eq_lt_fp32.c
index 0039d7b9fa..042dcda18b 100644
--- a/Source/Generated/GB_AxB__eq_lt_fp32.c
+++ b/Source/Generated/GB_AxB__eq_lt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_fp32
 // A'*B function (dot2):     GB_Adot2B__eq_lt_fp32
 // A'*B function (dot3):     GB_Adot3B__eq_lt_fp32
-// A*B function (heap):      GB_AheapB__eq_lt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_fp32
 
 // C type:   bool
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_FP32 || GxB_NO_EQ_BOOL || GxB_NO_LT_FP32 || GxB_NO_EQ_LT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_fp32
 GrB_Info GB_Adot2B__eq_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_fp32
 GrB_Info GB_Adot3B__eq_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_fp64.c b/Source/Generated/GB_AxB__eq_lt_fp64.c
index b80edc79ea..97ddd7a8cf 100644
--- a/Source/Generated/GB_AxB__eq_lt_fp64.c
+++ b/Source/Generated/GB_AxB__eq_lt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_fp64
 // A'*B function (dot2):     GB_Adot2B__eq_lt_fp64
 // A'*B function (dot3):     GB_Adot3B__eq_lt_fp64
-// A*B function (heap):      GB_AheapB__eq_lt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_fp64
 
 // C type:   bool
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_FP64 || GxB_NO_EQ_BOOL || GxB_NO_LT_FP64 || GxB_NO_EQ_LT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_fp64
 GrB_Info GB_Adot2B__eq_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_fp64
 GrB_Info GB_Adot3B__eq_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_int16.c b/Source/Generated/GB_AxB__eq_lt_int16.c
index 95abdd44f6..5b9b2a3ec4 100644
--- a/Source/Generated/GB_AxB__eq_lt_int16.c
+++ b/Source/Generated/GB_AxB__eq_lt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_int16
 // A'*B function (dot2):     GB_Adot2B__eq_lt_int16
 // A'*B function (dot3):     GB_Adot3B__eq_lt_int16
-// A*B function (heap):      GB_AheapB__eq_lt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_INT16 || GxB_NO_EQ_BOOL || GxB_NO_LT_INT16 || GxB_NO_EQ_LT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_int16
 GrB_Info GB_Adot2B__eq_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_int16
 GrB_Info GB_Adot3B__eq_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_int32.c b/Source/Generated/GB_AxB__eq_lt_int32.c
index 7bfe2bcd00..fe7d759aae 100644
--- a/Source/Generated/GB_AxB__eq_lt_int32.c
+++ b/Source/Generated/GB_AxB__eq_lt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_int32
 // A'*B function (dot2):     GB_Adot2B__eq_lt_int32
 // A'*B function (dot3):     GB_Adot3B__eq_lt_int32
-// A*B function (heap):      GB_AheapB__eq_lt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_INT32 || GxB_NO_EQ_BOOL || GxB_NO_LT_INT32 || GxB_NO_EQ_LT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_int32
 GrB_Info GB_Adot2B__eq_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_int32
 GrB_Info GB_Adot3B__eq_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_int64.c b/Source/Generated/GB_AxB__eq_lt_int64.c
index b758a5a13a..bd0e388934 100644
--- a/Source/Generated/GB_AxB__eq_lt_int64.c
+++ b/Source/Generated/GB_AxB__eq_lt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_int64
 // A'*B function (dot2):     GB_Adot2B__eq_lt_int64
 // A'*B function (dot3):     GB_Adot3B__eq_lt_int64
-// A*B function (heap):      GB_AheapB__eq_lt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_INT64 || GxB_NO_EQ_BOOL || GxB_NO_LT_INT64 || GxB_NO_EQ_LT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_int64
 GrB_Info GB_Adot2B__eq_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_int64
 GrB_Info GB_Adot3B__eq_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_int8.c b/Source/Generated/GB_AxB__eq_lt_int8.c
index 70f5076c08..beb9dff154 100644
--- a/Source/Generated/GB_AxB__eq_lt_int8.c
+++ b/Source/Generated/GB_AxB__eq_lt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_int8
 // A'*B function (dot2):     GB_Adot2B__eq_lt_int8
 // A'*B function (dot3):     GB_Adot3B__eq_lt_int8
-// A*B function (heap):      GB_AheapB__eq_lt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_INT8 || GxB_NO_EQ_BOOL || GxB_NO_LT_INT8 || GxB_NO_EQ_LT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_int8
 GrB_Info GB_Adot2B__eq_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_int8
 GrB_Info GB_Adot3B__eq_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_uint16.c b/Source/Generated/GB_AxB__eq_lt_uint16.c
index 553ba1e1fa..a4d9995a72 100644
--- a/Source/Generated/GB_AxB__eq_lt_uint16.c
+++ b/Source/Generated/GB_AxB__eq_lt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_uint16
 // A'*B function (dot2):     GB_Adot2B__eq_lt_uint16
 // A'*B function (dot3):     GB_Adot3B__eq_lt_uint16
-// A*B function (heap):      GB_AheapB__eq_lt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_UINT16 || GxB_NO_EQ_BOOL || GxB_NO_LT_UINT16 || GxB_NO_EQ_LT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_uint16
 GrB_Info GB_Adot2B__eq_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_uint16
 GrB_Info GB_Adot3B__eq_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_uint32.c b/Source/Generated/GB_AxB__eq_lt_uint32.c
index 9843a6b2be..788ddfe0f4 100644
--- a/Source/Generated/GB_AxB__eq_lt_uint32.c
+++ b/Source/Generated/GB_AxB__eq_lt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_uint32
 // A'*B function (dot2):     GB_Adot2B__eq_lt_uint32
 // A'*B function (dot3):     GB_Adot3B__eq_lt_uint32
-// A*B function (heap):      GB_AheapB__eq_lt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_UINT32 || GxB_NO_EQ_BOOL || GxB_NO_LT_UINT32 || GxB_NO_EQ_LT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_uint32
 GrB_Info GB_Adot2B__eq_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_uint32
 GrB_Info GB_Adot3B__eq_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_uint64.c b/Source/Generated/GB_AxB__eq_lt_uint64.c
index b8149d4ce2..453db82020 100644
--- a/Source/Generated/GB_AxB__eq_lt_uint64.c
+++ b/Source/Generated/GB_AxB__eq_lt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_uint64
 // A'*B function (dot2):     GB_Adot2B__eq_lt_uint64
 // A'*B function (dot3):     GB_Adot3B__eq_lt_uint64
-// A*B function (heap):      GB_AheapB__eq_lt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_UINT64 || GxB_NO_EQ_BOOL || GxB_NO_LT_UINT64 || GxB_NO_EQ_LT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_uint64
 GrB_Info GB_Adot2B__eq_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_uint64
 GrB_Info GB_Adot3B__eq_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lt_uint8.c b/Source/Generated/GB_AxB__eq_lt_uint8.c
index fdfdf757de..24d8f28b67 100644
--- a/Source/Generated/GB_AxB__eq_lt_uint8.c
+++ b/Source/Generated/GB_AxB__eq_lt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lt_uint8
 // A'*B function (dot2):     GB_Adot2B__eq_lt_uint8
 // A'*B function (dot3):     GB_Adot3B__eq_lt_uint8
-// A*B function (heap):      GB_AheapB__eq_lt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lt_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik < bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LT || GxB_NO_UINT8 || GxB_NO_EQ_BOOL || GxB_NO_LT_UINT8 || GxB_NO_EQ_LT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lt_uint8
 GrB_Info GB_Adot2B__eq_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lt_uint8
 GrB_Info GB_Adot3B__eq_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_lxor_bool.c b/Source/Generated/GB_AxB__eq_lxor_bool.c
index 55c613fb40..458cd93333 100644
--- a/Source/Generated/GB_AxB__eq_lxor_bool.c
+++ b/Source/Generated/GB_AxB__eq_lxor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_lxor_bool
 // A'*B function (dot2):     GB_Adot2B__eq_lxor_bool
 // A'*B function (dot3):     GB_Adot3B__eq_lxor_bool
-// A*B function (heap):      GB_AheapB__eq_lxor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_lxor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_lxor_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_LXOR || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_EQ_LXOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_lxor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_lxor_bool
 GrB_Info GB_Adot2B__eq_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_lxor_bool
 GrB_Info GB_Adot3B__eq_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_lxor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_lxor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_fp32.c b/Source/Generated/GB_AxB__eq_ne_fp32.c
index 5ba479a53d..ab3adbaa74 100644
--- a/Source/Generated/GB_AxB__eq_ne_fp32.c
+++ b/Source/Generated/GB_AxB__eq_ne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_fp32
 // A'*B function (dot2):     GB_Adot2B__eq_ne_fp32
 // A'*B function (dot3):     GB_Adot3B__eq_ne_fp32
-// A*B function (heap):      GB_AheapB__eq_ne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_fp32
 
 // C type:   bool
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_FP32 || GxB_NO_EQ_BOOL || GxB_NO_NE_FP32 || GxB_NO_EQ_NE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_fp32
 GrB_Info GB_Adot2B__eq_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_fp32
 GrB_Info GB_Adot3B__eq_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_fp64.c b/Source/Generated/GB_AxB__eq_ne_fp64.c
index 898a70ac28..a56797059e 100644
--- a/Source/Generated/GB_AxB__eq_ne_fp64.c
+++ b/Source/Generated/GB_AxB__eq_ne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_fp64
 // A'*B function (dot2):     GB_Adot2B__eq_ne_fp64
 // A'*B function (dot3):     GB_Adot3B__eq_ne_fp64
-// A*B function (heap):      GB_AheapB__eq_ne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_fp64
 
 // C type:   bool
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_FP64 || GxB_NO_EQ_BOOL || GxB_NO_NE_FP64 || GxB_NO_EQ_NE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_fp64
 GrB_Info GB_Adot2B__eq_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_fp64
 GrB_Info GB_Adot3B__eq_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_int16.c b/Source/Generated/GB_AxB__eq_ne_int16.c
index 250fc0fba2..cef0d47418 100644
--- a/Source/Generated/GB_AxB__eq_ne_int16.c
+++ b/Source/Generated/GB_AxB__eq_ne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_int16
 // A'*B function (dot2):     GB_Adot2B__eq_ne_int16
 // A'*B function (dot3):     GB_Adot3B__eq_ne_int16
-// A*B function (heap):      GB_AheapB__eq_ne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_INT16 || GxB_NO_EQ_BOOL || GxB_NO_NE_INT16 || GxB_NO_EQ_NE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_int16
 GrB_Info GB_Adot2B__eq_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_int16
 GrB_Info GB_Adot3B__eq_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_int32.c b/Source/Generated/GB_AxB__eq_ne_int32.c
index ccd033c899..e57af96a75 100644
--- a/Source/Generated/GB_AxB__eq_ne_int32.c
+++ b/Source/Generated/GB_AxB__eq_ne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_int32
 // A'*B function (dot2):     GB_Adot2B__eq_ne_int32
 // A'*B function (dot3):     GB_Adot3B__eq_ne_int32
-// A*B function (heap):      GB_AheapB__eq_ne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_INT32 || GxB_NO_EQ_BOOL || GxB_NO_NE_INT32 || GxB_NO_EQ_NE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_int32
 GrB_Info GB_Adot2B__eq_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_int32
 GrB_Info GB_Adot3B__eq_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_int64.c b/Source/Generated/GB_AxB__eq_ne_int64.c
index ae41234623..5cda6c379b 100644
--- a/Source/Generated/GB_AxB__eq_ne_int64.c
+++ b/Source/Generated/GB_AxB__eq_ne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_int64
 // A'*B function (dot2):     GB_Adot2B__eq_ne_int64
 // A'*B function (dot3):     GB_Adot3B__eq_ne_int64
-// A*B function (heap):      GB_AheapB__eq_ne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_INT64 || GxB_NO_EQ_BOOL || GxB_NO_NE_INT64 || GxB_NO_EQ_NE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_int64
 GrB_Info GB_Adot2B__eq_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_int64
 GrB_Info GB_Adot3B__eq_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_int8.c b/Source/Generated/GB_AxB__eq_ne_int8.c
index 0f81ea42f7..12e9c4bf69 100644
--- a/Source/Generated/GB_AxB__eq_ne_int8.c
+++ b/Source/Generated/GB_AxB__eq_ne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_int8
 // A'*B function (dot2):     GB_Adot2B__eq_ne_int8
 // A'*B function (dot3):     GB_Adot3B__eq_ne_int8
-// A*B function (heap):      GB_AheapB__eq_ne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_INT8 || GxB_NO_EQ_BOOL || GxB_NO_NE_INT8 || GxB_NO_EQ_NE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_int8
 GrB_Info GB_Adot2B__eq_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_int8
 GrB_Info GB_Adot3B__eq_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_uint16.c b/Source/Generated/GB_AxB__eq_ne_uint16.c
index 2bb1aaf052..03b1808acf 100644
--- a/Source/Generated/GB_AxB__eq_ne_uint16.c
+++ b/Source/Generated/GB_AxB__eq_ne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_uint16
 // A'*B function (dot2):     GB_Adot2B__eq_ne_uint16
 // A'*B function (dot3):     GB_Adot3B__eq_ne_uint16
-// A*B function (heap):      GB_AheapB__eq_ne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_UINT16 || GxB_NO_EQ_BOOL || GxB_NO_NE_UINT16 || GxB_NO_EQ_NE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_uint16
 GrB_Info GB_Adot2B__eq_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_uint16
 GrB_Info GB_Adot3B__eq_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_uint16
+(
+    GrB_Matrix C,                               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: no hard-coded kernel for this semiring
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; Mask_comp presumably selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably holds ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // reached after the included template body
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_uint32.c b/Source/Generated/GB_AxB__eq_ne_uint32.c
index 37b8c086a2..1b2060879d 100644
--- a/Source/Generated/GB_AxB__eq_ne_uint32.c
+++ b/Source/Generated/GB_AxB__eq_ne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_uint32
 // A'*B function (dot2):     GB_Adot2B__eq_ne_uint32
 // A'*B function (dot3):     GB_Adot3B__eq_ne_uint32
-// A*B function (heap):      GB_AheapB__eq_ne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_UINT32 || GxB_NO_EQ_BOOL || GxB_NO_NE_UINT32 || GxB_NO_EQ_NE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_uint32
 GrB_Info GB_Adot2B__eq_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_uint32
 GrB_Info GB_Adot3B__eq_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_uint32
+(
+    GrB_Matrix C,                               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: no hard-coded kernel for this semiring
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; Mask_comp presumably selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably holds ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // reached after the included template body
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_uint64.c b/Source/Generated/GB_AxB__eq_ne_uint64.c
index 10cb6d3406..4f2439a054 100644
--- a/Source/Generated/GB_AxB__eq_ne_uint64.c
+++ b/Source/Generated/GB_AxB__eq_ne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_uint64
 // A'*B function (dot2):     GB_Adot2B__eq_ne_uint64
 // A'*B function (dot3):     GB_Adot3B__eq_ne_uint64
-// A*B function (heap):      GB_AheapB__eq_ne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_UINT64 || GxB_NO_EQ_BOOL || GxB_NO_NE_UINT64 || GxB_NO_EQ_NE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_uint64
 GrB_Info GB_Adot2B__eq_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_uint64
 GrB_Info GB_Adot3B__eq_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_uint64
+(
+    GrB_Matrix C,                               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: no hard-coded kernel for this semiring
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; Mask_comp presumably selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably holds ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // reached after the included template body
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_ne_uint8.c b/Source/Generated/GB_AxB__eq_ne_uint8.c
index f10955f657..ad8e907928 100644
--- a/Source/Generated/GB_AxB__eq_ne_uint8.c
+++ b/Source/Generated/GB_AxB__eq_ne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_ne_uint8
 // A'*B function (dot2):     GB_Adot2B__eq_ne_uint8
 // A'*B function (dot3):     GB_Adot3B__eq_ne_uint8
-// A*B function (heap):      GB_AheapB__eq_ne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__eq_ne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_ne_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == (aik != bkj))
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_NE || GxB_NO_UINT8 || GxB_NO_EQ_BOOL || GxB_NO_NE_UINT8 || GxB_NO_EQ_NE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_ne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_ne_uint8
 GrB_Info GB_Adot2B__eq_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_ne_uint8
 GrB_Info GB_Adot3B__eq_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_ne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_ne_uint8
+(
+    GrB_Matrix C,                               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is set: no hard-coded kernel for this semiring
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_ne_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; Mask_comp presumably selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably holds ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;          // reached after the included template body
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__eq_second_bool.c b/Source/Generated/GB_AxB__eq_second_bool.c
index 9038da97c2..c3190db57b 100644
--- a/Source/Generated/GB_AxB__eq_second_bool.c
+++ b/Source/Generated/GB_AxB__eq_second_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__eq_second_bool
 // A'*B function (dot2):     GB_Adot2B__eq_second_bool
 // A'*B function (dot3):     GB_Adot3B__eq_second_bool
-// A*B function (heap):      GB_AheapB__eq_second_bool
+// C+=A'*B function (dot4):  GB_Adot4B__eq_second_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__eq_second_bool
 
 // C type:   bool
 // A type:   bool
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = (cij == z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = (cij == bkj)
 // Identity: true
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = (z == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = (Cx [p] == t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x == y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    1
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = (Hx [i] == t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_SECOND || GxB_NO_BOOL || GxB_NO_EQ_BOOL || GxB_NO_SECOND_BOOL || GxB_NO_EQ_SECOND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__eq_second_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__eq_second_bool
 GrB_Info GB_Adot2B__eq_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__eq_second_bool
 GrB_Info GB_Adot3B__eq_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__eq_second_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__eq_second_bool  // C+=A'*B (dense dot product) for eq_second_bool
+(
+    GrB_Matrix C,                               // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A is split into naslice slices -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably likewise for B -- TODO confirm
+    const int nthreads                          // not referenced here; any use is inside the included template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled at compile time; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the entire kernel body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__eq_second_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__eq_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__include.h b/Source/Generated/GB_AxB__include.h
index 8241470bf5..72e34a9452 100644
--- a/Source/Generated/GB_AxB__include.h
+++ b/Source/Generated/GB_AxB__include.h
@@ -2,25 +2,15 @@
 // GB_AxB__include.h: definitions for GB_AxB__*.c
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // This file has been automatically generated from Generator/GB_AxB.h
 
-
-GrB_Info GB_AgusB__min_first_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-) ;
-
 GrB_Info GB_Adot2B__min_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31,7 +21,7 @@ GrB_Info GB_Adot2B__min_first_int8
 GrB_Info GB_Adot3B__min_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39,32 +29,32 @@ GrB_Info GB_Adot3B__min_first_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_int8
+GrB_Info GB_Asaxpy3B__min_first_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_int16
+GrB_Info GB_Adot4B__min_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -75,7 +65,7 @@ GrB_Info GB_Adot2B__min_first_int16
 GrB_Info GB_Adot3B__min_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -83,32 +73,32 @@ GrB_Info GB_Adot3B__min_first_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_int16
+GrB_Info GB_Asaxpy3B__min_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_int32
+GrB_Info GB_Adot4B__min_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -119,7 +109,7 @@ GrB_Info GB_Adot2B__min_first_int32
 GrB_Info GB_Adot3B__min_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -127,32 +117,32 @@ GrB_Info GB_Adot3B__min_first_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_int32
+GrB_Info GB_Asaxpy3B__min_first_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_int64
+GrB_Info GB_Adot4B__min_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -163,7 +153,7 @@ GrB_Info GB_Adot2B__min_first_int64
 GrB_Info GB_Adot3B__min_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -171,32 +161,32 @@ GrB_Info GB_Adot3B__min_first_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_int64
+GrB_Info GB_Asaxpy3B__min_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_uint8
+GrB_Info GB_Adot4B__min_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -207,7 +197,7 @@ GrB_Info GB_Adot2B__min_first_uint8
 GrB_Info GB_Adot3B__min_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -215,32 +205,32 @@ GrB_Info GB_Adot3B__min_first_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_uint8
+GrB_Info GB_Asaxpy3B__min_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_uint16
+GrB_Info GB_Adot4B__min_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -251,7 +241,7 @@ GrB_Info GB_Adot2B__min_first_uint16
 GrB_Info GB_Adot3B__min_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -259,32 +249,32 @@ GrB_Info GB_Adot3B__min_first_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_uint16
+GrB_Info GB_Asaxpy3B__min_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_uint32
+GrB_Info GB_Adot4B__min_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -295,7 +285,7 @@ GrB_Info GB_Adot2B__min_first_uint32
 GrB_Info GB_Adot3B__min_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -303,32 +293,32 @@ GrB_Info GB_Adot3B__min_first_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_uint32
+GrB_Info GB_Asaxpy3B__min_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_uint64
+GrB_Info GB_Adot4B__min_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -339,7 +329,7 @@ GrB_Info GB_Adot2B__min_first_uint64
 GrB_Info GB_Adot3B__min_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -347,32 +337,32 @@ GrB_Info GB_Adot3B__min_first_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_uint64
+GrB_Info GB_Asaxpy3B__min_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_fp32
+GrB_Info GB_Adot4B__min_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -383,7 +373,7 @@ GrB_Info GB_Adot2B__min_first_fp32
 GrB_Info GB_Adot3B__min_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -391,32 +381,32 @@ GrB_Info GB_Adot3B__min_first_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_fp32
+GrB_Info GB_Asaxpy3B__min_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_first_fp64
+GrB_Info GB_Adot4B__min_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -427,7 +417,7 @@ GrB_Info GB_Adot2B__min_first_fp64
 GrB_Info GB_Adot3B__min_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -435,32 +425,32 @@ GrB_Info GB_Adot3B__min_first_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_first_fp64
+GrB_Info GB_Asaxpy3B__min_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_int8
+GrB_Info GB_Adot4B__min_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -471,7 +461,7 @@ GrB_Info GB_Adot2B__max_first_int8
 GrB_Info GB_Adot3B__max_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -479,32 +469,32 @@ GrB_Info GB_Adot3B__max_first_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_int8
+GrB_Info GB_Asaxpy3B__max_first_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_int16
+GrB_Info GB_Adot4B__max_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -515,7 +505,7 @@ GrB_Info GB_Adot2B__max_first_int16
 GrB_Info GB_Adot3B__max_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -523,32 +513,32 @@ GrB_Info GB_Adot3B__max_first_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_int16
+GrB_Info GB_Asaxpy3B__max_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_int32
+GrB_Info GB_Adot4B__max_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -559,7 +549,7 @@ GrB_Info GB_Adot2B__max_first_int32
 GrB_Info GB_Adot3B__max_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -567,32 +557,32 @@ GrB_Info GB_Adot3B__max_first_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_int32
+GrB_Info GB_Asaxpy3B__max_first_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_int64
+GrB_Info GB_Adot4B__max_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -603,7 +593,7 @@ GrB_Info GB_Adot2B__max_first_int64
 GrB_Info GB_Adot3B__max_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -611,32 +601,32 @@ GrB_Info GB_Adot3B__max_first_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_int64
+GrB_Info GB_Asaxpy3B__max_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_uint8
+GrB_Info GB_Adot4B__max_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -647,7 +637,7 @@ GrB_Info GB_Adot2B__max_first_uint8
 GrB_Info GB_Adot3B__max_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -655,32 +645,32 @@ GrB_Info GB_Adot3B__max_first_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_uint8
+GrB_Info GB_Asaxpy3B__max_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_uint16
+GrB_Info GB_Adot4B__max_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -691,7 +681,7 @@ GrB_Info GB_Adot2B__max_first_uint16
 GrB_Info GB_Adot3B__max_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -699,32 +689,32 @@ GrB_Info GB_Adot3B__max_first_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_uint16
+GrB_Info GB_Asaxpy3B__max_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_uint32
+GrB_Info GB_Adot4B__max_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -735,7 +725,7 @@ GrB_Info GB_Adot2B__max_first_uint32
 GrB_Info GB_Adot3B__max_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -743,32 +733,32 @@ GrB_Info GB_Adot3B__max_first_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_uint32
+GrB_Info GB_Asaxpy3B__max_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_uint64
+GrB_Info GB_Adot4B__max_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -779,7 +769,7 @@ GrB_Info GB_Adot2B__max_first_uint64
 GrB_Info GB_Adot3B__max_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -787,32 +777,32 @@ GrB_Info GB_Adot3B__max_first_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_uint64
+GrB_Info GB_Asaxpy3B__max_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_fp32
+GrB_Info GB_Adot4B__max_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -823,7 +813,7 @@ GrB_Info GB_Adot2B__max_first_fp32
 GrB_Info GB_Adot3B__max_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -831,32 +821,32 @@ GrB_Info GB_Adot3B__max_first_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_fp32
+GrB_Info GB_Asaxpy3B__max_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_first_fp64
+GrB_Info GB_Adot4B__max_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -867,7 +857,7 @@ GrB_Info GB_Adot2B__max_first_fp64
 GrB_Info GB_Adot3B__max_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -875,32 +865,76 @@ GrB_Info GB_Adot3B__max_first_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_first_fp64
+GrB_Info GB_Asaxpy3B__max_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__max_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_first_int8
+GrB_Info GB_Adot3B__any_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_int8
+GrB_Info GB_Asaxpy3B__any_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -908,10 +942,10 @@ GrB_Info GB_Adot2B__plus_first_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_int8
+GrB_Info GB_Adot3B__any_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -919,32 +953,76 @@ GrB_Info GB_Adot3B__plus_first_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_int8
+GrB_Info GB_Asaxpy3B__any_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_first_uint8
+GrB_Info GB_Adot3B__any_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_uint8
+GrB_Info GB_Asaxpy3B__any_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -952,10 +1030,10 @@ GrB_Info GB_Adot2B__plus_first_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_uint8
+GrB_Info GB_Adot3B__any_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -963,32 +1041,76 @@ GrB_Info GB_Adot3B__plus_first_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_uint8
+GrB_Info GB_Asaxpy3B__any_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
 ) ;
 
+GrB_Info GB_Adot3B__any_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_first_int16
+GrB_Info GB_Asaxpy3B__any_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_int16
+GrB_Info GB_Adot4B__any_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -996,10 +1118,10 @@ GrB_Info GB_Adot2B__plus_first_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_int16
+GrB_Info GB_Adot3B__any_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1007,32 +1129,76 @@ GrB_Info GB_Adot3B__plus_first_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_int16
+GrB_Info GB_Asaxpy3B__any_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_first_uint16
+GrB_Info GB_Adot3B__any_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_uint16
+GrB_Info GB_Asaxpy3B__any_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1040,10 +1206,10 @@ GrB_Info GB_Adot2B__plus_first_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_uint16
+GrB_Info GB_Adot3B__any_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1051,32 +1217,76 @@ GrB_Info GB_Adot3B__plus_first_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_uint16
+GrB_Info GB_Asaxpy3B__any_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_first_int32
+GrB_Info GB_Adot3B__any_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_int32
+GrB_Info GB_Asaxpy3B__any_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1084,10 +1294,10 @@ GrB_Info GB_Adot2B__plus_first_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_int32
+GrB_Info GB_Adot3B__any_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1095,32 +1305,76 @@ GrB_Info GB_Adot3B__plus_first_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_int32
+GrB_Info GB_Asaxpy3B__any_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_first_uint32
+GrB_Info GB_Adot3B__plus_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_uint32
+GrB_Info GB_Asaxpy3B__plus_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1128,10 +1382,10 @@ GrB_Info GB_Adot2B__plus_first_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_uint32
+GrB_Info GB_Adot3B__plus_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1139,32 +1393,76 @@ GrB_Info GB_Adot3B__plus_first_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_uint32
+GrB_Info GB_Asaxpy3B__plus_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
 ) ;
 
+GrB_Info GB_Adot3B__plus_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_first_int64
+GrB_Info GB_Asaxpy3B__plus_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_int64
+GrB_Info GB_Adot4B__plus_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1172,10 +1470,10 @@ GrB_Info GB_Adot2B__plus_first_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_int64
+GrB_Info GB_Adot3B__plus_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1183,32 +1481,76 @@ GrB_Info GB_Adot3B__plus_first_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_int64
+GrB_Info GB_Asaxpy3B__plus_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_first_uint64
+GrB_Info GB_Adot3B__plus_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
-GrB_Info GB_Adot2B__plus_first_uint64
+GrB_Info GB_Asaxpy3B__plus_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1216,10 +1558,10 @@ GrB_Info GB_Adot2B__plus_first_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_first_uint64
+GrB_Info GB_Adot3B__plus_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1227,32 +1569,120 @@ GrB_Info GB_Adot3B__plus_first_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_uint64
+GrB_Info GB_Asaxpy3B__plus_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_first_fp32
+GrB_Info GB_Asaxpy3B__plus_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
 GrB_Info GB_Adot2B__plus_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1263,7 +1693,7 @@ GrB_Info GB_Adot2B__plus_first_fp32
 GrB_Info GB_Adot3B__plus_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1271,32 +1701,32 @@ GrB_Info GB_Adot3B__plus_first_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_fp32
+GrB_Info GB_Asaxpy3B__plus_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_first_fp64
+GrB_Info GB_Adot4B__plus_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1307,7 +1737,7 @@ GrB_Info GB_Adot2B__plus_first_fp64
 GrB_Info GB_Adot3B__plus_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1315,32 +1745,32 @@ GrB_Info GB_Adot3B__plus_first_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_first_fp64
+GrB_Info GB_Asaxpy3B__plus_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_int8
+GrB_Info GB_Adot4B__plus_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1351,7 +1781,7 @@ GrB_Info GB_Adot2B__times_first_int8
 GrB_Info GB_Adot3B__times_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1359,32 +1789,32 @@ GrB_Info GB_Adot3B__times_first_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_int8
+GrB_Info GB_Asaxpy3B__times_first_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_uint8
+GrB_Info GB_Adot4B__times_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1395,7 +1825,7 @@ GrB_Info GB_Adot2B__times_first_uint8
 GrB_Info GB_Adot3B__times_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1403,32 +1833,32 @@ GrB_Info GB_Adot3B__times_first_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_uint8
+GrB_Info GB_Asaxpy3B__times_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_int16
+GrB_Info GB_Adot4B__times_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1439,7 +1869,7 @@ GrB_Info GB_Adot2B__times_first_int16
 GrB_Info GB_Adot3B__times_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1447,32 +1877,32 @@ GrB_Info GB_Adot3B__times_first_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_int16
+GrB_Info GB_Asaxpy3B__times_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_uint16
+GrB_Info GB_Adot4B__times_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1483,7 +1913,7 @@ GrB_Info GB_Adot2B__times_first_uint16
 GrB_Info GB_Adot3B__times_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1491,32 +1921,32 @@ GrB_Info GB_Adot3B__times_first_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_uint16
+GrB_Info GB_Asaxpy3B__times_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_int32
+GrB_Info GB_Adot4B__times_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1527,7 +1957,7 @@ GrB_Info GB_Adot2B__times_first_int32
 GrB_Info GB_Adot3B__times_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1535,32 +1965,32 @@ GrB_Info GB_Adot3B__times_first_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_int32
+GrB_Info GB_Asaxpy3B__times_first_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_uint32
+GrB_Info GB_Adot4B__times_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1571,7 +2001,7 @@ GrB_Info GB_Adot2B__times_first_uint32
 GrB_Info GB_Adot3B__times_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1579,32 +2009,32 @@ GrB_Info GB_Adot3B__times_first_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_uint32
+GrB_Info GB_Asaxpy3B__times_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_int64
+GrB_Info GB_Adot4B__times_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1615,7 +2045,7 @@ GrB_Info GB_Adot2B__times_first_int64
 GrB_Info GB_Adot3B__times_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1623,32 +2053,32 @@ GrB_Info GB_Adot3B__times_first_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_int64
+GrB_Info GB_Asaxpy3B__times_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_uint64
+GrB_Info GB_Adot4B__times_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1659,7 +2089,7 @@ GrB_Info GB_Adot2B__times_first_uint64
 GrB_Info GB_Adot3B__times_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1667,32 +2097,32 @@ GrB_Info GB_Adot3B__times_first_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_uint64
+GrB_Info GB_Asaxpy3B__times_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_fp32
+GrB_Info GB_Adot4B__times_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1703,7 +2133,7 @@ GrB_Info GB_Adot2B__times_first_fp32
 GrB_Info GB_Adot3B__times_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1711,32 +2141,32 @@ GrB_Info GB_Adot3B__times_first_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_fp32
+GrB_Info GB_Asaxpy3B__times_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_first_fp64
+GrB_Info GB_Adot4B__times_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1747,7 +2177,7 @@ GrB_Info GB_Adot2B__times_first_fp64
 GrB_Info GB_Adot3B__times_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1755,32 +2185,32 @@ GrB_Info GB_Adot3B__times_first_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_first_fp64
+GrB_Info GB_Asaxpy3B__times_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_first_bool
+GrB_Info GB_Adot4B__times_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__lor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1791,7 +2221,7 @@ GrB_Info GB_Adot2B__lor_first_bool
 GrB_Info GB_Adot3B__lor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1799,32 +2229,32 @@ GrB_Info GB_Adot3B__lor_first_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_first_bool
+GrB_Info GB_Asaxpy3B__lor_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_first_bool
+GrB_Info GB_Adot4B__lor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__land_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1835,7 +2265,7 @@ GrB_Info GB_Adot2B__land_first_bool
 GrB_Info GB_Adot3B__land_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1843,32 +2273,32 @@ GrB_Info GB_Adot3B__land_first_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_first_bool
+GrB_Info GB_Asaxpy3B__land_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_first_bool
+GrB_Info GB_Adot4B__land_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__lxor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1879,7 +2309,7 @@ GrB_Info GB_Adot2B__lxor_first_bool
 GrB_Info GB_Adot3B__lxor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1887,32 +2317,76 @@ GrB_Info GB_Adot3B__lxor_first_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_first_bool
+GrB_Info GB_Asaxpy3B__lxor_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__lxor_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
 ) ;
 
+GrB_Info GB_Adot3B__any_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__eq_first_bool
+GrB_Info GB_Asaxpy3B__any_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
 GrB_Info GB_Adot2B__eq_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1923,7 +2397,7 @@ GrB_Info GB_Adot2B__eq_first_bool
 GrB_Info GB_Adot3B__eq_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1931,32 +2405,32 @@ GrB_Info GB_Adot3B__eq_first_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_first_bool
+GrB_Info GB_Asaxpy3B__eq_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_int8
+GrB_Info GB_Adot4B__eq_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -1967,7 +2441,7 @@ GrB_Info GB_Adot2B__min_second_int8
 GrB_Info GB_Adot3B__min_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -1975,32 +2449,32 @@ GrB_Info GB_Adot3B__min_second_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_int8
+GrB_Info GB_Asaxpy3B__min_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_int16
+GrB_Info GB_Adot4B__min_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2011,7 +2485,7 @@ GrB_Info GB_Adot2B__min_second_int16
 GrB_Info GB_Adot3B__min_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2019,32 +2493,32 @@ GrB_Info GB_Adot3B__min_second_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_int16
+GrB_Info GB_Asaxpy3B__min_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_int32
+GrB_Info GB_Adot4B__min_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2055,7 +2529,7 @@ GrB_Info GB_Adot2B__min_second_int32
 GrB_Info GB_Adot3B__min_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2063,32 +2537,32 @@ GrB_Info GB_Adot3B__min_second_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_int32
+GrB_Info GB_Asaxpy3B__min_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_int64
+GrB_Info GB_Adot4B__min_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2099,7 +2573,7 @@ GrB_Info GB_Adot2B__min_second_int64
 GrB_Info GB_Adot3B__min_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2107,32 +2581,32 @@ GrB_Info GB_Adot3B__min_second_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_int64
+GrB_Info GB_Asaxpy3B__min_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_uint8
+GrB_Info GB_Adot4B__min_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2143,7 +2617,7 @@ GrB_Info GB_Adot2B__min_second_uint8
 GrB_Info GB_Adot3B__min_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2151,32 +2625,32 @@ GrB_Info GB_Adot3B__min_second_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_uint8
+GrB_Info GB_Asaxpy3B__min_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_uint16
+GrB_Info GB_Adot4B__min_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2187,7 +2661,7 @@ GrB_Info GB_Adot2B__min_second_uint16
 GrB_Info GB_Adot3B__min_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2195,32 +2669,32 @@ GrB_Info GB_Adot3B__min_second_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_uint16
+GrB_Info GB_Asaxpy3B__min_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_uint32
+GrB_Info GB_Adot4B__min_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2231,7 +2705,7 @@ GrB_Info GB_Adot2B__min_second_uint32
 GrB_Info GB_Adot3B__min_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2239,32 +2713,32 @@ GrB_Info GB_Adot3B__min_second_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_uint32
+GrB_Info GB_Asaxpy3B__min_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_uint64
+GrB_Info GB_Adot4B__min_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2275,7 +2749,7 @@ GrB_Info GB_Adot2B__min_second_uint64
 GrB_Info GB_Adot3B__min_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2283,32 +2757,32 @@ GrB_Info GB_Adot3B__min_second_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_uint64
+GrB_Info GB_Asaxpy3B__min_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_fp32
+GrB_Info GB_Adot4B__min_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2319,7 +2793,7 @@ GrB_Info GB_Adot2B__min_second_fp32
 GrB_Info GB_Adot3B__min_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2327,32 +2801,32 @@ GrB_Info GB_Adot3B__min_second_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_fp32
+GrB_Info GB_Asaxpy3B__min_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_second_fp64
+GrB_Info GB_Adot4B__min_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2363,7 +2837,7 @@ GrB_Info GB_Adot2B__min_second_fp64
 GrB_Info GB_Adot3B__min_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2371,32 +2845,32 @@ GrB_Info GB_Adot3B__min_second_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_second_fp64
+GrB_Info GB_Asaxpy3B__min_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_int8
+GrB_Info GB_Adot4B__min_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2407,7 +2881,7 @@ GrB_Info GB_Adot2B__max_second_int8
 GrB_Info GB_Adot3B__max_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2415,32 +2889,32 @@ GrB_Info GB_Adot3B__max_second_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_int8
+GrB_Info GB_Asaxpy3B__max_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_int16
+GrB_Info GB_Adot4B__max_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2451,7 +2925,7 @@ GrB_Info GB_Adot2B__max_second_int16
 GrB_Info GB_Adot3B__max_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2459,32 +2933,32 @@ GrB_Info GB_Adot3B__max_second_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_int16
+GrB_Info GB_Asaxpy3B__max_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_int32
+GrB_Info GB_Adot4B__max_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2495,7 +2969,7 @@ GrB_Info GB_Adot2B__max_second_int32
 GrB_Info GB_Adot3B__max_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2503,32 +2977,32 @@ GrB_Info GB_Adot3B__max_second_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_int32
+GrB_Info GB_Asaxpy3B__max_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_int64
+GrB_Info GB_Adot4B__max_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2539,7 +3013,7 @@ GrB_Info GB_Adot2B__max_second_int64
 GrB_Info GB_Adot3B__max_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2547,32 +3021,32 @@ GrB_Info GB_Adot3B__max_second_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_int64
+GrB_Info GB_Asaxpy3B__max_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_uint8
+GrB_Info GB_Adot4B__max_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2583,7 +3057,7 @@ GrB_Info GB_Adot2B__max_second_uint8
 GrB_Info GB_Adot3B__max_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2591,32 +3065,32 @@ GrB_Info GB_Adot3B__max_second_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_uint8
+GrB_Info GB_Asaxpy3B__max_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_uint16
+GrB_Info GB_Adot4B__max_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2627,7 +3101,7 @@ GrB_Info GB_Adot2B__max_second_uint16
 GrB_Info GB_Adot3B__max_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2635,32 +3109,32 @@ GrB_Info GB_Adot3B__max_second_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_uint16
+GrB_Info GB_Asaxpy3B__max_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_uint32
+GrB_Info GB_Adot4B__max_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2671,7 +3145,7 @@ GrB_Info GB_Adot2B__max_second_uint32
 GrB_Info GB_Adot3B__max_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2679,32 +3153,32 @@ GrB_Info GB_Adot3B__max_second_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_uint32
+GrB_Info GB_Asaxpy3B__max_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_uint64
+GrB_Info GB_Adot4B__max_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2715,7 +3189,7 @@ GrB_Info GB_Adot2B__max_second_uint64
 GrB_Info GB_Adot3B__max_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2723,32 +3197,32 @@ GrB_Info GB_Adot3B__max_second_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_uint64
+GrB_Info GB_Asaxpy3B__max_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_fp32
+GrB_Info GB_Adot4B__max_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2759,7 +3233,7 @@ GrB_Info GB_Adot2B__max_second_fp32
 GrB_Info GB_Adot3B__max_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2767,32 +3241,32 @@ GrB_Info GB_Adot3B__max_second_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_fp32
+GrB_Info GB_Asaxpy3B__max_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_second_fp64
+GrB_Info GB_Adot4B__max_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2803,7 +3277,7 @@ GrB_Info GB_Adot2B__max_second_fp64
 GrB_Info GB_Adot3B__max_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2811,32 +3285,32 @@ GrB_Info GB_Adot3B__max_second_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_second_fp64
+GrB_Info GB_Asaxpy3B__max_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_int8
+GrB_Info GB_Adot4B__max_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_int8
+GrB_Info GB_Adot2B__any_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2844,10 +3318,10 @@ GrB_Info GB_Adot2B__plus_second_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_int8
+GrB_Info GB_Adot3B__any_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2855,32 +3329,32 @@ GrB_Info GB_Adot3B__plus_second_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_int8
+GrB_Info GB_Asaxpy3B__any_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_uint8
+GrB_Info GB_Adot4B__any_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_uint8
+GrB_Info GB_Adot2B__any_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2888,10 +3362,10 @@ GrB_Info GB_Adot2B__plus_second_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_uint8
+GrB_Info GB_Adot3B__any_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2899,32 +3373,32 @@ GrB_Info GB_Adot3B__plus_second_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_uint8
+GrB_Info GB_Asaxpy3B__any_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_int16
+GrB_Info GB_Adot4B__any_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_int16
+GrB_Info GB_Adot2B__any_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2932,10 +3406,10 @@ GrB_Info GB_Adot2B__plus_second_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_int16
+GrB_Info GB_Adot3B__any_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2943,32 +3417,32 @@ GrB_Info GB_Adot3B__plus_second_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_int16
+GrB_Info GB_Asaxpy3B__any_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_uint16
+GrB_Info GB_Adot4B__any_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_uint16
+GrB_Info GB_Adot2B__any_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -2976,10 +3450,10 @@ GrB_Info GB_Adot2B__plus_second_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_uint16
+GrB_Info GB_Adot3B__any_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -2987,32 +3461,32 @@ GrB_Info GB_Adot3B__plus_second_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_uint16
+GrB_Info GB_Asaxpy3B__any_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_int32
+GrB_Info GB_Adot4B__any_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_int32
+GrB_Info GB_Adot2B__any_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3020,10 +3494,10 @@ GrB_Info GB_Adot2B__plus_second_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_int32
+GrB_Info GB_Adot3B__any_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3031,32 +3505,32 @@ GrB_Info GB_Adot3B__plus_second_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_int32
+GrB_Info GB_Asaxpy3B__any_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_uint32
+GrB_Info GB_Adot4B__any_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_uint32
+GrB_Info GB_Adot2B__any_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3064,10 +3538,10 @@ GrB_Info GB_Adot2B__plus_second_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_uint32
+GrB_Info GB_Adot3B__any_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3075,32 +3549,32 @@ GrB_Info GB_Adot3B__plus_second_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_uint32
+GrB_Info GB_Asaxpy3B__any_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_int64
+GrB_Info GB_Adot4B__any_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_int64
+GrB_Info GB_Adot2B__any_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3108,10 +3582,10 @@ GrB_Info GB_Adot2B__plus_second_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_int64
+GrB_Info GB_Adot3B__any_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3119,32 +3593,32 @@ GrB_Info GB_Adot3B__plus_second_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_int64
+GrB_Info GB_Asaxpy3B__any_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_uint64
+GrB_Info GB_Adot4B__any_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_uint64
+GrB_Info GB_Adot2B__any_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3152,10 +3626,10 @@ GrB_Info GB_Adot2B__plus_second_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_uint64
+GrB_Info GB_Adot3B__any_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3163,32 +3637,32 @@ GrB_Info GB_Adot3B__plus_second_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_uint64
+GrB_Info GB_Asaxpy3B__any_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_fp32
+GrB_Info GB_Adot4B__any_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_fp32
+GrB_Info GB_Adot2B__any_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3196,10 +3670,10 @@ GrB_Info GB_Adot2B__plus_second_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_fp32
+GrB_Info GB_Adot3B__any_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3207,32 +3681,32 @@ GrB_Info GB_Adot3B__plus_second_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_fp32
+GrB_Info GB_Asaxpy3B__any_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_second_fp64
+GrB_Info GB_Adot4B__any_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_second_fp64
+GrB_Info GB_Adot2B__any_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3240,10 +3714,10 @@ GrB_Info GB_Adot2B__plus_second_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_second_fp64
+GrB_Info GB_Adot3B__any_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3251,32 +3725,32 @@ GrB_Info GB_Adot3B__plus_second_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_second_fp64
+GrB_Info GB_Asaxpy3B__any_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_int8
+GrB_Info GB_Adot4B__any_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_int8
+GrB_Info GB_Adot2B__plus_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3284,10 +3758,10 @@ GrB_Info GB_Adot2B__times_second_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_int8
+GrB_Info GB_Adot3B__plus_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3295,32 +3769,32 @@ GrB_Info GB_Adot3B__times_second_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_int8
+GrB_Info GB_Asaxpy3B__plus_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_uint8
+GrB_Info GB_Adot4B__plus_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_uint8
+GrB_Info GB_Adot2B__plus_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3328,10 +3802,10 @@ GrB_Info GB_Adot2B__times_second_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_uint8
+GrB_Info GB_Adot3B__plus_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3339,32 +3813,32 @@ GrB_Info GB_Adot3B__times_second_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_uint8
+GrB_Info GB_Asaxpy3B__plus_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_int16
+GrB_Info GB_Adot4B__plus_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_int16
+GrB_Info GB_Adot2B__plus_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3372,10 +3846,10 @@ GrB_Info GB_Adot2B__times_second_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_int16
+GrB_Info GB_Adot3B__plus_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3383,32 +3857,32 @@ GrB_Info GB_Adot3B__times_second_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_int16
+GrB_Info GB_Asaxpy3B__plus_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_uint16
+GrB_Info GB_Adot4B__plus_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_uint16
+GrB_Info GB_Adot2B__plus_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3416,10 +3890,10 @@ GrB_Info GB_Adot2B__times_second_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_uint16
+GrB_Info GB_Adot3B__plus_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3427,32 +3901,32 @@ GrB_Info GB_Adot3B__times_second_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_uint16
+GrB_Info GB_Asaxpy3B__plus_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_int32
+GrB_Info GB_Adot4B__plus_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_int32
+GrB_Info GB_Adot2B__plus_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3460,10 +3934,10 @@ GrB_Info GB_Adot2B__times_second_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_int32
+GrB_Info GB_Adot3B__plus_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3471,32 +3945,32 @@ GrB_Info GB_Adot3B__times_second_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_int32
+GrB_Info GB_Asaxpy3B__plus_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_uint32
+GrB_Info GB_Adot4B__plus_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_uint32
+GrB_Info GB_Adot2B__plus_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3504,10 +3978,10 @@ GrB_Info GB_Adot2B__times_second_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_uint32
+GrB_Info GB_Adot3B__plus_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3515,32 +3989,32 @@ GrB_Info GB_Adot3B__times_second_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_uint32
+GrB_Info GB_Asaxpy3B__plus_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_int64
+GrB_Info GB_Adot4B__plus_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_int64
+GrB_Info GB_Adot2B__plus_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3548,10 +4022,10 @@ GrB_Info GB_Adot2B__times_second_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_int64
+GrB_Info GB_Adot3B__plus_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3559,32 +4033,32 @@ GrB_Info GB_Adot3B__times_second_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_int64
+GrB_Info GB_Asaxpy3B__plus_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_uint64
+GrB_Info GB_Adot4B__plus_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_uint64
+GrB_Info GB_Adot2B__plus_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3592,10 +4066,10 @@ GrB_Info GB_Adot2B__times_second_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_uint64
+GrB_Info GB_Adot3B__plus_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3603,32 +4077,32 @@ GrB_Info GB_Adot3B__times_second_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_uint64
+GrB_Info GB_Asaxpy3B__plus_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_fp32
+GrB_Info GB_Adot4B__plus_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_fp32
+GrB_Info GB_Adot2B__plus_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3636,10 +4110,10 @@ GrB_Info GB_Adot2B__times_second_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_fp32
+GrB_Info GB_Adot3B__plus_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3647,32 +4121,32 @@ GrB_Info GB_Adot3B__times_second_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_fp32
+GrB_Info GB_Asaxpy3B__plus_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_second_fp64
+GrB_Info GB_Adot4B__plus_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_second_fp64
+GrB_Info GB_Adot2B__plus_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3680,10 +4154,10 @@ GrB_Info GB_Adot2B__times_second_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_second_fp64
+GrB_Info GB_Adot3B__plus_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3691,32 +4165,32 @@ GrB_Info GB_Adot3B__times_second_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_second_fp64
+GrB_Info GB_Asaxpy3B__plus_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_second_bool
+GrB_Info GB_Adot4B__plus_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_second_bool
+GrB_Info GB_Adot2B__times_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3724,10 +4198,10 @@ GrB_Info GB_Adot2B__lor_second_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_second_bool
+GrB_Info GB_Adot3B__times_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3735,32 +4209,32 @@ GrB_Info GB_Adot3B__lor_second_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_second_bool
+GrB_Info GB_Asaxpy3B__times_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_second_bool
+GrB_Info GB_Adot4B__times_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_second_bool
+GrB_Info GB_Adot2B__times_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3768,10 +4242,10 @@ GrB_Info GB_Adot2B__land_second_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_second_bool
+GrB_Info GB_Adot3B__times_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3779,32 +4253,32 @@ GrB_Info GB_Adot3B__land_second_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_second_bool
+GrB_Info GB_Asaxpy3B__times_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_second_bool
+GrB_Info GB_Adot4B__times_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_second_bool
+GrB_Info GB_Adot2B__times_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3812,10 +4286,10 @@ GrB_Info GB_Adot2B__lxor_second_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_second_bool
+GrB_Info GB_Adot3B__times_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3823,32 +4297,32 @@ GrB_Info GB_Adot3B__lxor_second_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_second_bool
+GrB_Info GB_Asaxpy3B__times_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_second_bool
+GrB_Info GB_Adot4B__times_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_second_bool
+GrB_Info GB_Adot2B__times_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3856,10 +4330,10 @@ GrB_Info GB_Adot2B__eq_second_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_second_bool
+GrB_Info GB_Adot3B__times_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3867,33 +4341,32 @@ GrB_Info GB_Adot3B__eq_second_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_second_bool
+GrB_Info GB_Asaxpy3B__times_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_int8
+GrB_Info GB_Adot4B__times_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_int8
+GrB_Info GB_Adot2B__times_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3901,10 +4374,10 @@ GrB_Info GB_Adot2B__min_min_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_int8
+GrB_Info GB_Adot3B__times_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3912,33 +4385,32 @@ GrB_Info GB_Adot3B__min_min_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_int8
+GrB_Info GB_Asaxpy3B__times_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_int16
+GrB_Info GB_Adot4B__times_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_int16
+GrB_Info GB_Adot2B__times_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3946,10 +4418,10 @@ GrB_Info GB_Adot2B__min_min_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_int16
+GrB_Info GB_Adot3B__times_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -3957,33 +4429,32 @@ GrB_Info GB_Adot3B__min_min_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_int16
+GrB_Info GB_Asaxpy3B__times_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_int32
+GrB_Info GB_Adot4B__times_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_int32
+GrB_Info GB_Adot2B__times_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -3991,10 +4462,10 @@ GrB_Info GB_Adot2B__min_min_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_int32
+GrB_Info GB_Adot3B__times_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4002,33 +4473,32 @@ GrB_Info GB_Adot3B__min_min_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_int32
+GrB_Info GB_Asaxpy3B__times_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_int64
+GrB_Info GB_Adot4B__times_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_int64
+GrB_Info GB_Adot2B__times_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4036,10 +4506,10 @@ GrB_Info GB_Adot2B__min_min_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_int64
+GrB_Info GB_Adot3B__times_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4047,33 +4517,32 @@ GrB_Info GB_Adot3B__min_min_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_int64
+GrB_Info GB_Asaxpy3B__times_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_uint8
+GrB_Info GB_Adot4B__times_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_uint8
+GrB_Info GB_Adot2B__times_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4081,10 +4550,10 @@ GrB_Info GB_Adot2B__min_min_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_uint8
+GrB_Info GB_Adot3B__times_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4092,33 +4561,32 @@ GrB_Info GB_Adot3B__min_min_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_uint8
+GrB_Info GB_Asaxpy3B__times_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_uint16
+GrB_Info GB_Adot4B__times_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_uint16
+GrB_Info GB_Adot2B__times_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4126,10 +4594,10 @@ GrB_Info GB_Adot2B__min_min_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_uint16
+GrB_Info GB_Adot3B__times_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4137,33 +4605,32 @@ GrB_Info GB_Adot3B__min_min_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_uint16
+GrB_Info GB_Asaxpy3B__times_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_uint32
+GrB_Info GB_Adot4B__times_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_uint32
+GrB_Info GB_Adot2B__lor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4171,10 +4638,10 @@ GrB_Info GB_Adot2B__min_min_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_uint32
+GrB_Info GB_Adot3B__lor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4182,33 +4649,32 @@ GrB_Info GB_Adot3B__min_min_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_uint32
+GrB_Info GB_Asaxpy3B__lor_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_min_uint64
+GrB_Info GB_Adot4B__lor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_uint64
+GrB_Info GB_Adot2B__land_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4216,10 +4682,10 @@ GrB_Info GB_Adot2B__min_min_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_uint64
+GrB_Info GB_Adot3B__land_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4227,32 +4693,32 @@ GrB_Info GB_Adot3B__min_min_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_uint64
+GrB_Info GB_Asaxpy3B__land_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_min_fp32
+GrB_Info GB_Adot4B__land_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_fp32
+GrB_Info GB_Adot2B__lxor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4260,10 +4726,10 @@ GrB_Info GB_Adot2B__min_min_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_fp32
+GrB_Info GB_Adot3B__lxor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4271,32 +4737,32 @@ GrB_Info GB_Adot3B__min_min_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_fp32
+GrB_Info GB_Asaxpy3B__lxor_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_min_fp64
+GrB_Info GB_Adot4B__lxor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_min_fp64
+GrB_Info GB_Adot2B__any_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4304,10 +4770,10 @@ GrB_Info GB_Adot2B__min_min_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_min_fp64
+GrB_Info GB_Adot3B__any_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4315,33 +4781,32 @@ GrB_Info GB_Adot3B__min_min_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_min_fp64
+GrB_Info GB_Asaxpy3B__any_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_int8
+GrB_Info GB_Adot4B__any_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_int8
+GrB_Info GB_Adot2B__eq_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4349,10 +4814,10 @@ GrB_Info GB_Adot2B__max_min_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_int8
+GrB_Info GB_Adot3B__eq_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4360,33 +4825,32 @@ GrB_Info GB_Adot3B__max_min_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_int8
+GrB_Info GB_Asaxpy3B__eq_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_int16
+GrB_Info GB_Adot4B__eq_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_int16
+GrB_Info GB_Adot2B__any_pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4394,10 +4858,10 @@ GrB_Info GB_Adot2B__max_min_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_int16
+GrB_Info GB_Adot3B__any_pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4405,33 +4869,32 @@ GrB_Info GB_Adot3B__max_min_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_int16
+GrB_Info GB_Asaxpy3B__any_pair_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_int32
+GrB_Info GB_Adot4B__any_pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_int32
+GrB_Info GB_Adot2B__any_pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4439,10 +4902,10 @@ GrB_Info GB_Adot2B__max_min_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_int32
+GrB_Info GB_Adot3B__any_pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4450,33 +4913,32 @@ GrB_Info GB_Adot3B__max_min_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_int32
+GrB_Info GB_Asaxpy3B__any_pair_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_int64
+GrB_Info GB_Adot4B__any_pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_int64
+GrB_Info GB_Adot2B__any_pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4484,10 +4946,10 @@ GrB_Info GB_Adot2B__max_min_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_int64
+GrB_Info GB_Adot3B__any_pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4495,33 +4957,32 @@ GrB_Info GB_Adot3B__max_min_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_int64
+GrB_Info GB_Asaxpy3B__any_pair_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_uint8
+GrB_Info GB_Adot4B__any_pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_uint8
+GrB_Info GB_Adot2B__any_pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4529,10 +4990,10 @@ GrB_Info GB_Adot2B__max_min_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_uint8
+GrB_Info GB_Adot3B__any_pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4540,33 +5001,32 @@ GrB_Info GB_Adot3B__max_min_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_uint8
+GrB_Info GB_Asaxpy3B__any_pair_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_uint16
+GrB_Info GB_Adot4B__any_pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_uint16
+GrB_Info GB_Adot2B__any_pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4574,10 +5034,10 @@ GrB_Info GB_Adot2B__max_min_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_uint16
+GrB_Info GB_Adot3B__any_pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4585,33 +5045,32 @@ GrB_Info GB_Adot3B__max_min_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_uint16
+GrB_Info GB_Asaxpy3B__any_pair_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_uint32
+GrB_Info GB_Adot4B__any_pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_uint32
+GrB_Info GB_Adot2B__any_pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4619,10 +5078,10 @@ GrB_Info GB_Adot2B__max_min_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_uint32
+GrB_Info GB_Adot3B__any_pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4630,33 +5089,32 @@ GrB_Info GB_Adot3B__max_min_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_uint32
+GrB_Info GB_Asaxpy3B__any_pair_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_min_uint64
+GrB_Info GB_Adot4B__any_pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_uint64
+GrB_Info GB_Adot2B__any_pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4664,10 +5122,10 @@ GrB_Info GB_Adot2B__max_min_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_uint64
+GrB_Info GB_Adot3B__any_pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4675,32 +5133,32 @@ GrB_Info GB_Adot3B__max_min_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_uint64
+GrB_Info GB_Asaxpy3B__any_pair_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_min_fp32
+GrB_Info GB_Adot4B__any_pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_fp32
+GrB_Info GB_Adot2B__any_pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4708,10 +5166,10 @@ GrB_Info GB_Adot2B__max_min_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_fp32
+GrB_Info GB_Adot3B__any_pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4719,32 +5177,32 @@ GrB_Info GB_Adot3B__max_min_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_fp32
+GrB_Info GB_Asaxpy3B__any_pair_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_min_fp64
+GrB_Info GB_Adot4B__any_pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_min_fp64
+GrB_Info GB_Adot2B__any_pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4752,10 +5210,10 @@ GrB_Info GB_Adot2B__max_min_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_min_fp64
+GrB_Info GB_Adot3B__any_pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4763,33 +5221,32 @@ GrB_Info GB_Adot3B__max_min_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_min_fp64
+GrB_Info GB_Asaxpy3B__any_pair_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_int8
+GrB_Info GB_Adot4B__any_pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_int8
+GrB_Info GB_Adot2B__any_pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4797,10 +5254,10 @@ GrB_Info GB_Adot2B__plus_min_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_int8
+GrB_Info GB_Adot3B__any_pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4808,33 +5265,32 @@ GrB_Info GB_Adot3B__plus_min_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_int8
+GrB_Info GB_Asaxpy3B__any_pair_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_uint8
+GrB_Info GB_Adot4B__any_pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_uint8
+GrB_Info GB_Adot2B__plus_pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4842,10 +5298,10 @@ GrB_Info GB_Adot2B__plus_min_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_uint8
+GrB_Info GB_Adot3B__plus_pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4853,33 +5309,32 @@ GrB_Info GB_Adot3B__plus_min_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_uint8
+GrB_Info GB_Asaxpy3B__plus_pair_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_int16
+GrB_Info GB_Adot4B__plus_pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_int16
+GrB_Info GB_Adot2B__plus_pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4887,10 +5342,10 @@ GrB_Info GB_Adot2B__plus_min_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_int16
+GrB_Info GB_Adot3B__plus_pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4898,33 +5353,32 @@ GrB_Info GB_Adot3B__plus_min_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_int16
+GrB_Info GB_Asaxpy3B__plus_pair_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_uint16
+GrB_Info GB_Adot4B__plus_pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_uint16
+GrB_Info GB_Adot2B__plus_pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4932,10 +5386,10 @@ GrB_Info GB_Adot2B__plus_min_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_uint16
+GrB_Info GB_Adot3B__plus_pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4943,33 +5397,32 @@ GrB_Info GB_Adot3B__plus_min_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_uint16
+GrB_Info GB_Asaxpy3B__plus_pair_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_int32
+GrB_Info GB_Adot4B__plus_pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_int32
+GrB_Info GB_Adot2B__plus_pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -4977,10 +5430,10 @@ GrB_Info GB_Adot2B__plus_min_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_int32
+GrB_Info GB_Adot3B__plus_pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -4988,33 +5441,32 @@ GrB_Info GB_Adot3B__plus_min_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_int32
+GrB_Info GB_Asaxpy3B__plus_pair_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_uint32
+GrB_Info GB_Adot4B__plus_pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_uint32
+GrB_Info GB_Adot2B__plus_pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5022,10 +5474,10 @@ GrB_Info GB_Adot2B__plus_min_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_uint32
+GrB_Info GB_Adot3B__plus_pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5033,33 +5485,32 @@ GrB_Info GB_Adot3B__plus_min_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_uint32
+GrB_Info GB_Asaxpy3B__plus_pair_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_int64
+GrB_Info GB_Adot4B__plus_pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_int64
+GrB_Info GB_Adot2B__plus_pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5067,10 +5518,10 @@ GrB_Info GB_Adot2B__plus_min_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_int64
+GrB_Info GB_Adot3B__plus_pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5078,33 +5529,32 @@ GrB_Info GB_Adot3B__plus_min_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_int64
+GrB_Info GB_Asaxpy3B__plus_pair_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_min_uint64
+GrB_Info GB_Adot4B__plus_pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_uint64
+GrB_Info GB_Adot2B__plus_pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5112,10 +5562,10 @@ GrB_Info GB_Adot2B__plus_min_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_uint64
+GrB_Info GB_Adot3B__plus_pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5123,32 +5573,32 @@ GrB_Info GB_Adot3B__plus_min_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_uint64
+GrB_Info GB_Asaxpy3B__plus_pair_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_min_fp32
+GrB_Info GB_Adot4B__plus_pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_fp32
+GrB_Info GB_Adot2B__plus_pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5156,10 +5606,10 @@ GrB_Info GB_Adot2B__plus_min_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_fp32
+GrB_Info GB_Adot3B__plus_pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5167,32 +5617,32 @@ GrB_Info GB_Adot3B__plus_min_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_fp32
+GrB_Info GB_Asaxpy3B__plus_pair_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_min_fp64
+GrB_Info GB_Adot4B__plus_pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_min_fp64
+GrB_Info GB_Adot2B__plus_pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5200,10 +5650,10 @@ GrB_Info GB_Adot2B__plus_min_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_min_fp64
+GrB_Info GB_Adot3B__plus_pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5211,33 +5661,32 @@ GrB_Info GB_Adot3B__plus_min_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_min_fp64
+GrB_Info GB_Asaxpy3B__plus_pair_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_int8
+GrB_Info GB_Adot4B__plus_pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_int8
+GrB_Info GB_Adot2B__plus_pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5245,10 +5694,10 @@ GrB_Info GB_Adot2B__times_min_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_int8
+GrB_Info GB_Adot3B__plus_pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5256,33 +5705,32 @@ GrB_Info GB_Adot3B__times_min_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_int8
+GrB_Info GB_Asaxpy3B__plus_pair_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_uint8
+GrB_Info GB_Adot4B__plus_pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_uint8
+GrB_Info GB_Adot2B__lxor_pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5290,10 +5738,10 @@ GrB_Info GB_Adot2B__times_min_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_uint8
+GrB_Info GB_Adot3B__lxor_pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5301,33 +5749,32 @@ GrB_Info GB_Adot3B__times_min_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_uint8
+GrB_Info GB_Asaxpy3B__lxor_pair_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_int16
+GrB_Info GB_Adot4B__lxor_pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_int16
+GrB_Info GB_Adot2B__any_pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5335,10 +5782,10 @@ GrB_Info GB_Adot2B__times_min_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_int16
+GrB_Info GB_Adot3B__any_pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5346,33 +5793,32 @@ GrB_Info GB_Adot3B__times_min_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_int16
+GrB_Info GB_Asaxpy3B__any_pair_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_uint16
+GrB_Info GB_Adot4B__any_pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_uint16
+GrB_Info GB_Adot2B__min_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5380,10 +5826,10 @@ GrB_Info GB_Adot2B__times_min_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_uint16
+GrB_Info GB_Adot3B__min_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5391,33 +5837,32 @@ GrB_Info GB_Adot3B__times_min_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_uint16
+GrB_Info GB_Asaxpy3B__min_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_int32
+GrB_Info GB_Adot4B__min_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_int32
+GrB_Info GB_Adot2B__min_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5425,10 +5870,10 @@ GrB_Info GB_Adot2B__times_min_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_int32
+GrB_Info GB_Adot3B__min_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5436,33 +5881,32 @@ GrB_Info GB_Adot3B__times_min_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_int32
+GrB_Info GB_Asaxpy3B__min_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_uint32
+GrB_Info GB_Adot4B__min_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_uint32
+GrB_Info GB_Adot2B__min_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5470,10 +5914,10 @@ GrB_Info GB_Adot2B__times_min_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_uint32
+GrB_Info GB_Adot3B__min_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5481,33 +5925,32 @@ GrB_Info GB_Adot3B__times_min_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_uint32
+GrB_Info GB_Asaxpy3B__min_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_int64
+GrB_Info GB_Adot4B__min_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_int64
+GrB_Info GB_Adot2B__min_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5515,10 +5958,10 @@ GrB_Info GB_Adot2B__times_min_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_int64
+GrB_Info GB_Adot3B__min_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5526,33 +5969,32 @@ GrB_Info GB_Adot3B__times_min_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_int64
+GrB_Info GB_Asaxpy3B__min_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_min_uint64
+GrB_Info GB_Adot4B__min_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_uint64
+GrB_Info GB_Adot2B__min_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5560,10 +6002,10 @@ GrB_Info GB_Adot2B__times_min_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_uint64
+GrB_Info GB_Adot3B__min_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5571,32 +6013,32 @@ GrB_Info GB_Adot3B__times_min_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_uint64
+GrB_Info GB_Asaxpy3B__min_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_min_fp32
+GrB_Info GB_Adot4B__min_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_fp32
+GrB_Info GB_Adot2B__min_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5604,10 +6046,10 @@ GrB_Info GB_Adot2B__times_min_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_fp32
+GrB_Info GB_Adot3B__min_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5615,32 +6057,32 @@ GrB_Info GB_Adot3B__times_min_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_fp32
+GrB_Info GB_Asaxpy3B__min_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_min_fp64
+GrB_Info GB_Adot4B__min_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_min_fp64
+GrB_Info GB_Adot2B__min_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5648,10 +6090,10 @@ GrB_Info GB_Adot2B__times_min_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_min_fp64
+GrB_Info GB_Adot3B__min_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5659,33 +6101,32 @@ GrB_Info GB_Adot3B__times_min_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_min_fp64
+GrB_Info GB_Asaxpy3B__min_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_int8
+GrB_Info GB_Adot4B__min_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_int8
+GrB_Info GB_Adot2B__min_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5693,10 +6134,10 @@ GrB_Info GB_Adot2B__min_max_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_int8
+GrB_Info GB_Adot3B__min_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5704,33 +6145,32 @@ GrB_Info GB_Adot3B__min_max_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_int8
+GrB_Info GB_Asaxpy3B__min_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_int16
+GrB_Info GB_Adot4B__min_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_int16
+GrB_Info GB_Adot2B__min_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5738,10 +6178,10 @@ GrB_Info GB_Adot2B__min_max_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_int16
+GrB_Info GB_Adot3B__min_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5749,33 +6189,32 @@ GrB_Info GB_Adot3B__min_max_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_int16
+GrB_Info GB_Asaxpy3B__min_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_int32
+GrB_Info GB_Adot4B__min_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_int32
+GrB_Info GB_Adot2B__min_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5783,10 +6222,10 @@ GrB_Info GB_Adot2B__min_max_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_int32
+GrB_Info GB_Adot3B__min_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5794,33 +6233,32 @@ GrB_Info GB_Adot3B__min_max_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_int32
+GrB_Info GB_Asaxpy3B__min_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_int64
+GrB_Info GB_Adot4B__min_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_int64
+GrB_Info GB_Adot2B__max_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5828,10 +6266,10 @@ GrB_Info GB_Adot2B__min_max_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_int64
+GrB_Info GB_Adot3B__max_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5839,33 +6277,32 @@ GrB_Info GB_Adot3B__min_max_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_int64
+GrB_Info GB_Asaxpy3B__max_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_uint8
+GrB_Info GB_Adot4B__max_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_uint8
+GrB_Info GB_Adot2B__max_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5873,10 +6310,10 @@ GrB_Info GB_Adot2B__min_max_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_uint8
+GrB_Info GB_Adot3B__max_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5884,33 +6321,32 @@ GrB_Info GB_Adot3B__min_max_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_uint8
+GrB_Info GB_Asaxpy3B__max_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_uint16
+GrB_Info GB_Adot4B__max_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_uint16
+GrB_Info GB_Adot2B__max_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5918,10 +6354,10 @@ GrB_Info GB_Adot2B__min_max_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_uint16
+GrB_Info GB_Adot3B__max_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5929,33 +6365,32 @@ GrB_Info GB_Adot3B__min_max_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_uint16
+GrB_Info GB_Asaxpy3B__max_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_uint32
+GrB_Info GB_Adot4B__max_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_uint32
+GrB_Info GB_Adot2B__max_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -5963,10 +6398,10 @@ GrB_Info GB_Adot2B__min_max_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_uint32
+GrB_Info GB_Adot3B__max_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -5974,33 +6409,32 @@ GrB_Info GB_Adot3B__min_max_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_uint32
+GrB_Info GB_Asaxpy3B__max_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_max_uint64
+GrB_Info GB_Adot4B__max_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_uint64
+GrB_Info GB_Adot2B__max_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6008,10 +6442,10 @@ GrB_Info GB_Adot2B__min_max_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_uint64
+GrB_Info GB_Adot3B__max_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6019,32 +6453,32 @@ GrB_Info GB_Adot3B__min_max_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_uint64
+GrB_Info GB_Asaxpy3B__max_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_max_fp32
+GrB_Info GB_Adot4B__max_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_fp32
+GrB_Info GB_Adot2B__max_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6052,10 +6486,10 @@ GrB_Info GB_Adot2B__min_max_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_fp32
+GrB_Info GB_Adot3B__max_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6063,32 +6497,32 @@ GrB_Info GB_Adot3B__min_max_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_fp32
+GrB_Info GB_Asaxpy3B__max_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_max_fp64
+GrB_Info GB_Adot4B__max_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_max_fp64
+GrB_Info GB_Adot2B__max_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6096,10 +6530,10 @@ GrB_Info GB_Adot2B__min_max_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_max_fp64
+GrB_Info GB_Adot3B__max_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6107,33 +6541,32 @@ GrB_Info GB_Adot3B__min_max_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_max_fp64
+GrB_Info GB_Asaxpy3B__max_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_int8
+GrB_Info GB_Adot4B__max_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_int8
+GrB_Info GB_Adot2B__max_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6141,10 +6574,10 @@ GrB_Info GB_Adot2B__max_max_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_int8
+GrB_Info GB_Adot3B__max_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6152,33 +6585,32 @@ GrB_Info GB_Adot3B__max_max_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_int8
+GrB_Info GB_Asaxpy3B__max_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_int16
+GrB_Info GB_Adot4B__max_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_int16
+GrB_Info GB_Adot2B__max_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6186,10 +6618,10 @@ GrB_Info GB_Adot2B__max_max_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_int16
+GrB_Info GB_Adot3B__max_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6197,33 +6629,32 @@ GrB_Info GB_Adot3B__max_max_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_int16
+GrB_Info GB_Asaxpy3B__max_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_int32
+GrB_Info GB_Adot4B__max_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_int32
+GrB_Info GB_Adot2B__max_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6231,10 +6662,10 @@ GrB_Info GB_Adot2B__max_max_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_int32
+GrB_Info GB_Adot3B__max_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6242,33 +6673,32 @@ GrB_Info GB_Adot3B__max_max_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_int32
+GrB_Info GB_Asaxpy3B__max_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_int64
+GrB_Info GB_Adot4B__max_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_int64
+GrB_Info GB_Adot2B__any_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6276,10 +6706,10 @@ GrB_Info GB_Adot2B__max_max_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_int64
+GrB_Info GB_Adot3B__any_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6287,33 +6717,32 @@ GrB_Info GB_Adot3B__max_max_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_int64
+GrB_Info GB_Asaxpy3B__any_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_uint8
+GrB_Info GB_Adot4B__any_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_uint8
+GrB_Info GB_Adot2B__any_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6321,10 +6750,10 @@ GrB_Info GB_Adot2B__max_max_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_uint8
+GrB_Info GB_Adot3B__any_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6332,33 +6761,32 @@ GrB_Info GB_Adot3B__max_max_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_uint8
+GrB_Info GB_Asaxpy3B__any_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_uint16
+GrB_Info GB_Adot4B__any_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_uint16
+GrB_Info GB_Adot2B__any_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6366,10 +6794,10 @@ GrB_Info GB_Adot2B__max_max_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_uint16
+GrB_Info GB_Adot3B__any_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6377,33 +6805,32 @@ GrB_Info GB_Adot3B__max_max_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_uint16
+GrB_Info GB_Asaxpy3B__any_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_uint32
+GrB_Info GB_Adot4B__any_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_uint32
+GrB_Info GB_Adot2B__any_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6411,10 +6838,10 @@ GrB_Info GB_Adot2B__max_max_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_uint32
+GrB_Info GB_Adot3B__any_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6422,33 +6849,32 @@ GrB_Info GB_Adot3B__max_max_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_uint32
+GrB_Info GB_Asaxpy3B__any_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_max_uint64
+GrB_Info GB_Adot4B__any_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_uint64
+GrB_Info GB_Adot2B__any_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6456,10 +6882,10 @@ GrB_Info GB_Adot2B__max_max_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_uint64
+GrB_Info GB_Adot3B__any_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6467,32 +6893,32 @@ GrB_Info GB_Adot3B__max_max_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_uint64
+GrB_Info GB_Asaxpy3B__any_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_max_fp32
+GrB_Info GB_Adot4B__any_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_fp32
+GrB_Info GB_Adot2B__any_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6500,10 +6926,10 @@ GrB_Info GB_Adot2B__max_max_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_fp32
+GrB_Info GB_Adot3B__any_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6511,32 +6937,32 @@ GrB_Info GB_Adot3B__max_max_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_fp32
+GrB_Info GB_Asaxpy3B__any_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_max_fp64
+GrB_Info GB_Adot4B__any_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_max_fp64
+GrB_Info GB_Adot2B__any_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6544,10 +6970,10 @@ GrB_Info GB_Adot2B__max_max_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_max_fp64
+GrB_Info GB_Adot3B__any_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6555,33 +6981,32 @@ GrB_Info GB_Adot3B__max_max_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_max_fp64
+GrB_Info GB_Asaxpy3B__any_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_int8
+GrB_Info GB_Adot4B__any_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_int8
+GrB_Info GB_Adot2B__any_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6589,10 +7014,10 @@ GrB_Info GB_Adot2B__plus_max_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_int8
+GrB_Info GB_Adot3B__any_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6600,33 +7025,32 @@ GrB_Info GB_Adot3B__plus_max_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_int8
+GrB_Info GB_Asaxpy3B__any_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_uint8
+GrB_Info GB_Adot4B__any_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_uint8
+GrB_Info GB_Adot2B__any_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6634,10 +7058,10 @@ GrB_Info GB_Adot2B__plus_max_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_uint8
+GrB_Info GB_Adot3B__any_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6645,33 +7069,32 @@ GrB_Info GB_Adot3B__plus_max_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_uint8
+GrB_Info GB_Asaxpy3B__any_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_int16
+GrB_Info GB_Adot4B__any_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_int16
+GrB_Info GB_Adot2B__any_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6679,10 +7102,10 @@ GrB_Info GB_Adot2B__plus_max_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_int16
+GrB_Info GB_Adot3B__any_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6690,33 +7113,32 @@ GrB_Info GB_Adot3B__plus_max_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_int16
+GrB_Info GB_Asaxpy3B__any_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_uint16
+GrB_Info GB_Adot4B__any_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_uint16
+GrB_Info GB_Adot2B__plus_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6724,10 +7146,10 @@ GrB_Info GB_Adot2B__plus_max_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_uint16
+GrB_Info GB_Adot3B__plus_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6735,33 +7157,32 @@ GrB_Info GB_Adot3B__plus_max_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_uint16
+GrB_Info GB_Asaxpy3B__plus_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_int32
+GrB_Info GB_Adot4B__plus_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_int32
+GrB_Info GB_Adot2B__plus_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6769,10 +7190,10 @@ GrB_Info GB_Adot2B__plus_max_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_int32
+GrB_Info GB_Adot3B__plus_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6780,33 +7201,32 @@ GrB_Info GB_Adot3B__plus_max_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_int32
+GrB_Info GB_Asaxpy3B__plus_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_uint32
+GrB_Info GB_Adot4B__plus_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_uint32
+GrB_Info GB_Adot2B__plus_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6814,10 +7234,10 @@ GrB_Info GB_Adot2B__plus_max_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_uint32
+GrB_Info GB_Adot3B__plus_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6825,33 +7245,32 @@ GrB_Info GB_Adot3B__plus_max_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_uint32
+GrB_Info GB_Asaxpy3B__plus_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_int64
+GrB_Info GB_Adot4B__plus_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_int64
+GrB_Info GB_Adot2B__plus_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6859,10 +7278,10 @@ GrB_Info GB_Adot2B__plus_max_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_int64
+GrB_Info GB_Adot3B__plus_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6870,33 +7289,32 @@ GrB_Info GB_Adot3B__plus_max_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_int64
+GrB_Info GB_Asaxpy3B__plus_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_max_uint64
+GrB_Info GB_Adot4B__plus_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_uint64
+GrB_Info GB_Adot2B__plus_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6904,10 +7322,10 @@ GrB_Info GB_Adot2B__plus_max_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_uint64
+GrB_Info GB_Adot3B__plus_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6915,32 +7333,32 @@ GrB_Info GB_Adot3B__plus_max_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_uint64
+GrB_Info GB_Asaxpy3B__plus_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_max_fp32
+GrB_Info GB_Adot4B__plus_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_fp32
+GrB_Info GB_Adot2B__plus_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6948,10 +7366,10 @@ GrB_Info GB_Adot2B__plus_max_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_fp32
+GrB_Info GB_Adot3B__plus_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -6959,32 +7377,32 @@ GrB_Info GB_Adot3B__plus_max_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_fp32
+GrB_Info GB_Asaxpy3B__plus_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_max_fp64
+GrB_Info GB_Adot4B__plus_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_max_fp64
+GrB_Info GB_Adot2B__plus_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -6992,10 +7410,10 @@ GrB_Info GB_Adot2B__plus_max_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_max_fp64
+GrB_Info GB_Adot3B__plus_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7003,33 +7421,32 @@ GrB_Info GB_Adot3B__plus_max_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_max_fp64
+GrB_Info GB_Asaxpy3B__plus_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_int8
+GrB_Info GB_Adot4B__plus_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_int8
+GrB_Info GB_Adot2B__plus_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7037,10 +7454,10 @@ GrB_Info GB_Adot2B__times_max_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_int8
+GrB_Info GB_Adot3B__plus_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7048,33 +7465,32 @@ GrB_Info GB_Adot3B__times_max_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_int8
+GrB_Info GB_Asaxpy3B__plus_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_uint8
+GrB_Info GB_Adot4B__plus_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_uint8
+GrB_Info GB_Adot2B__plus_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7082,10 +7498,10 @@ GrB_Info GB_Adot2B__times_max_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_uint8
+GrB_Info GB_Adot3B__plus_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7093,33 +7509,32 @@ GrB_Info GB_Adot3B__times_max_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_uint8
+GrB_Info GB_Asaxpy3B__plus_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_int16
+GrB_Info GB_Adot4B__plus_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_int16
+GrB_Info GB_Adot2B__plus_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7127,10 +7542,10 @@ GrB_Info GB_Adot2B__times_max_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_int16
+GrB_Info GB_Adot3B__plus_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7138,33 +7553,32 @@ GrB_Info GB_Adot3B__times_max_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_int16
+GrB_Info GB_Asaxpy3B__plus_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_uint16
+GrB_Info GB_Adot4B__plus_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_uint16
+GrB_Info GB_Adot2B__times_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7172,10 +7586,10 @@ GrB_Info GB_Adot2B__times_max_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_uint16
+GrB_Info GB_Adot3B__times_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7183,33 +7597,32 @@ GrB_Info GB_Adot3B__times_max_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_uint16
+GrB_Info GB_Asaxpy3B__times_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_int32
+GrB_Info GB_Adot4B__times_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_int32
+GrB_Info GB_Adot2B__times_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7217,10 +7630,10 @@ GrB_Info GB_Adot2B__times_max_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_int32
+GrB_Info GB_Adot3B__times_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7228,33 +7641,32 @@ GrB_Info GB_Adot3B__times_max_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_int32
+GrB_Info GB_Asaxpy3B__times_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_uint32
+GrB_Info GB_Adot4B__times_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_uint32
+GrB_Info GB_Adot2B__times_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7262,10 +7674,10 @@ GrB_Info GB_Adot2B__times_max_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_uint32
+GrB_Info GB_Adot3B__times_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7273,33 +7685,32 @@ GrB_Info GB_Adot3B__times_max_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_uint32
+GrB_Info GB_Asaxpy3B__times_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_int64
+GrB_Info GB_Adot4B__times_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_int64
+GrB_Info GB_Adot2B__times_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7307,10 +7718,10 @@ GrB_Info GB_Adot2B__times_max_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_int64
+GrB_Info GB_Adot3B__times_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7318,33 +7729,32 @@ GrB_Info GB_Adot3B__times_max_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_int64
+GrB_Info GB_Asaxpy3B__times_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_max_uint64
+GrB_Info GB_Adot4B__times_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_uint64
+GrB_Info GB_Adot2B__times_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7352,10 +7762,10 @@ GrB_Info GB_Adot2B__times_max_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_uint64
+GrB_Info GB_Adot3B__times_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7363,32 +7773,32 @@ GrB_Info GB_Adot3B__times_max_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_uint64
+GrB_Info GB_Asaxpy3B__times_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_max_fp32
+GrB_Info GB_Adot4B__times_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_fp32
+GrB_Info GB_Adot2B__times_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7396,10 +7806,10 @@ GrB_Info GB_Adot2B__times_max_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_fp32
+GrB_Info GB_Adot3B__times_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7407,32 +7817,32 @@ GrB_Info GB_Adot3B__times_max_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_fp32
+GrB_Info GB_Asaxpy3B__times_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_max_fp64
+GrB_Info GB_Adot4B__times_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_max_fp64
+GrB_Info GB_Adot2B__times_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7440,10 +7850,10 @@ GrB_Info GB_Adot2B__times_max_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_max_fp64
+GrB_Info GB_Adot3B__times_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7451,33 +7861,32 @@ GrB_Info GB_Adot3B__times_max_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_max_fp64
+GrB_Info GB_Asaxpy3B__times_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_int8
+GrB_Info GB_Adot4B__times_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_int8
+GrB_Info GB_Adot2B__times_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7485,10 +7894,10 @@ GrB_Info GB_Adot2B__min_plus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_int8
+GrB_Info GB_Adot3B__times_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7496,33 +7905,32 @@ GrB_Info GB_Adot3B__min_plus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_int8
+GrB_Info GB_Asaxpy3B__times_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_int16
+GrB_Info GB_Adot4B__times_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_int16
+GrB_Info GB_Adot2B__times_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7530,10 +7938,10 @@ GrB_Info GB_Adot2B__min_plus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_int16
+GrB_Info GB_Adot3B__times_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7541,33 +7949,32 @@ GrB_Info GB_Adot3B__min_plus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_int16
+GrB_Info GB_Asaxpy3B__times_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_int32
+GrB_Info GB_Adot4B__times_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_int32
+GrB_Info GB_Adot2B__times_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7575,10 +7982,10 @@ GrB_Info GB_Adot2B__min_plus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_int32
+GrB_Info GB_Adot3B__times_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7586,33 +7993,32 @@ GrB_Info GB_Adot3B__min_plus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_int32
+GrB_Info GB_Asaxpy3B__times_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_int64
+GrB_Info GB_Adot4B__times_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_int64
+GrB_Info GB_Adot2B__min_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7620,10 +8026,10 @@ GrB_Info GB_Adot2B__min_plus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_int64
+GrB_Info GB_Adot3B__min_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7631,33 +8037,32 @@ GrB_Info GB_Adot3B__min_plus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_int64
+GrB_Info GB_Asaxpy3B__min_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_uint8
+GrB_Info GB_Adot4B__min_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_uint8
+GrB_Info GB_Adot2B__min_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7665,10 +8070,10 @@ GrB_Info GB_Adot2B__min_plus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_uint8
+GrB_Info GB_Adot3B__min_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7676,33 +8081,32 @@ GrB_Info GB_Adot3B__min_plus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_uint8
+GrB_Info GB_Asaxpy3B__min_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_uint16
+GrB_Info GB_Adot4B__min_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_uint16
+GrB_Info GB_Adot2B__min_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7710,10 +8114,10 @@ GrB_Info GB_Adot2B__min_plus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_uint16
+GrB_Info GB_Adot3B__min_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7721,33 +8125,32 @@ GrB_Info GB_Adot3B__min_plus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_uint16
+GrB_Info GB_Asaxpy3B__min_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_uint32
+GrB_Info GB_Adot4B__min_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_uint32
+GrB_Info GB_Adot2B__min_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7755,10 +8158,10 @@ GrB_Info GB_Adot2B__min_plus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_uint32
+GrB_Info GB_Adot3B__min_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7766,33 +8169,32 @@ GrB_Info GB_Adot3B__min_plus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_uint32
+GrB_Info GB_Asaxpy3B__min_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_plus_uint64
+GrB_Info GB_Adot4B__min_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_uint64
+GrB_Info GB_Adot2B__min_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7800,10 +8202,10 @@ GrB_Info GB_Adot2B__min_plus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_uint64
+GrB_Info GB_Adot3B__min_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7811,32 +8213,32 @@ GrB_Info GB_Adot3B__min_plus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_uint64
+GrB_Info GB_Asaxpy3B__min_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_plus_fp32
+GrB_Info GB_Adot4B__min_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_fp32
+GrB_Info GB_Adot2B__min_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7844,10 +8246,10 @@ GrB_Info GB_Adot2B__min_plus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_fp32
+GrB_Info GB_Adot3B__min_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7855,32 +8257,32 @@ GrB_Info GB_Adot3B__min_plus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_fp32
+GrB_Info GB_Asaxpy3B__min_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_plus_fp64
+GrB_Info GB_Adot4B__min_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_plus_fp64
+GrB_Info GB_Adot2B__min_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7888,10 +8290,10 @@ GrB_Info GB_Adot2B__min_plus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_plus_fp64
+GrB_Info GB_Adot3B__min_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7899,33 +8301,32 @@ GrB_Info GB_Adot3B__min_plus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_plus_fp64
+GrB_Info GB_Asaxpy3B__min_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_int8
+GrB_Info GB_Adot4B__min_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_int8
+GrB_Info GB_Adot2B__min_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7933,10 +8334,10 @@ GrB_Info GB_Adot2B__max_plus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_int8
+GrB_Info GB_Adot3B__min_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7944,33 +8345,32 @@ GrB_Info GB_Adot3B__max_plus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_int8
+GrB_Info GB_Asaxpy3B__min_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_int16
+GrB_Info GB_Adot4B__min_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_int16
+GrB_Info GB_Adot2B__min_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -7978,10 +8378,10 @@ GrB_Info GB_Adot2B__max_plus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_int16
+GrB_Info GB_Adot3B__min_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -7989,33 +8389,32 @@ GrB_Info GB_Adot3B__max_plus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_int16
+GrB_Info GB_Asaxpy3B__min_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_int32
+GrB_Info GB_Adot4B__min_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_int32
+GrB_Info GB_Adot2B__min_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8023,10 +8422,10 @@ GrB_Info GB_Adot2B__max_plus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_int32
+GrB_Info GB_Adot3B__min_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8034,33 +8433,32 @@ GrB_Info GB_Adot3B__max_plus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_int32
+GrB_Info GB_Asaxpy3B__min_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_int64
+GrB_Info GB_Adot4B__min_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_int64
+GrB_Info GB_Adot2B__max_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8068,10 +8466,10 @@ GrB_Info GB_Adot2B__max_plus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_int64
+GrB_Info GB_Adot3B__max_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8079,33 +8477,32 @@ GrB_Info GB_Adot3B__max_plus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_int64
+GrB_Info GB_Asaxpy3B__max_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_uint8
+GrB_Info GB_Adot4B__max_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_uint8
+GrB_Info GB_Adot2B__max_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8113,10 +8510,10 @@ GrB_Info GB_Adot2B__max_plus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_uint8
+GrB_Info GB_Adot3B__max_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8124,33 +8521,32 @@ GrB_Info GB_Adot3B__max_plus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_uint8
+GrB_Info GB_Asaxpy3B__max_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_uint16
+GrB_Info GB_Adot4B__max_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_uint16
+GrB_Info GB_Adot2B__max_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8158,10 +8554,10 @@ GrB_Info GB_Adot2B__max_plus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_uint16
+GrB_Info GB_Adot3B__max_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8169,33 +8565,32 @@ GrB_Info GB_Adot3B__max_plus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_uint16
+GrB_Info GB_Asaxpy3B__max_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_uint32
+GrB_Info GB_Adot4B__max_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_uint32
+GrB_Info GB_Adot2B__max_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8203,10 +8598,10 @@ GrB_Info GB_Adot2B__max_plus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_uint32
+GrB_Info GB_Adot3B__max_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8214,33 +8609,32 @@ GrB_Info GB_Adot3B__max_plus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_uint32
+GrB_Info GB_Asaxpy3B__max_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_plus_uint64
+GrB_Info GB_Adot4B__max_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_uint64
+GrB_Info GB_Adot2B__max_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8248,10 +8642,10 @@ GrB_Info GB_Adot2B__max_plus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_uint64
+GrB_Info GB_Adot3B__max_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8259,32 +8653,32 @@ GrB_Info GB_Adot3B__max_plus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_uint64
+GrB_Info GB_Asaxpy3B__max_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_plus_fp32
+GrB_Info GB_Adot4B__max_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_fp32
+GrB_Info GB_Adot2B__max_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8292,10 +8686,10 @@ GrB_Info GB_Adot2B__max_plus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_fp32
+GrB_Info GB_Adot3B__max_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8303,32 +8697,32 @@ GrB_Info GB_Adot3B__max_plus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_fp32
+GrB_Info GB_Asaxpy3B__max_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_plus_fp64
+GrB_Info GB_Adot4B__max_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_plus_fp64
+GrB_Info GB_Adot2B__max_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8336,10 +8730,10 @@ GrB_Info GB_Adot2B__max_plus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_plus_fp64
+GrB_Info GB_Adot3B__max_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8347,33 +8741,32 @@ GrB_Info GB_Adot3B__max_plus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_plus_fp64
+GrB_Info GB_Asaxpy3B__max_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_int8
+GrB_Info GB_Adot4B__max_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_int8
+GrB_Info GB_Adot2B__max_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8381,10 +8774,10 @@ GrB_Info GB_Adot2B__plus_plus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_int8
+GrB_Info GB_Adot3B__max_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8392,33 +8785,32 @@ GrB_Info GB_Adot3B__plus_plus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_int8
+GrB_Info GB_Asaxpy3B__max_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_uint8
+GrB_Info GB_Adot4B__max_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_uint8
+GrB_Info GB_Adot2B__max_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8426,10 +8818,10 @@ GrB_Info GB_Adot2B__plus_plus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_uint8
+GrB_Info GB_Adot3B__max_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8437,33 +8829,32 @@ GrB_Info GB_Adot3B__plus_plus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_uint8
+GrB_Info GB_Asaxpy3B__max_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_int16
+GrB_Info GB_Adot4B__max_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_int16
+GrB_Info GB_Adot2B__max_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8471,10 +8862,10 @@ GrB_Info GB_Adot2B__plus_plus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_int16
+GrB_Info GB_Adot3B__max_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8482,33 +8873,32 @@ GrB_Info GB_Adot3B__plus_plus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_int16
+GrB_Info GB_Asaxpy3B__max_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_uint16
+GrB_Info GB_Adot4B__max_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_uint16
+GrB_Info GB_Adot2B__any_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8516,10 +8906,10 @@ GrB_Info GB_Adot2B__plus_plus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_uint16
+GrB_Info GB_Adot3B__any_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8527,33 +8917,32 @@ GrB_Info GB_Adot3B__plus_plus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_uint16
+GrB_Info GB_Asaxpy3B__any_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_int32
+GrB_Info GB_Adot4B__any_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_int32
+GrB_Info GB_Adot2B__any_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8561,10 +8950,10 @@ GrB_Info GB_Adot2B__plus_plus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_int32
+GrB_Info GB_Adot3B__any_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8572,33 +8961,32 @@ GrB_Info GB_Adot3B__plus_plus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_int32
+GrB_Info GB_Asaxpy3B__any_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_uint32
+GrB_Info GB_Adot4B__any_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_uint32
+GrB_Info GB_Adot2B__any_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8606,10 +8994,10 @@ GrB_Info GB_Adot2B__plus_plus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_uint32
+GrB_Info GB_Adot3B__any_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8617,33 +9005,32 @@ GrB_Info GB_Adot3B__plus_plus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_uint32
+GrB_Info GB_Asaxpy3B__any_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_int64
+GrB_Info GB_Adot4B__any_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_int64
+GrB_Info GB_Adot2B__any_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8651,10 +9038,10 @@ GrB_Info GB_Adot2B__plus_plus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_int64
+GrB_Info GB_Adot3B__any_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8662,33 +9049,32 @@ GrB_Info GB_Adot3B__plus_plus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_int64
+GrB_Info GB_Asaxpy3B__any_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_plus_uint64
+GrB_Info GB_Adot4B__any_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_uint64
+GrB_Info GB_Adot2B__any_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8696,10 +9082,10 @@ GrB_Info GB_Adot2B__plus_plus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_uint64
+GrB_Info GB_Adot3B__any_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8707,32 +9093,32 @@ GrB_Info GB_Adot3B__plus_plus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_uint64
+GrB_Info GB_Asaxpy3B__any_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_plus_fp32
+GrB_Info GB_Adot4B__any_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_fp32
+GrB_Info GB_Adot2B__any_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8740,10 +9126,10 @@ GrB_Info GB_Adot2B__plus_plus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_fp32
+GrB_Info GB_Adot3B__any_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8751,32 +9137,32 @@ GrB_Info GB_Adot3B__plus_plus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_fp32
+GrB_Info GB_Asaxpy3B__any_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_plus_fp64
+GrB_Info GB_Adot4B__any_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_plus_fp64
+GrB_Info GB_Adot2B__any_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8784,10 +9170,10 @@ GrB_Info GB_Adot2B__plus_plus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_plus_fp64
+GrB_Info GB_Adot3B__any_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8795,33 +9181,32 @@ GrB_Info GB_Adot3B__plus_plus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_plus_fp64
+GrB_Info GB_Asaxpy3B__any_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_int8
+GrB_Info GB_Adot4B__any_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_int8
+GrB_Info GB_Adot2B__any_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8829,10 +9214,10 @@ GrB_Info GB_Adot2B__times_plus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_int8
+GrB_Info GB_Adot3B__any_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8840,33 +9225,32 @@ GrB_Info GB_Adot3B__times_plus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_int8
+GrB_Info GB_Asaxpy3B__any_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_uint8
+GrB_Info GB_Adot4B__any_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_uint8
+GrB_Info GB_Adot2B__any_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8874,10 +9258,10 @@ GrB_Info GB_Adot2B__times_plus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_uint8
+GrB_Info GB_Adot3B__any_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8885,33 +9269,32 @@ GrB_Info GB_Adot3B__times_plus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_uint8
+GrB_Info GB_Asaxpy3B__any_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_int16
+GrB_Info GB_Adot4B__any_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_int16
+GrB_Info GB_Adot2B__any_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8919,10 +9302,10 @@ GrB_Info GB_Adot2B__times_plus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_int16
+GrB_Info GB_Adot3B__any_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8930,33 +9313,32 @@ GrB_Info GB_Adot3B__times_plus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_int16
+GrB_Info GB_Asaxpy3B__any_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_uint16
+GrB_Info GB_Adot4B__any_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_uint16
+GrB_Info GB_Adot2B__plus_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -8964,10 +9346,10 @@ GrB_Info GB_Adot2B__times_plus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_uint16
+GrB_Info GB_Adot3B__plus_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -8975,33 +9357,32 @@ GrB_Info GB_Adot3B__times_plus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_uint16
+GrB_Info GB_Asaxpy3B__plus_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_int32
+GrB_Info GB_Adot4B__plus_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_int32
+GrB_Info GB_Adot2B__plus_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9009,10 +9390,10 @@ GrB_Info GB_Adot2B__times_plus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_int32
+GrB_Info GB_Adot3B__plus_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9020,33 +9401,32 @@ GrB_Info GB_Adot3B__times_plus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_int32
+GrB_Info GB_Asaxpy3B__plus_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_uint32
+GrB_Info GB_Adot4B__plus_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_uint32
+GrB_Info GB_Adot2B__plus_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9054,10 +9434,10 @@ GrB_Info GB_Adot2B__times_plus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_uint32
+GrB_Info GB_Adot3B__plus_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9065,33 +9445,32 @@ GrB_Info GB_Adot3B__times_plus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_uint32
+GrB_Info GB_Asaxpy3B__plus_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_int64
+GrB_Info GB_Adot4B__plus_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_int64
+GrB_Info GB_Adot2B__plus_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9099,10 +9478,10 @@ GrB_Info GB_Adot2B__times_plus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_int64
+GrB_Info GB_Adot3B__plus_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9110,33 +9489,32 @@ GrB_Info GB_Adot3B__times_plus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_int64
+GrB_Info GB_Asaxpy3B__plus_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_plus_uint64
+GrB_Info GB_Adot4B__plus_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_uint64
+GrB_Info GB_Adot2B__plus_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9144,10 +9522,10 @@ GrB_Info GB_Adot2B__times_plus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_uint64
+GrB_Info GB_Adot3B__plus_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9155,32 +9533,32 @@ GrB_Info GB_Adot3B__times_plus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_uint64
+GrB_Info GB_Asaxpy3B__plus_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_plus_fp32
+GrB_Info GB_Adot4B__plus_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_fp32
+GrB_Info GB_Adot2B__plus_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9188,10 +9566,10 @@ GrB_Info GB_Adot2B__times_plus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_fp32
+GrB_Info GB_Adot3B__plus_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9199,32 +9577,32 @@ GrB_Info GB_Adot3B__times_plus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_fp32
+GrB_Info GB_Asaxpy3B__plus_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_plus_fp64
+GrB_Info GB_Adot4B__plus_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_plus_fp64
+GrB_Info GB_Adot2B__plus_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9232,10 +9610,10 @@ GrB_Info GB_Adot2B__times_plus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_plus_fp64
+GrB_Info GB_Adot3B__plus_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9243,33 +9621,32 @@ GrB_Info GB_Adot3B__times_plus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_plus_fp64
+GrB_Info GB_Asaxpy3B__plus_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_int8
+GrB_Info GB_Adot4B__plus_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_int8
+GrB_Info GB_Adot2B__plus_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9277,10 +9654,10 @@ GrB_Info GB_Adot2B__min_minus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_int8
+GrB_Info GB_Adot3B__plus_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9288,33 +9665,32 @@ GrB_Info GB_Adot3B__min_minus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_int8
+GrB_Info GB_Asaxpy3B__plus_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_int16
+GrB_Info GB_Adot4B__plus_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_int16
+GrB_Info GB_Adot2B__plus_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9322,10 +9698,10 @@ GrB_Info GB_Adot2B__min_minus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_int16
+GrB_Info GB_Adot3B__plus_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9333,33 +9709,32 @@ GrB_Info GB_Adot3B__min_minus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_int16
+GrB_Info GB_Asaxpy3B__plus_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_int32
+GrB_Info GB_Adot4B__plus_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_int32
+GrB_Info GB_Adot2B__plus_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9367,10 +9742,10 @@ GrB_Info GB_Adot2B__min_minus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_int32
+GrB_Info GB_Adot3B__plus_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9378,33 +9753,32 @@ GrB_Info GB_Adot3B__min_minus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_int32
+GrB_Info GB_Asaxpy3B__plus_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_int64
+GrB_Info GB_Adot4B__plus_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_int64
+GrB_Info GB_Adot2B__times_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9412,10 +9786,10 @@ GrB_Info GB_Adot2B__min_minus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_int64
+GrB_Info GB_Adot3B__times_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9423,33 +9797,32 @@ GrB_Info GB_Adot3B__min_minus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_int64
+GrB_Info GB_Asaxpy3B__times_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_uint8
+GrB_Info GB_Adot4B__times_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_uint8
+GrB_Info GB_Adot2B__times_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9457,10 +9830,10 @@ GrB_Info GB_Adot2B__min_minus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_uint8
+GrB_Info GB_Adot3B__times_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9468,33 +9841,32 @@ GrB_Info GB_Adot3B__min_minus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_uint8
+GrB_Info GB_Asaxpy3B__times_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_uint16
+GrB_Info GB_Adot4B__times_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_uint16
+GrB_Info GB_Adot2B__times_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9502,10 +9874,10 @@ GrB_Info GB_Adot2B__min_minus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_uint16
+GrB_Info GB_Adot3B__times_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9513,33 +9885,32 @@ GrB_Info GB_Adot3B__min_minus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_uint16
+GrB_Info GB_Asaxpy3B__times_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_uint32
+GrB_Info GB_Adot4B__times_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_uint32
+GrB_Info GB_Adot2B__times_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9547,10 +9918,10 @@ GrB_Info GB_Adot2B__min_minus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_uint32
+GrB_Info GB_Adot3B__times_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9558,33 +9929,32 @@ GrB_Info GB_Adot3B__min_minus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_uint32
+GrB_Info GB_Asaxpy3B__times_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_minus_uint64
+GrB_Info GB_Adot4B__times_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_uint64
+GrB_Info GB_Adot2B__times_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9592,10 +9962,10 @@ GrB_Info GB_Adot2B__min_minus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_uint64
+GrB_Info GB_Adot3B__times_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9603,32 +9973,32 @@ GrB_Info GB_Adot3B__min_minus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_uint64
+GrB_Info GB_Asaxpy3B__times_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_minus_fp32
+GrB_Info GB_Adot4B__times_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_fp32
+GrB_Info GB_Adot2B__times_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9636,10 +10006,10 @@ GrB_Info GB_Adot2B__min_minus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_fp32
+GrB_Info GB_Adot3B__times_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9647,32 +10017,32 @@ GrB_Info GB_Adot3B__min_minus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_fp32
+GrB_Info GB_Asaxpy3B__times_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_minus_fp64
+GrB_Info GB_Adot4B__times_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_minus_fp64
+GrB_Info GB_Adot2B__times_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9680,10 +10050,10 @@ GrB_Info GB_Adot2B__min_minus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_minus_fp64
+GrB_Info GB_Adot3B__times_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9691,33 +10061,32 @@ GrB_Info GB_Adot3B__min_minus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_minus_fp64
+GrB_Info GB_Asaxpy3B__times_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_int8
+GrB_Info GB_Adot4B__times_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_int8
+GrB_Info GB_Adot2B__times_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9725,10 +10094,10 @@ GrB_Info GB_Adot2B__max_minus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_int8
+GrB_Info GB_Adot3B__times_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9736,33 +10105,32 @@ GrB_Info GB_Adot3B__max_minus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_int8
+GrB_Info GB_Asaxpy3B__times_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_int16
+GrB_Info GB_Adot4B__times_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_int16
+GrB_Info GB_Adot2B__times_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9770,10 +10138,10 @@ GrB_Info GB_Adot2B__max_minus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_int16
+GrB_Info GB_Adot3B__times_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9781,33 +10149,32 @@ GrB_Info GB_Adot3B__max_minus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_int16
+GrB_Info GB_Asaxpy3B__times_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_int32
+GrB_Info GB_Adot4B__times_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_int32
+GrB_Info GB_Adot2B__times_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9815,10 +10182,10 @@ GrB_Info GB_Adot2B__max_minus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_int32
+GrB_Info GB_Adot3B__times_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9826,33 +10193,32 @@ GrB_Info GB_Adot3B__max_minus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_int32
+GrB_Info GB_Asaxpy3B__times_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_int64
+GrB_Info GB_Adot4B__times_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_int64
+GrB_Info GB_Adot2B__min_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9860,10 +10226,10 @@ GrB_Info GB_Adot2B__max_minus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_int64
+GrB_Info GB_Adot3B__min_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9871,33 +10237,32 @@ GrB_Info GB_Adot3B__max_minus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_int64
+GrB_Info GB_Asaxpy3B__min_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_uint8
+GrB_Info GB_Adot4B__min_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_uint8
+GrB_Info GB_Adot2B__min_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9905,10 +10270,10 @@ GrB_Info GB_Adot2B__max_minus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_uint8
+GrB_Info GB_Adot3B__min_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9916,33 +10281,32 @@ GrB_Info GB_Adot3B__max_minus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_uint8
+GrB_Info GB_Asaxpy3B__min_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_uint16
+GrB_Info GB_Adot4B__min_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_uint16
+GrB_Info GB_Adot2B__min_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9950,10 +10314,10 @@ GrB_Info GB_Adot2B__max_minus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_uint16
+GrB_Info GB_Adot3B__min_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -9961,33 +10325,32 @@ GrB_Info GB_Adot3B__max_minus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_uint16
+GrB_Info GB_Asaxpy3B__min_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_uint32
+GrB_Info GB_Adot4B__min_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_uint32
+GrB_Info GB_Adot2B__min_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -9995,10 +10358,10 @@ GrB_Info GB_Adot2B__max_minus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_uint32
+GrB_Info GB_Adot3B__min_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10006,33 +10369,32 @@ GrB_Info GB_Adot3B__max_minus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_uint32
+GrB_Info GB_Asaxpy3B__min_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_minus_uint64
+GrB_Info GB_Adot4B__min_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_uint64
+GrB_Info GB_Adot2B__min_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10040,10 +10402,10 @@ GrB_Info GB_Adot2B__max_minus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_uint64
+GrB_Info GB_Adot3B__min_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10051,32 +10413,32 @@ GrB_Info GB_Adot3B__max_minus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_uint64
+GrB_Info GB_Asaxpy3B__min_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_minus_fp32
+GrB_Info GB_Adot4B__min_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_fp32
+GrB_Info GB_Adot2B__min_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10084,10 +10446,10 @@ GrB_Info GB_Adot2B__max_minus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_fp32
+GrB_Info GB_Adot3B__min_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10095,32 +10457,32 @@ GrB_Info GB_Adot3B__max_minus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_fp32
+GrB_Info GB_Asaxpy3B__min_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_minus_fp64
+GrB_Info GB_Adot4B__min_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_minus_fp64
+GrB_Info GB_Adot2B__min_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10128,10 +10490,10 @@ GrB_Info GB_Adot2B__max_minus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_minus_fp64
+GrB_Info GB_Adot3B__min_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10139,33 +10501,32 @@ GrB_Info GB_Adot3B__max_minus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_minus_fp64
+GrB_Info GB_Asaxpy3B__min_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_int8
+GrB_Info GB_Adot4B__min_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_int8
+GrB_Info GB_Adot2B__min_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10173,10 +10534,10 @@ GrB_Info GB_Adot2B__plus_minus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_int8
+GrB_Info GB_Adot3B__min_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10184,33 +10545,32 @@ GrB_Info GB_Adot3B__plus_minus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_int8
+GrB_Info GB_Asaxpy3B__min_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_uint8
+GrB_Info GB_Adot4B__min_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_uint8
+GrB_Info GB_Adot2B__min_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10218,10 +10578,10 @@ GrB_Info GB_Adot2B__plus_minus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_uint8
+GrB_Info GB_Adot3B__min_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10229,33 +10589,32 @@ GrB_Info GB_Adot3B__plus_minus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_uint8
+GrB_Info GB_Asaxpy3B__min_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_int16
+GrB_Info GB_Adot4B__min_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_int16
+GrB_Info GB_Adot2B__min_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10263,10 +10622,10 @@ GrB_Info GB_Adot2B__plus_minus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_int16
+GrB_Info GB_Adot3B__min_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10274,33 +10633,32 @@ GrB_Info GB_Adot3B__plus_minus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_int16
+GrB_Info GB_Asaxpy3B__min_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_uint16
+GrB_Info GB_Adot4B__min_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_uint16
+GrB_Info GB_Adot2B__max_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10308,10 +10666,10 @@ GrB_Info GB_Adot2B__plus_minus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_uint16
+GrB_Info GB_Adot3B__max_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10319,33 +10677,32 @@ GrB_Info GB_Adot3B__plus_minus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_uint16
+GrB_Info GB_Asaxpy3B__max_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_int32
+GrB_Info GB_Adot4B__max_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_int32
+GrB_Info GB_Adot2B__max_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10353,10 +10710,10 @@ GrB_Info GB_Adot2B__plus_minus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_int32
+GrB_Info GB_Adot3B__max_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10364,33 +10721,32 @@ GrB_Info GB_Adot3B__plus_minus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_int32
+GrB_Info GB_Asaxpy3B__max_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_uint32
+GrB_Info GB_Adot4B__max_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_uint32
+GrB_Info GB_Adot2B__max_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10398,10 +10754,10 @@ GrB_Info GB_Adot2B__plus_minus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_uint32
+GrB_Info GB_Adot3B__max_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10409,33 +10765,32 @@ GrB_Info GB_Adot3B__plus_minus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_uint32
+GrB_Info GB_Asaxpy3B__max_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_int64
+GrB_Info GB_Adot4B__max_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_int64
+GrB_Info GB_Adot2B__max_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10443,10 +10798,10 @@ GrB_Info GB_Adot2B__plus_minus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_int64
+GrB_Info GB_Adot3B__max_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10454,33 +10809,32 @@ GrB_Info GB_Adot3B__plus_minus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_int64
+GrB_Info GB_Asaxpy3B__max_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_minus_uint64
+GrB_Info GB_Adot4B__max_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_uint64
+GrB_Info GB_Adot2B__max_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10488,10 +10842,10 @@ GrB_Info GB_Adot2B__plus_minus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_uint64
+GrB_Info GB_Adot3B__max_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10499,32 +10853,32 @@ GrB_Info GB_Adot3B__plus_minus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_uint64
+GrB_Info GB_Asaxpy3B__max_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_minus_fp32
+GrB_Info GB_Adot4B__max_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_fp32
+GrB_Info GB_Adot2B__max_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10532,10 +10886,10 @@ GrB_Info GB_Adot2B__plus_minus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_fp32
+GrB_Info GB_Adot3B__max_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10543,32 +10897,32 @@ GrB_Info GB_Adot3B__plus_minus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_fp32
+GrB_Info GB_Asaxpy3B__max_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_minus_fp64
+GrB_Info GB_Adot4B__max_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_minus_fp64
+GrB_Info GB_Adot2B__max_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10576,10 +10930,10 @@ GrB_Info GB_Adot2B__plus_minus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_minus_fp64
+GrB_Info GB_Adot3B__max_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10587,33 +10941,32 @@ GrB_Info GB_Adot3B__plus_minus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_minus_fp64
+GrB_Info GB_Asaxpy3B__max_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_int8
+GrB_Info GB_Adot4B__max_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_int8
+GrB_Info GB_Adot2B__max_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10621,10 +10974,10 @@ GrB_Info GB_Adot2B__times_minus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_int8
+GrB_Info GB_Adot3B__max_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10632,33 +10985,32 @@ GrB_Info GB_Adot3B__times_minus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_int8
+GrB_Info GB_Asaxpy3B__max_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_uint8
+GrB_Info GB_Adot4B__max_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_uint8
+GrB_Info GB_Adot2B__max_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10666,10 +11018,10 @@ GrB_Info GB_Adot2B__times_minus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_uint8
+GrB_Info GB_Adot3B__max_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10677,33 +11029,32 @@ GrB_Info GB_Adot3B__times_minus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_uint8
+GrB_Info GB_Asaxpy3B__max_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_int16
+GrB_Info GB_Adot4B__max_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_int16
+GrB_Info GB_Adot2B__max_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10711,10 +11062,10 @@ GrB_Info GB_Adot2B__times_minus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_int16
+GrB_Info GB_Adot3B__max_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10722,33 +11073,32 @@ GrB_Info GB_Adot3B__times_minus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_int16
+GrB_Info GB_Asaxpy3B__max_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_uint16
+GrB_Info GB_Adot4B__max_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_uint16
+GrB_Info GB_Adot2B__any_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10756,10 +11106,10 @@ GrB_Info GB_Adot2B__times_minus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_uint16
+GrB_Info GB_Adot3B__any_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10767,33 +11117,32 @@ GrB_Info GB_Adot3B__times_minus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_uint16
+GrB_Info GB_Asaxpy3B__any_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_int32
+GrB_Info GB_Adot4B__any_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_int32
+GrB_Info GB_Adot2B__any_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10801,10 +11150,10 @@ GrB_Info GB_Adot2B__times_minus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_int32
+GrB_Info GB_Adot3B__any_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10812,33 +11161,32 @@ GrB_Info GB_Adot3B__times_minus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_int32
+GrB_Info GB_Asaxpy3B__any_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_uint32
+GrB_Info GB_Adot4B__any_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_uint32
+GrB_Info GB_Adot2B__any_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10846,10 +11194,10 @@ GrB_Info GB_Adot2B__times_minus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_uint32
+GrB_Info GB_Adot3B__any_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10857,33 +11205,32 @@ GrB_Info GB_Adot3B__times_minus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_uint32
+GrB_Info GB_Asaxpy3B__any_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_int64
+GrB_Info GB_Adot4B__any_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_int64
+GrB_Info GB_Adot2B__any_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10891,10 +11238,10 @@ GrB_Info GB_Adot2B__times_minus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_int64
+GrB_Info GB_Adot3B__any_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10902,33 +11249,32 @@ GrB_Info GB_Adot3B__times_minus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_int64
+GrB_Info GB_Asaxpy3B__any_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_minus_uint64
+GrB_Info GB_Adot4B__any_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_uint64
+GrB_Info GB_Adot2B__any_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10936,10 +11282,10 @@ GrB_Info GB_Adot2B__times_minus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_uint64
+GrB_Info GB_Adot3B__any_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10947,32 +11293,32 @@ GrB_Info GB_Adot3B__times_minus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_uint64
+GrB_Info GB_Asaxpy3B__any_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_minus_fp32
+GrB_Info GB_Adot4B__any_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_fp32
+GrB_Info GB_Adot2B__any_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -10980,10 +11326,10 @@ GrB_Info GB_Adot2B__times_minus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_fp32
+GrB_Info GB_Adot3B__any_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -10991,32 +11337,32 @@ GrB_Info GB_Adot3B__times_minus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_fp32
+GrB_Info GB_Asaxpy3B__any_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_minus_fp64
+GrB_Info GB_Adot4B__any_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_minus_fp64
+GrB_Info GB_Adot2B__any_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11024,10 +11370,10 @@ GrB_Info GB_Adot2B__times_minus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_minus_fp64
+GrB_Info GB_Adot3B__any_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11035,33 +11381,32 @@ GrB_Info GB_Adot3B__times_minus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_minus_fp64
+GrB_Info GB_Asaxpy3B__any_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_int8
+GrB_Info GB_Adot4B__any_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_int8
+GrB_Info GB_Adot2B__any_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11069,10 +11414,10 @@ GrB_Info GB_Adot2B__min_rminus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_int8
+GrB_Info GB_Adot3B__any_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11080,33 +11425,32 @@ GrB_Info GB_Adot3B__min_rminus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_int8
+GrB_Info GB_Asaxpy3B__any_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_int16
+GrB_Info GB_Adot4B__any_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_int16
+GrB_Info GB_Adot2B__any_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11114,10 +11458,10 @@ GrB_Info GB_Adot2B__min_rminus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_int16
+GrB_Info GB_Adot3B__any_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11125,33 +11469,32 @@ GrB_Info GB_Adot3B__min_rminus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_int16
+GrB_Info GB_Asaxpy3B__any_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_int32
+GrB_Info GB_Adot4B__any_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_int32
+GrB_Info GB_Adot2B__any_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11159,10 +11502,10 @@ GrB_Info GB_Adot2B__min_rminus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_int32
+GrB_Info GB_Adot3B__any_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11170,33 +11513,32 @@ GrB_Info GB_Adot3B__min_rminus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_int32
+GrB_Info GB_Asaxpy3B__any_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_int64
+GrB_Info GB_Adot4B__any_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_int64
+GrB_Info GB_Adot2B__plus_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11204,10 +11546,10 @@ GrB_Info GB_Adot2B__min_rminus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_int64
+GrB_Info GB_Adot3B__plus_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11215,33 +11557,32 @@ GrB_Info GB_Adot3B__min_rminus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_int64
+GrB_Info GB_Asaxpy3B__plus_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_uint8
+GrB_Info GB_Adot4B__plus_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_uint8
+GrB_Info GB_Adot2B__plus_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11249,10 +11590,10 @@ GrB_Info GB_Adot2B__min_rminus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_uint8
+GrB_Info GB_Adot3B__plus_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11260,33 +11601,32 @@ GrB_Info GB_Adot3B__min_rminus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_uint8
+GrB_Info GB_Asaxpy3B__plus_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_uint16
+GrB_Info GB_Adot4B__plus_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_uint16
+GrB_Info GB_Adot2B__plus_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11294,10 +11634,10 @@ GrB_Info GB_Adot2B__min_rminus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_uint16
+GrB_Info GB_Adot3B__plus_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11305,33 +11645,32 @@ GrB_Info GB_Adot3B__min_rminus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_uint16
+GrB_Info GB_Asaxpy3B__plus_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_uint32
+GrB_Info GB_Adot4B__plus_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_uint32
+GrB_Info GB_Adot2B__plus_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11339,10 +11678,10 @@ GrB_Info GB_Adot2B__min_rminus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_uint32
+GrB_Info GB_Adot3B__plus_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11350,33 +11689,32 @@ GrB_Info GB_Adot3B__min_rminus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_uint32
+GrB_Info GB_Asaxpy3B__plus_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rminus_uint64
+GrB_Info GB_Adot4B__plus_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_uint64
+GrB_Info GB_Adot2B__plus_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11384,10 +11722,10 @@ GrB_Info GB_Adot2B__min_rminus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_uint64
+GrB_Info GB_Adot3B__plus_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11395,32 +11733,32 @@ GrB_Info GB_Adot3B__min_rminus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_uint64
+GrB_Info GB_Asaxpy3B__plus_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_rminus_fp32
+GrB_Info GB_Adot4B__plus_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_fp32
+GrB_Info GB_Adot2B__plus_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11428,10 +11766,10 @@ GrB_Info GB_Adot2B__min_rminus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_fp32
+GrB_Info GB_Adot3B__plus_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11439,32 +11777,32 @@ GrB_Info GB_Adot3B__min_rminus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_fp32
+GrB_Info GB_Asaxpy3B__plus_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_rminus_fp64
+GrB_Info GB_Adot4B__plus_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rminus_fp64
+GrB_Info GB_Adot2B__plus_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11472,10 +11810,10 @@ GrB_Info GB_Adot2B__min_rminus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rminus_fp64
+GrB_Info GB_Adot3B__plus_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11483,33 +11821,32 @@ GrB_Info GB_Adot3B__min_rminus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rminus_fp64
+GrB_Info GB_Asaxpy3B__plus_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_int8
+GrB_Info GB_Adot4B__plus_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_int8
+GrB_Info GB_Adot2B__plus_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11517,10 +11854,10 @@ GrB_Info GB_Adot2B__max_rminus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_int8
+GrB_Info GB_Adot3B__plus_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11528,33 +11865,32 @@ GrB_Info GB_Adot3B__max_rminus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_int8
+GrB_Info GB_Asaxpy3B__plus_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_int16
+GrB_Info GB_Adot4B__plus_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_int16
+GrB_Info GB_Adot2B__plus_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11562,10 +11898,10 @@ GrB_Info GB_Adot2B__max_rminus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_int16
+GrB_Info GB_Adot3B__plus_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11573,33 +11909,32 @@ GrB_Info GB_Adot3B__max_rminus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_int16
+GrB_Info GB_Asaxpy3B__plus_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_int32
+GrB_Info GB_Adot4B__plus_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_int32
+GrB_Info GB_Adot2B__plus_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11607,10 +11942,10 @@ GrB_Info GB_Adot2B__max_rminus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_int32
+GrB_Info GB_Adot3B__plus_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11618,33 +11953,32 @@ GrB_Info GB_Adot3B__max_rminus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_int32
+GrB_Info GB_Asaxpy3B__plus_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_int64
+GrB_Info GB_Adot4B__plus_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_int64
+GrB_Info GB_Adot2B__times_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11652,10 +11986,10 @@ GrB_Info GB_Adot2B__max_rminus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_int64
+GrB_Info GB_Adot3B__times_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11663,33 +11997,32 @@ GrB_Info GB_Adot3B__max_rminus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_int64
+GrB_Info GB_Asaxpy3B__times_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_uint8
+GrB_Info GB_Adot4B__times_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_uint8
+GrB_Info GB_Adot2B__times_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11697,10 +12030,10 @@ GrB_Info GB_Adot2B__max_rminus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_uint8
+GrB_Info GB_Adot3B__times_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11708,33 +12041,32 @@ GrB_Info GB_Adot3B__max_rminus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_uint8
+GrB_Info GB_Asaxpy3B__times_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_uint16
+GrB_Info GB_Adot4B__times_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_uint16
+GrB_Info GB_Adot2B__times_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11742,10 +12074,10 @@ GrB_Info GB_Adot2B__max_rminus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_uint16
+GrB_Info GB_Adot3B__times_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11753,33 +12085,32 @@ GrB_Info GB_Adot3B__max_rminus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_uint16
+GrB_Info GB_Asaxpy3B__times_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_uint32
+GrB_Info GB_Adot4B__times_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_uint32
+GrB_Info GB_Adot2B__times_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11787,10 +12118,10 @@ GrB_Info GB_Adot2B__max_rminus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_uint32
+GrB_Info GB_Adot3B__times_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11798,33 +12129,32 @@ GrB_Info GB_Adot3B__max_rminus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_uint32
+GrB_Info GB_Asaxpy3B__times_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rminus_uint64
+GrB_Info GB_Adot4B__times_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_uint64
+GrB_Info GB_Adot2B__times_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11832,10 +12162,10 @@ GrB_Info GB_Adot2B__max_rminus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_uint64
+GrB_Info GB_Adot3B__times_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11843,32 +12173,32 @@ GrB_Info GB_Adot3B__max_rminus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_uint64
+GrB_Info GB_Asaxpy3B__times_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_rminus_fp32
+GrB_Info GB_Adot4B__times_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_fp32
+GrB_Info GB_Adot2B__times_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11876,10 +12206,10 @@ GrB_Info GB_Adot2B__max_rminus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_fp32
+GrB_Info GB_Adot3B__times_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11887,32 +12217,32 @@ GrB_Info GB_Adot3B__max_rminus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_fp32
+GrB_Info GB_Asaxpy3B__times_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_rminus_fp64
+GrB_Info GB_Adot4B__times_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rminus_fp64
+GrB_Info GB_Adot2B__times_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11920,10 +12250,10 @@ GrB_Info GB_Adot2B__max_rminus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rminus_fp64
+GrB_Info GB_Adot3B__times_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11931,33 +12261,32 @@ GrB_Info GB_Adot3B__max_rminus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rminus_fp64
+GrB_Info GB_Asaxpy3B__times_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_int8
+GrB_Info GB_Adot4B__times_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_int8
+GrB_Info GB_Adot2B__times_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -11965,10 +12294,10 @@ GrB_Info GB_Adot2B__plus_rminus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_int8
+GrB_Info GB_Adot3B__times_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -11976,33 +12305,32 @@ GrB_Info GB_Adot3B__plus_rminus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_int8
+GrB_Info GB_Asaxpy3B__times_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_uint8
+GrB_Info GB_Adot4B__times_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_uint8
+GrB_Info GB_Adot2B__times_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12010,10 +12338,10 @@ GrB_Info GB_Adot2B__plus_rminus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_uint8
+GrB_Info GB_Adot3B__times_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12021,33 +12349,32 @@ GrB_Info GB_Adot3B__plus_rminus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_uint8
+GrB_Info GB_Asaxpy3B__times_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_int16
+GrB_Info GB_Adot4B__times_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_int16
+GrB_Info GB_Adot2B__times_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12055,10 +12382,10 @@ GrB_Info GB_Adot2B__plus_rminus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_int16
+GrB_Info GB_Adot3B__times_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12066,33 +12393,32 @@ GrB_Info GB_Adot3B__plus_rminus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_int16
+GrB_Info GB_Asaxpy3B__times_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_uint16
+GrB_Info GB_Adot4B__times_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_uint16
+GrB_Info GB_Adot2B__min_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12100,10 +12426,10 @@ GrB_Info GB_Adot2B__plus_rminus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_uint16
+GrB_Info GB_Adot3B__min_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12111,33 +12437,32 @@ GrB_Info GB_Adot3B__plus_rminus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_uint16
+GrB_Info GB_Asaxpy3B__min_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_int32
+GrB_Info GB_Adot4B__min_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_int32
+GrB_Info GB_Adot2B__min_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12145,10 +12470,10 @@ GrB_Info GB_Adot2B__plus_rminus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_int32
+GrB_Info GB_Adot3B__min_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12156,33 +12481,32 @@ GrB_Info GB_Adot3B__plus_rminus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_int32
+GrB_Info GB_Asaxpy3B__min_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_uint32
+GrB_Info GB_Adot4B__min_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_uint32
+GrB_Info GB_Adot2B__min_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12190,10 +12514,10 @@ GrB_Info GB_Adot2B__plus_rminus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_uint32
+GrB_Info GB_Adot3B__min_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12201,33 +12525,32 @@ GrB_Info GB_Adot3B__plus_rminus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_uint32
+GrB_Info GB_Asaxpy3B__min_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_int64
+GrB_Info GB_Adot4B__min_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_int64
+GrB_Info GB_Adot2B__min_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12235,10 +12558,10 @@ GrB_Info GB_Adot2B__plus_rminus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_int64
+GrB_Info GB_Adot3B__min_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12246,33 +12569,32 @@ GrB_Info GB_Adot3B__plus_rminus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_int64
+GrB_Info GB_Asaxpy3B__min_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rminus_uint64
+GrB_Info GB_Adot4B__min_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_uint64
+GrB_Info GB_Adot2B__min_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12280,10 +12602,10 @@ GrB_Info GB_Adot2B__plus_rminus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_uint64
+GrB_Info GB_Adot3B__min_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12291,32 +12613,32 @@ GrB_Info GB_Adot3B__plus_rminus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_uint64
+GrB_Info GB_Asaxpy3B__min_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_rminus_fp32
+GrB_Info GB_Adot4B__min_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_fp32
+GrB_Info GB_Adot2B__min_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12324,10 +12646,10 @@ GrB_Info GB_Adot2B__plus_rminus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_fp32
+GrB_Info GB_Adot3B__min_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12335,32 +12657,32 @@ GrB_Info GB_Adot3B__plus_rminus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_fp32
+GrB_Info GB_Asaxpy3B__min_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_rminus_fp64
+GrB_Info GB_Adot4B__min_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rminus_fp64
+GrB_Info GB_Adot2B__min_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12368,10 +12690,10 @@ GrB_Info GB_Adot2B__plus_rminus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rminus_fp64
+GrB_Info GB_Adot3B__min_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12379,33 +12701,32 @@ GrB_Info GB_Adot3B__plus_rminus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rminus_fp64
+GrB_Info GB_Asaxpy3B__min_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_int8
+GrB_Info GB_Adot4B__min_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_int8
+GrB_Info GB_Adot2B__min_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12413,10 +12734,10 @@ GrB_Info GB_Adot2B__times_rminus_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_int8
+GrB_Info GB_Adot3B__min_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12424,33 +12745,32 @@ GrB_Info GB_Adot3B__times_rminus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_int8
+GrB_Info GB_Asaxpy3B__min_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_uint8
+GrB_Info GB_Adot4B__min_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_uint8
+GrB_Info GB_Adot2B__min_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12458,10 +12778,10 @@ GrB_Info GB_Adot2B__times_rminus_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_uint8
+GrB_Info GB_Adot3B__min_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12469,33 +12789,32 @@ GrB_Info GB_Adot3B__times_rminus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_uint8
+GrB_Info GB_Asaxpy3B__min_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_int16
+GrB_Info GB_Adot4B__min_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_int16
+GrB_Info GB_Adot2B__min_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12503,10 +12822,10 @@ GrB_Info GB_Adot2B__times_rminus_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_int16
+GrB_Info GB_Adot3B__min_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12514,33 +12833,32 @@ GrB_Info GB_Adot3B__times_rminus_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_int16
+GrB_Info GB_Asaxpy3B__min_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_uint16
+GrB_Info GB_Adot4B__min_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_uint16
+GrB_Info GB_Adot2B__max_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12548,10 +12866,10 @@ GrB_Info GB_Adot2B__times_rminus_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_uint16
+GrB_Info GB_Adot3B__max_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12559,33 +12877,32 @@ GrB_Info GB_Adot3B__times_rminus_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_uint16
+GrB_Info GB_Asaxpy3B__max_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_int32
+GrB_Info GB_Adot4B__max_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_int32
+GrB_Info GB_Adot2B__max_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12593,10 +12910,10 @@ GrB_Info GB_Adot2B__times_rminus_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_int32
+GrB_Info GB_Adot3B__max_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12604,33 +12921,32 @@ GrB_Info GB_Adot3B__times_rminus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_int32
+GrB_Info GB_Asaxpy3B__max_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_uint32
+GrB_Info GB_Adot4B__max_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_uint32
+GrB_Info GB_Adot2B__max_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12638,10 +12954,10 @@ GrB_Info GB_Adot2B__times_rminus_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_uint32
+GrB_Info GB_Adot3B__max_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12649,33 +12965,32 @@ GrB_Info GB_Adot3B__times_rminus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_uint32
+GrB_Info GB_Asaxpy3B__max_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_int64
+GrB_Info GB_Adot4B__max_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_int64
+GrB_Info GB_Adot2B__max_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12683,10 +12998,10 @@ GrB_Info GB_Adot2B__times_rminus_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_int64
+GrB_Info GB_Adot3B__max_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12694,33 +13009,32 @@ GrB_Info GB_Adot3B__times_rminus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_int64
+GrB_Info GB_Asaxpy3B__max_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rminus_uint64
+GrB_Info GB_Adot4B__max_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_uint64
+GrB_Info GB_Adot2B__max_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12728,10 +13042,10 @@ GrB_Info GB_Adot2B__times_rminus_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_uint64
+GrB_Info GB_Adot3B__max_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12739,32 +13053,32 @@ GrB_Info GB_Adot3B__times_rminus_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_uint64
+GrB_Info GB_Asaxpy3B__max_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_rminus_fp32
+GrB_Info GB_Adot4B__max_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_fp32
+GrB_Info GB_Adot2B__max_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12772,10 +13086,10 @@ GrB_Info GB_Adot2B__times_rminus_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_fp32
+GrB_Info GB_Adot3B__max_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12783,32 +13097,32 @@ GrB_Info GB_Adot3B__times_rminus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_fp32
+GrB_Info GB_Asaxpy3B__max_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_rminus_fp64
+GrB_Info GB_Adot4B__max_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rminus_fp64
+GrB_Info GB_Adot2B__max_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12816,10 +13130,10 @@ GrB_Info GB_Adot2B__times_rminus_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rminus_fp64
+GrB_Info GB_Adot3B__max_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12827,33 +13141,32 @@ GrB_Info GB_Adot3B__times_rminus_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rminus_fp64
+GrB_Info GB_Asaxpy3B__max_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_int8
+GrB_Info GB_Adot4B__max_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_int8
+GrB_Info GB_Adot2B__max_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12861,10 +13174,10 @@ GrB_Info GB_Adot2B__min_times_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_int8
+GrB_Info GB_Adot3B__max_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12872,33 +13185,32 @@ GrB_Info GB_Adot3B__min_times_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_int8
+GrB_Info GB_Asaxpy3B__max_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_int16
+GrB_Info GB_Adot4B__max_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_int16
+GrB_Info GB_Adot2B__max_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12906,10 +13218,10 @@ GrB_Info GB_Adot2B__min_times_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_int16
+GrB_Info GB_Adot3B__max_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12917,33 +13229,32 @@ GrB_Info GB_Adot3B__min_times_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_int16
+GrB_Info GB_Asaxpy3B__max_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_int32
+GrB_Info GB_Adot4B__max_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_int32
+GrB_Info GB_Adot2B__max_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12951,10 +13262,10 @@ GrB_Info GB_Adot2B__min_times_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_int32
+GrB_Info GB_Adot3B__max_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -12962,33 +13273,32 @@ GrB_Info GB_Adot3B__min_times_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_int32
+GrB_Info GB_Asaxpy3B__max_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_int64
+GrB_Info GB_Adot4B__max_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_int64
+GrB_Info GB_Adot2B__any_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -12996,10 +13306,10 @@ GrB_Info GB_Adot2B__min_times_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_int64
+GrB_Info GB_Adot3B__any_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13007,33 +13317,32 @@ GrB_Info GB_Adot3B__min_times_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_int64
+GrB_Info GB_Asaxpy3B__any_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_uint8
+GrB_Info GB_Adot4B__any_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_uint8
+GrB_Info GB_Adot2B__any_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13041,10 +13350,10 @@ GrB_Info GB_Adot2B__min_times_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_uint8
+GrB_Info GB_Adot3B__any_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13052,33 +13361,32 @@ GrB_Info GB_Adot3B__min_times_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_uint8
+GrB_Info GB_Asaxpy3B__any_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_uint16
+GrB_Info GB_Adot4B__any_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_uint16
+GrB_Info GB_Adot2B__any_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13086,10 +13394,10 @@ GrB_Info GB_Adot2B__min_times_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_uint16
+GrB_Info GB_Adot3B__any_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13097,33 +13405,32 @@ GrB_Info GB_Adot3B__min_times_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_uint16
+GrB_Info GB_Asaxpy3B__any_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_uint32
+GrB_Info GB_Adot4B__any_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_uint32
+GrB_Info GB_Adot2B__any_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13131,10 +13438,10 @@ GrB_Info GB_Adot2B__min_times_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_uint32
+GrB_Info GB_Adot3B__any_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13142,33 +13449,32 @@ GrB_Info GB_Adot3B__min_times_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_uint32
+GrB_Info GB_Asaxpy3B__any_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_times_uint64
+GrB_Info GB_Adot4B__any_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_uint64
+GrB_Info GB_Adot2B__any_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13176,10 +13482,10 @@ GrB_Info GB_Adot2B__min_times_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_uint64
+GrB_Info GB_Adot3B__any_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13187,32 +13493,32 @@ GrB_Info GB_Adot3B__min_times_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_uint64
+GrB_Info GB_Asaxpy3B__any_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_times_fp32
+GrB_Info GB_Adot4B__any_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_fp32
+GrB_Info GB_Adot2B__any_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13220,10 +13526,10 @@ GrB_Info GB_Adot2B__min_times_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_fp32
+GrB_Info GB_Adot3B__any_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13231,32 +13537,32 @@ GrB_Info GB_Adot3B__min_times_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_fp32
+GrB_Info GB_Asaxpy3B__any_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_times_fp64
+GrB_Info GB_Adot4B__any_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_times_fp64
+GrB_Info GB_Adot2B__any_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13264,10 +13570,10 @@ GrB_Info GB_Adot2B__min_times_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_times_fp64
+GrB_Info GB_Adot3B__any_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13275,33 +13581,32 @@ GrB_Info GB_Adot3B__min_times_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_times_fp64
+GrB_Info GB_Asaxpy3B__any_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_int8
+GrB_Info GB_Adot4B__any_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_int8
+GrB_Info GB_Adot2B__any_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13309,10 +13614,10 @@ GrB_Info GB_Adot2B__max_times_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_int8
+GrB_Info GB_Adot3B__any_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13320,33 +13625,32 @@ GrB_Info GB_Adot3B__max_times_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_int8
+GrB_Info GB_Asaxpy3B__any_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_int16
+GrB_Info GB_Adot4B__any_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_int16
+GrB_Info GB_Adot2B__any_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13354,10 +13658,10 @@ GrB_Info GB_Adot2B__max_times_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_int16
+GrB_Info GB_Adot3B__any_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13365,33 +13669,32 @@ GrB_Info GB_Adot3B__max_times_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_int16
+GrB_Info GB_Asaxpy3B__any_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_int32
+GrB_Info GB_Adot4B__any_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_int32
+GrB_Info GB_Adot2B__any_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13399,10 +13702,10 @@ GrB_Info GB_Adot2B__max_times_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_int32
+GrB_Info GB_Adot3B__any_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13410,33 +13713,32 @@ GrB_Info GB_Adot3B__max_times_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_int32
+GrB_Info GB_Asaxpy3B__any_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_int64
+GrB_Info GB_Adot4B__any_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_int64
+GrB_Info GB_Adot2B__plus_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13444,10 +13746,10 @@ GrB_Info GB_Adot2B__max_times_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_int64
+GrB_Info GB_Adot3B__plus_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13455,33 +13757,32 @@ GrB_Info GB_Adot3B__max_times_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_int64
+GrB_Info GB_Asaxpy3B__plus_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_uint8
+GrB_Info GB_Adot4B__plus_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_uint8
+GrB_Info GB_Adot2B__plus_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13489,10 +13790,10 @@ GrB_Info GB_Adot2B__max_times_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_uint8
+GrB_Info GB_Adot3B__plus_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13500,33 +13801,32 @@ GrB_Info GB_Adot3B__max_times_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_uint8
+GrB_Info GB_Asaxpy3B__plus_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_uint16
+GrB_Info GB_Adot4B__plus_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_uint16
+GrB_Info GB_Adot2B__plus_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13534,10 +13834,10 @@ GrB_Info GB_Adot2B__max_times_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_uint16
+GrB_Info GB_Adot3B__plus_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13545,33 +13845,32 @@ GrB_Info GB_Adot3B__max_times_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_uint16
+GrB_Info GB_Asaxpy3B__plus_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_uint32
+GrB_Info GB_Adot4B__plus_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_uint32
+GrB_Info GB_Adot2B__plus_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13579,10 +13878,10 @@ GrB_Info GB_Adot2B__max_times_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_uint32
+GrB_Info GB_Adot3B__plus_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13590,33 +13889,32 @@ GrB_Info GB_Adot3B__max_times_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_uint32
+GrB_Info GB_Asaxpy3B__plus_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_times_uint64
+GrB_Info GB_Adot4B__plus_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_uint64
+GrB_Info GB_Adot2B__plus_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13624,10 +13922,10 @@ GrB_Info GB_Adot2B__max_times_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_uint64
+GrB_Info GB_Adot3B__plus_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13635,32 +13933,32 @@ GrB_Info GB_Adot3B__max_times_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_uint64
+GrB_Info GB_Asaxpy3B__plus_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_times_fp32
+GrB_Info GB_Adot4B__plus_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_fp32
+GrB_Info GB_Adot2B__plus_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13668,10 +13966,10 @@ GrB_Info GB_Adot2B__max_times_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_fp32
+GrB_Info GB_Adot3B__plus_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13679,32 +13977,32 @@ GrB_Info GB_Adot3B__max_times_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_fp32
+GrB_Info GB_Asaxpy3B__plus_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_times_fp64
+GrB_Info GB_Adot4B__plus_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_times_fp64
+GrB_Info GB_Adot2B__plus_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13712,10 +14010,10 @@ GrB_Info GB_Adot2B__max_times_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_times_fp64
+GrB_Info GB_Adot3B__plus_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13723,33 +14021,32 @@ GrB_Info GB_Adot3B__max_times_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_times_fp64
+GrB_Info GB_Asaxpy3B__plus_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_int8
+GrB_Info GB_Adot4B__plus_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_int8
+GrB_Info GB_Adot2B__plus_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13757,10 +14054,10 @@ GrB_Info GB_Adot2B__plus_times_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_int8
+GrB_Info GB_Adot3B__plus_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13768,33 +14065,32 @@ GrB_Info GB_Adot3B__plus_times_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_int8
+GrB_Info GB_Asaxpy3B__plus_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_uint8
+GrB_Info GB_Adot4B__plus_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_uint8
+GrB_Info GB_Adot2B__plus_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13802,10 +14098,10 @@ GrB_Info GB_Adot2B__plus_times_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_uint8
+GrB_Info GB_Adot3B__plus_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13813,33 +14109,32 @@ GrB_Info GB_Adot3B__plus_times_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_uint8
+GrB_Info GB_Asaxpy3B__plus_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_int16
+GrB_Info GB_Adot4B__plus_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_int16
+GrB_Info GB_Adot2B__plus_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13847,10 +14142,10 @@ GrB_Info GB_Adot2B__plus_times_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_int16
+GrB_Info GB_Adot3B__plus_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13858,33 +14153,32 @@ GrB_Info GB_Adot3B__plus_times_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_int16
+GrB_Info GB_Asaxpy3B__plus_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_uint16
+GrB_Info GB_Adot4B__plus_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_uint16
+GrB_Info GB_Adot2B__times_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13892,10 +14186,10 @@ GrB_Info GB_Adot2B__plus_times_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_uint16
+GrB_Info GB_Adot3B__times_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13903,33 +14197,32 @@ GrB_Info GB_Adot3B__plus_times_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_uint16
+GrB_Info GB_Asaxpy3B__times_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_int32
+GrB_Info GB_Adot4B__times_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_int32
+GrB_Info GB_Adot2B__times_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13937,10 +14230,10 @@ GrB_Info GB_Adot2B__plus_times_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_int32
+GrB_Info GB_Adot3B__times_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13948,33 +14241,32 @@ GrB_Info GB_Adot3B__plus_times_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_int32
+GrB_Info GB_Asaxpy3B__times_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_uint32
+GrB_Info GB_Adot4B__times_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_uint32
+GrB_Info GB_Adot2B__times_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -13982,10 +14274,10 @@ GrB_Info GB_Adot2B__plus_times_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_uint32
+GrB_Info GB_Adot3B__times_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -13993,33 +14285,32 @@ GrB_Info GB_Adot3B__plus_times_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_uint32
+GrB_Info GB_Asaxpy3B__times_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_int64
+GrB_Info GB_Adot4B__times_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_int64
+GrB_Info GB_Adot2B__times_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14027,10 +14318,10 @@ GrB_Info GB_Adot2B__plus_times_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_int64
+GrB_Info GB_Adot3B__times_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14038,33 +14329,32 @@ GrB_Info GB_Adot3B__plus_times_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_int64
+GrB_Info GB_Asaxpy3B__times_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_times_uint64
+GrB_Info GB_Adot4B__times_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_uint64
+GrB_Info GB_Adot2B__times_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14072,10 +14362,10 @@ GrB_Info GB_Adot2B__plus_times_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_uint64
+GrB_Info GB_Adot3B__times_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14083,32 +14373,32 @@ GrB_Info GB_Adot3B__plus_times_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_uint64
+GrB_Info GB_Asaxpy3B__times_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_times_fp32
+GrB_Info GB_Adot4B__times_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_fp32
+GrB_Info GB_Adot2B__times_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14116,10 +14406,10 @@ GrB_Info GB_Adot2B__plus_times_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_fp32
+GrB_Info GB_Adot3B__times_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14127,32 +14417,32 @@ GrB_Info GB_Adot3B__plus_times_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_fp32
+GrB_Info GB_Asaxpy3B__times_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_times_fp64
+GrB_Info GB_Adot4B__times_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_times_fp64
+GrB_Info GB_Adot2B__times_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14160,10 +14450,10 @@ GrB_Info GB_Adot2B__plus_times_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_times_fp64
+GrB_Info GB_Adot3B__times_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14171,33 +14461,32 @@ GrB_Info GB_Adot3B__plus_times_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_times_fp64
+GrB_Info GB_Asaxpy3B__times_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_int8
+GrB_Info GB_Adot4B__times_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_int8
+GrB_Info GB_Adot2B__times_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14205,10 +14494,10 @@ GrB_Info GB_Adot2B__times_times_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_int8
+GrB_Info GB_Adot3B__times_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14216,33 +14505,32 @@ GrB_Info GB_Adot3B__times_times_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_int8
+GrB_Info GB_Asaxpy3B__times_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_uint8
+GrB_Info GB_Adot4B__times_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_uint8
+GrB_Info GB_Adot2B__times_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14250,10 +14538,10 @@ GrB_Info GB_Adot2B__times_times_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_uint8
+GrB_Info GB_Adot3B__times_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14261,33 +14549,32 @@ GrB_Info GB_Adot3B__times_times_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_uint8
+GrB_Info GB_Asaxpy3B__times_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_int16
+GrB_Info GB_Adot4B__times_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_int16
+GrB_Info GB_Adot2B__times_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14295,10 +14582,10 @@ GrB_Info GB_Adot2B__times_times_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_int16
+GrB_Info GB_Adot3B__times_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14306,33 +14593,11120 @@ GrB_Info GB_Adot3B__times_times_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_int16
+GrB_Info GB_Asaxpy3B__times_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_uint16
+GrB_Info GB_Adot4B__times_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_int64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__times_rminus_uint64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_uint64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_uint64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__times_rminus_fp32    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_fp32    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_fp32    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_fp32    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__times_rminus_fp64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rminus_fp64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rminus_fp64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rminus_fp64    // declaration only; suffix presumably = TIMES monoid, RMINUS multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_int8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_int8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_int8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_int8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_int16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_int16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_int16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_int16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_int32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_int32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_int32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_int32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_int64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_int64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_int64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_int64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_uint8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_uint8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_uint8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_uint8    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_uint16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_uint16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_uint16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_uint16    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_uint32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_uint32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_uint32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_uint32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_uint64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_uint64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_uint64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_uint64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_fp32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_fp32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_fp32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_fp32    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__min_times_fp64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_times_fp64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_times_fp64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_times_fp64    // declaration only; suffix presumably = MIN monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_int8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_int8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_int8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_int8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_int16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_int16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_int16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_int16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_int32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_int32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_int32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_int32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_int64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_int64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_int64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_int64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, int64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_uint8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_uint8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_uint8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_uint8    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_uint16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_uint16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_uint16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_uint16    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_uint32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_uint32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_uint32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_uint32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_uint64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_uint64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_uint64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_uint64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, uint64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_fp32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_fp32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_fp32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_fp32    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__max_times_fp64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_times_fp64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_times_fp64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_times_fp64    // declaration only; suffix presumably = MAX monoid, TIMES multiply, fp64 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__any_times_int8    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_int8    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_int8    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_int8    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int8 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__any_times_int16    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_int16    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_int16    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably "mask is complemented" -- TODO confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,    // not const: the callee may write through it
+    const int ntasks,
+    const int nfine,    // presumably how many of the tasks are "fine" tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_int16    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int16 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,    // presumably slice boundaries for A, sized by naslice -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;    // note: this variant takes no mask argument M
+GrB_Info GB_Adot2B__any_times_int32    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,    // pointer to matrix handle(s), not a single handle
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,    // presumably one counts array per A slice -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_int32    // declaration only; suffix presumably = ANY monoid, TIMES multiply, int32 -- confirm
+(
+    GrB_Matrix C,    // the only matrix handle not const-qualified
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,    // read-only through this pointer; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_times_int64 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_int64 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_int64 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_int64 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_times_uint8 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_uint8 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_uint8 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_uint8 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_times_uint16 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_uint16 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_uint16 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_uint16 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_times_uint32 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_uint32 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_uint32 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_uint32 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_times_uint64 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_uint64 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_uint64 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_uint64 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_times_fp32 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_fp32 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_fp32 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_fp32 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_times_fp64 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_times_fp64 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_times_fp64 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_times_fp64 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_int8 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_int8 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_int8 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_int8 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_uint8 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_uint8 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_uint8 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_uint8 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_int16 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_int16 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_int16 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_int16 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_uint16 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_uint16 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_uint16 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_uint16 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_int32 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_int32 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_int32 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_int32 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_uint32 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_uint32 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_uint32 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_uint32 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_int64 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_int64 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_int64 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_int64 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_uint64 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_uint64 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_uint64 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_uint64 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_fp32 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_fp32 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_fp32 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_fp32 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_times_fp64 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_times_fp64 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_times_fp64 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_times_fp64 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_int8 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_int8 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_int8 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_times_int8 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_uint8 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_uint8 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_uint8 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_times_uint8 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_int16 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_int16 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_int16 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_times_int16 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_uint16 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_uint16 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_uint16 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_times_uint16 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_int32 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_int32 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_int32 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_times_int32 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_uint32 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_uint32 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_uint32 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_times_uint32 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_int64 // prototype; "dot2" variant per name, A passed via Aslice pointer
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_int64 // prototype; "dot3" variant per name, read-only GB_task_struct TaskList
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_int64 // prototype; "saxpy3" variant per name, adds Mask_comp, nfine, Context
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_times_int64 // prototype; "dot4" variant per name, has no mask parameter
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_uint64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__times_times_uint64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__times_times_fp32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__times_times_fp32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_fp32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__times_times_fp32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__times_times_fp64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__times_times_fp64  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__times_times_fp64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__times_times_fp64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_int8  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_int8  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_int8  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_int8  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_int16  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_int16  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_int16  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_int16  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_int32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_int32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_int32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_int32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_int64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_int64  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_int64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_int64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_uint8  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_uint8  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_uint8  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_uint8  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_uint16  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_uint16  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_uint16  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_uint16  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_uint32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_uint32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_uint32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_uint32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_uint64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_uint64  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_uint64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_uint64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_fp32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_fp32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_fp32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_fp32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__min_div_fp64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__min_div_fp64  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__min_div_fp64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__min_div_fp64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_int8  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_int8  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_int8  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_int8  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_int16  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_int16  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_int16  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_int16  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_int32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_int32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_int32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_int32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_int64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_int64  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_int64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_int64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_uint8  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_uint8  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_uint8  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_uint8  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_uint16  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_uint16  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_uint16  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_uint16  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_uint32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_uint32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_uint32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_uint32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_uint64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_uint64  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_uint64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_uint64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_fp32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_fp32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_fp32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_fp32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__max_div_fp64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__max_div_fp64  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__max_div_fp64  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__max_div_fp64  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__any_div_int8  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__any_div_int8  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_int8  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__any_div_int8  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__any_div_int16  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__any_div_int16  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_int16  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__any_div_int16  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__any_div_int32  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__any_div_int32  // prototype only; name suggests a "dot3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // pointer-to-const task descriptors, presumably ntasks of them -- confirm
+    const int ntasks,
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_int32  // prototype only; name suggests a "saxpy3" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle and two flags for it; semantics not shown here
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // task descriptors, passed through a non-const pointer
+    const int ntasks,
+    const int nfine,  // presumably the count of "fine" tasks within TaskList -- confirm
+    const int nthreads,
+    GB_Context Context  // context object; its role is not visible in this prototype
+) ;
+
+GrB_Info GB_Adot4B__any_div_int32  // prototype only; name suggests a "dot4" multiply kernel (no mask argument), suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix A, bool A_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // int64_t array plus a count, presumably how A is split -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // same pairing for B
+    const int nthreads  // presumably the number of threads to use -- confirm
+) ;
+GrB_Info GB_Adot2B__any_div_int64  // prototype only; name suggests a "dot2" multiply kernel, suffix presumably monoid_multop_type -- TODO confirm
+(
+    GrB_Matrix C,  // the only matrix argument not declared const (presumably the result -- confirm)
+    const GrB_Matrix M, const bool Mask_struct,  // mask handle and a flag for it; semantics not shown here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // array of matrix handles (presumably naslice slices of A) -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice,  // int64_t array, presumably how B is split into nbslice parts -- confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t pointers; contents not shown here
+    int nthreads, int naslice, int nbslice  // thread count and slice counts, judging by the names
+) ;
+
+GrB_Info GB_Adot3B__any_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_div_uint8 // dot2 variant for any_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_div_uint8 // dot3 variant for any_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_uint8 // saxpy3 variant for any_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_div_uint8 // dot4 variant for any_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_div_uint16 // dot2 variant for any_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_div_uint16 // dot3 variant for any_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_uint16 // saxpy3 variant for any_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_div_uint16 // dot4 variant for any_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_div_uint32 // dot2 variant for any_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_div_uint32 // dot3 variant for any_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_uint32 // saxpy3 variant for any_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_div_uint32 // dot4 variant for any_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_div_uint64 // dot2 variant for any_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_div_uint64 // dot3 variant for any_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_uint64 // saxpy3 variant for any_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_div_uint64 // dot4 variant for any_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_div_fp32 // dot2 variant for any_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_div_fp32 // dot3 variant for any_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_fp32 // saxpy3 variant for any_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_div_fp32 // dot4 variant for any_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_div_fp64 // dot2 variant for any_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_div_fp64 // dot3 variant for any_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_div_fp64 // saxpy3 variant for any_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_div_fp64 // dot4 variant for any_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_int8 // dot2 variant for plus_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_int8 // dot3 variant for plus_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_int8 // saxpy3 variant for plus_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_int8 // dot4 variant for plus_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_uint8 // dot2 variant for plus_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_uint8 // dot3 variant for plus_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_uint8 // saxpy3 variant for plus_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_uint8 // dot4 variant for plus_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_int16 // dot2 variant for plus_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_int16 // dot3 variant for plus_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_int16 // saxpy3 variant for plus_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_int16 // dot4 variant for plus_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_uint16 // dot2 variant for plus_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_uint16 // dot3 variant for plus_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_uint16 // saxpy3 variant for plus_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_uint16 // dot4 variant for plus_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_int32 // dot2 variant for plus_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_int32 // dot3 variant for plus_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_int32 // saxpy3 variant for plus_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_int32 // dot4 variant for plus_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_uint32 // dot2 variant for plus_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_uint32 // dot3 variant for plus_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_uint32 // saxpy3 variant for plus_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_uint32 // dot4 variant for plus_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_int64 // dot2 variant for plus_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_int64 // dot3 variant for plus_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_int64 // saxpy3 variant for plus_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_int64 // dot4 variant for plus_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_uint64 // dot2 variant for plus_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_uint64 // dot3 variant for plus_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_uint64 // saxpy3 variant for plus_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_uint64 // dot4 variant for plus_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_fp32 // dot2 variant for plus_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_fp32 // dot3 variant for plus_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_fp32 // saxpy3 variant for plus_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_fp32 // dot4 variant for plus_div/fp32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_div_fp64 // dot2 variant for plus_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_div_fp64 // dot3 variant for plus_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_div_fp64 // saxpy3 variant for plus_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_div_fp64 // dot4 variant for plus_div/fp64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_int8 // dot2 variant for times_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_int8 // dot3 variant for times_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_int8 // saxpy3 variant for times_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_int8 // dot4 variant for times_div/int8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_uint8 // dot2 variant for times_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_uint8 // dot3 variant for times_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_uint8 // saxpy3 variant for times_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_uint8 // dot4 variant for times_div/uint8 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_int16 // dot2 variant for times_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_int16 // dot3 variant for times_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_int16 // saxpy3 variant for times_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_int16 // dot4 variant for times_div/int16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_uint16 // dot2 variant for times_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_uint16 // dot3 variant for times_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_uint16 // saxpy3 variant for times_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_uint16 // dot4 variant for times_div/uint16 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_int32 // dot2 variant for times_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_int32 // dot3 variant for times_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_int32 // saxpy3 variant for times_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_int32 // dot4 variant for times_div/int32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_uint32 // dot2 variant for times_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_uint32 // dot3 variant for times_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_uint32 // saxpy3 variant for times_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_uint32 // dot4 variant for times_div/uint32 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_int64 // dot2 variant for times_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_int64 // dot3 variant for times_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_int64 // saxpy3 variant for times_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_int64 // dot4 variant for times_div/int64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_uint64 // dot2 variant for times_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern, // A arrives as a pointer to GrB_Matrix, unlike dot3/dot4/saxpy3
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_uint64 // dot3 variant for times_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // read-only task array; presumably ntasks entries - confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_uint64 // saxpy3 variant for times_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // only variant here that takes Mask_comp
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // non-const, unlike the dot3 TaskList
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_uint64 // dot4 variant for times_div/uint64 (per name); declaration only
+(
+    GrB_Matrix C, // sole non-const matrix; presumably the result - confirm
+    const GrB_Matrix A, bool A_is_pattern, // no mask parameter in this variant
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_fp32  // saxpy3 kernel, times_div_fp32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_fp32  // dot4 kernel, times_div_fp32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_div_fp64  // dot2 kernel, times_div_fp64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__times_div_fp64  // dot3 kernel, times_div_fp64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_div_fp64  // saxpy3 kernel, times_div_fp64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_div_fp64  // dot4 kernel, times_div_fp64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_int8  // dot2 kernel, min_rdiv_int8; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_int8  // dot3 kernel, min_rdiv_int8; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_int8  // saxpy3 kernel, min_rdiv_int8; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_int8  // dot4 kernel, min_rdiv_int8; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_int16  // dot2 kernel, min_rdiv_int16; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_int16  // dot3 kernel, min_rdiv_int16; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_int16  // saxpy3 kernel, min_rdiv_int16; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_int16  // dot4 kernel, min_rdiv_int16; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_int32  // dot2 kernel, min_rdiv_int32; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_int32  // dot3 kernel, min_rdiv_int32; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_int32  // saxpy3 kernel, min_rdiv_int32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_int32  // dot4 kernel, min_rdiv_int32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_int64  // dot2 kernel, min_rdiv_int64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_int64  // dot3 kernel, min_rdiv_int64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_int64  // saxpy3 kernel, min_rdiv_int64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_int64  // dot4 kernel, min_rdiv_int64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_uint8  // dot2 kernel, min_rdiv_uint8; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_uint8  // dot3 kernel, min_rdiv_uint8; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_uint8  // saxpy3 kernel, min_rdiv_uint8; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_uint8  // dot4 kernel, min_rdiv_uint8; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_uint16  // dot2 kernel, min_rdiv_uint16; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_uint16  // dot3 kernel, min_rdiv_uint16; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_uint16  // saxpy3 kernel, min_rdiv_uint16; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_uint16  // dot4 kernel, min_rdiv_uint16; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_uint32  // dot2 kernel, min_rdiv_uint32; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_uint32  // dot3 kernel, min_rdiv_uint32; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_uint32  // saxpy3 kernel, min_rdiv_uint32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_uint32  // dot4 kernel, min_rdiv_uint32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_uint64  // dot2 kernel, min_rdiv_uint64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_uint64  // dot3 kernel, min_rdiv_uint64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_uint64  // saxpy3 kernel, min_rdiv_uint64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_uint64  // dot4 kernel, min_rdiv_uint64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_fp32  // dot2 kernel, min_rdiv_fp32; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_fp32  // dot3 kernel, min_rdiv_fp32; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_fp32  // saxpy3 kernel, min_rdiv_fp32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_fp32  // dot4 kernel, min_rdiv_fp32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_rdiv_fp64  // dot2 kernel, min_rdiv_fp64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__min_rdiv_fp64  // dot3 kernel, min_rdiv_fp64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_rdiv_fp64  // saxpy3 kernel, min_rdiv_fp64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_rdiv_fp64  // dot4 kernel, min_rdiv_fp64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_int8  // dot2 kernel, max_rdiv_int8; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_int8  // dot3 kernel, max_rdiv_int8; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_int8  // saxpy3 kernel, max_rdiv_int8; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_int8  // dot4 kernel, max_rdiv_int8; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_int16  // dot2 kernel, max_rdiv_int16; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_int16  // dot3 kernel, max_rdiv_int16; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_int16  // saxpy3 kernel, max_rdiv_int16; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_int16  // dot4 kernel, max_rdiv_int16; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_int32  // dot2 kernel, max_rdiv_int32; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_int32  // dot3 kernel, max_rdiv_int32; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_int32  // saxpy3 kernel, max_rdiv_int32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_int32  // dot4 kernel, max_rdiv_int32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_int64  // dot2 kernel, max_rdiv_int64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_int64  // dot3 kernel, max_rdiv_int64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_int64  // saxpy3 kernel, max_rdiv_int64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_int64  // dot4 kernel, max_rdiv_int64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_uint8  // dot2 kernel, max_rdiv_uint8; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_uint8  // dot3 kernel, max_rdiv_uint8; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_uint8  // saxpy3 kernel, max_rdiv_uint8; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_uint8  // dot4 kernel, max_rdiv_uint8; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_uint16  // dot2 kernel, max_rdiv_uint16; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_uint16  // dot3 kernel, max_rdiv_uint16; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_uint16  // saxpy3 kernel, max_rdiv_uint16; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_uint16  // dot4 kernel, max_rdiv_uint16; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_uint32  // dot2 kernel, max_rdiv_uint32; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_uint32  // dot3 kernel, max_rdiv_uint32; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_uint32  // saxpy3 kernel, max_rdiv_uint32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_uint32  // dot4 kernel, max_rdiv_uint32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_uint64  // dot2 kernel, max_rdiv_uint64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_uint64  // dot3 kernel, max_rdiv_uint64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_uint64  // saxpy3 kernel, max_rdiv_uint64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_uint64  // dot4 kernel, max_rdiv_uint64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_fp32  // dot2 kernel, max_rdiv_fp32; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_fp32  // dot3 kernel, max_rdiv_fp32; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_fp32  // saxpy3 kernel, max_rdiv_fp32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_fp32  // dot4 kernel, max_rdiv_fp32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_rdiv_fp64  // dot2 kernel, max_rdiv_fp64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__max_rdiv_fp64  // dot3 kernel, max_rdiv_fp64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_rdiv_fp64  // saxpy3 kernel, max_rdiv_fp64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_rdiv_fp64  // dot4 kernel, max_rdiv_fp64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_int8  // dot2 kernel, any_rdiv_int8; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_int8  // dot3 kernel, any_rdiv_int8; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int8  // saxpy3 kernel, any_rdiv_int8; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_int8  // dot4 kernel, any_rdiv_int8; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_int16  // dot2 kernel, any_rdiv_int16; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_int16  // dot3 kernel, any_rdiv_int16; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int16  // saxpy3 kernel, any_rdiv_int16; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_int16  // dot4 kernel, any_rdiv_int16; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_int32  // dot2 kernel, any_rdiv_int32; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_int32  // dot3 kernel, any_rdiv_int32; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int32  // saxpy3 kernel, any_rdiv_int32; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_int32  // dot4 kernel, any_rdiv_int32; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_int64  // dot2 kernel, any_rdiv_int64; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_int64  // dot3 kernel, any_rdiv_int64; presumably masked A'*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // tasks are const through this pointer
+    const int ntasks,  // presumably the number of TaskList entries (confirm)
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_int64  // saxpy3 kernel, any_rdiv_int64; presumably A*B (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask handle plus two mask flags
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // tasks are not const-qualified here
+    const int ntasks,
+    const int nfine,  // presumably the count of fine-grained tasks (confirm)
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_int64  // dot4 kernel, any_rdiv_int64; signature has no mask argument
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice sizes A_slice (confirm)
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice sizes B_slice (confirm)
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_uint8  // dot2 kernel, any_rdiv_uint8; presumably A'*B via dot products (confirm)
+(
+    GrB_Matrix C,  // only non-const matrix handle in this signature
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // A is passed as Aslice, a pointer to matrix handles
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice  // presumably thread and slice counts (confirm)
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_uint8  // "dot3" kernel for any_rdiv_uint8 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint8  // "saxpy3" kernel for any_rdiv_uint8 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_uint8  // "dot4" kernel for any_rdiv_uint8 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_uint16  // "dot2" kernel for any_rdiv_uint16 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_uint16  // "dot3" kernel for any_rdiv_uint16 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint16  // "saxpy3" kernel for any_rdiv_uint16 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_uint16  // "dot4" kernel for any_rdiv_uint16 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_uint32  // "dot2" kernel for any_rdiv_uint32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_uint32  // "dot3" kernel for any_rdiv_uint32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint32  // "saxpy3" kernel for any_rdiv_uint32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_uint32  // "dot4" kernel for any_rdiv_uint32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_uint64  // "dot2" kernel for any_rdiv_uint64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_uint64  // "dot3" kernel for any_rdiv_uint64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_uint64  // "saxpy3" kernel for any_rdiv_uint64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_uint64  // "dot4" kernel for any_rdiv_uint64 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_fp32  // "dot2" kernel for any_rdiv_fp32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_fp32  // "dot3" kernel for any_rdiv_fp32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_fp32  // "saxpy3" kernel for any_rdiv_fp32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_fp32  // "dot4" kernel for any_rdiv_fp32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_rdiv_fp64  // "dot2" kernel for any_rdiv_fp64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_rdiv_fp64  // "dot3" kernel for any_rdiv_fp64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_rdiv_fp64  // "saxpy3" kernel for any_rdiv_fp64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_rdiv_fp64  // "dot4" kernel for any_rdiv_fp64 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_int8  // "dot2" kernel for plus_rdiv_int8 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_int8  // "dot3" kernel for plus_rdiv_int8 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_int8  // "saxpy3" kernel for plus_rdiv_int8 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_int8  // "dot4" kernel for plus_rdiv_int8 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_uint8  // "dot2" kernel for plus_rdiv_uint8 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_uint8  // "dot3" kernel for plus_rdiv_uint8 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint8  // "saxpy3" kernel for plus_rdiv_uint8 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_uint8  // "dot4" kernel for plus_rdiv_uint8 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_int16  // "dot2" kernel for plus_rdiv_int16 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_int16  // "dot3" kernel for plus_rdiv_int16 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_int16  // "saxpy3" kernel for plus_rdiv_int16 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_int16  // "dot4" kernel for plus_rdiv_int16 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_uint16  // "dot2" kernel for plus_rdiv_uint16 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_uint16  // "dot3" kernel for plus_rdiv_uint16 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint16  // "saxpy3" kernel for plus_rdiv_uint16 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_uint16  // "dot4" kernel for plus_rdiv_uint16 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_int32  // "dot2" kernel for plus_rdiv_int32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_int32  // "dot3" kernel for plus_rdiv_int32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_int32  // "saxpy3" kernel for plus_rdiv_int32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_int32  // "dot4" kernel for plus_rdiv_int32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_uint32  // "dot2" kernel for plus_rdiv_uint32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_uint32  // "dot3" kernel for plus_rdiv_uint32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint32  // "saxpy3" kernel for plus_rdiv_uint32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_uint32  // "dot4" kernel for plus_rdiv_uint32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_int64  // "dot2" kernel for plus_rdiv_int64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_int64  // "dot3" kernel for plus_rdiv_int64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_int64  // "saxpy3" kernel for plus_rdiv_int64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_int64  // "dot4" kernel for plus_rdiv_int64 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_uint64  // "dot2" kernel for plus_rdiv_uint64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_uint64  // "dot3" kernel for plus_rdiv_uint64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint64  // "saxpy3" kernel for plus_rdiv_uint64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_uint64  // "dot4" kernel for plus_rdiv_uint64 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_fp32  // "dot2" kernel for plus_rdiv_fp32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_fp32  // "dot3" kernel for plus_rdiv_fp32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_fp32  // "saxpy3" kernel for plus_rdiv_fp32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_fp32  // "dot4" kernel for plus_rdiv_fp32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_rdiv_fp64  // "dot2" kernel for plus_rdiv_fp64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_rdiv_fp64  // "dot3" kernel for plus_rdiv_fp64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_fp64  // "saxpy3" kernel for plus_rdiv_fp64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_rdiv_fp64  // "dot4" kernel for plus_rdiv_fp64 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_int8  // "dot2" kernel for times_rdiv_int8 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_int8  // "dot3" kernel for times_rdiv_int8 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int8  // "saxpy3" kernel for times_rdiv_int8 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_int8  // "dot4" kernel for times_rdiv_int8 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_uint8  // "dot2" kernel for times_rdiv_uint8 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_uint8  // "dot3" kernel for times_rdiv_uint8 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint8  // "saxpy3" kernel for times_rdiv_uint8 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_uint8  // "dot4" kernel for times_rdiv_uint8 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_int16  // "dot2" kernel for times_rdiv_int16 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_int16  // "dot3" kernel for times_rdiv_int16 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int16  // "saxpy3" kernel for times_rdiv_int16 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_int16  // "dot4" kernel for times_rdiv_int16 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_uint16  // "dot2" kernel for times_rdiv_uint16 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_uint16  // "dot3" kernel for times_rdiv_uint16 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint16  // "saxpy3" kernel for times_rdiv_uint16 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_uint16  // "dot4" kernel for times_rdiv_uint16 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_int32  // "dot2" kernel for times_rdiv_int32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_int32  // "dot3" kernel for times_rdiv_int32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int32  // "saxpy3" kernel for times_rdiv_int32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_int32  // "dot4" kernel for times_rdiv_int32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_uint32  // "dot2" kernel for times_rdiv_uint32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_uint32  // "dot3" kernel for times_rdiv_uint32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint32  // "saxpy3" kernel for times_rdiv_uint32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_uint32  // "dot4" kernel for times_rdiv_uint32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_int64  // "dot2" kernel for times_rdiv_int64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_int64  // "dot3" kernel for times_rdiv_int64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int64  // "saxpy3" kernel for times_rdiv_int64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_int64  // "dot4" kernel for times_rdiv_int64 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_uint64  // "dot2" kernel for times_rdiv_uint64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_uint64  // "dot3" kernel for times_rdiv_uint64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint64  // "saxpy3" kernel for times_rdiv_uint64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_uint64  // "dot4" kernel for times_rdiv_uint64 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_fp32  // "dot2" kernel for times_rdiv_fp32 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_fp32  // "dot3" kernel for times_rdiv_fp32 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_fp32  // "saxpy3" kernel for times_rdiv_fp32 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_fp32  // "dot4" kernel for times_rdiv_fp32 (per name); no mask argument; presumably C+=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably bounds of the naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_rdiv_fp64  // "dot2" kernel for times_rdiv_fp64 (per name); presumably C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix *Aslice, bool A_is_pattern,  // pointer to matrices; presumably naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,  // presumably bounds of the nbslice slices of B -- TODO confirm
+    int64_t *GB_RESTRICT *C_counts,  // pointer to int64_t arrays; presumably counts for C -- TODO confirm
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_rdiv_fp64  // "dot3" kernel for times_rdiv_fp64 (per name); presumably masked C=A'*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, const bool Mask_struct,  // mask plus flag; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,  // read-only task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_rdiv_fp64  // "saxpy3" kernel for times_rdiv_fp64 (per name); presumably C=A*B -- TODO confirm
+(
+    GrB_Matrix C,  // the only non-const matrix argument
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask plus two flags; semantics not visible here
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // non-const task list; presumably ntasks entries -- TODO confirm
+    const int ntasks,
+    const int nfine,  // presumably the number of fine tasks -- TODO confirm
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_rdiv_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__max_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__max_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__max_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__max_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__any_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__any_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__plus_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__plus_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__times_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__times_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__times_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__min_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__min_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Asaxpy3B__min_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__times_times_uint16
+GrB_Info GB_Adot4B__min_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__min_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14340,10 +25714,10 @@ GrB_Info GB_Adot2B__times_times_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_uint16
+GrB_Info GB_Adot3B__min_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14351,33 +25725,32 @@ GrB_Info GB_Adot3B__times_times_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_uint16
+GrB_Info GB_Asaxpy3B__min_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_int32
+GrB_Info GB_Adot4B__min_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_int32
+GrB_Info GB_Adot2B__min_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14385,10 +25758,10 @@ GrB_Info GB_Adot2B__times_times_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_int32
+GrB_Info GB_Adot3B__min_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14396,33 +25769,32 @@ GrB_Info GB_Adot3B__times_times_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_int32
+GrB_Info GB_Asaxpy3B__min_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_uint32
+GrB_Info GB_Adot4B__min_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_uint32
+GrB_Info GB_Adot2B__min_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14430,10 +25802,10 @@ GrB_Info GB_Adot2B__times_times_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_uint32
+GrB_Info GB_Adot3B__min_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14441,33 +25813,32 @@ GrB_Info GB_Adot3B__times_times_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_uint32
+GrB_Info GB_Asaxpy3B__min_isne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_int64
+GrB_Info GB_Adot4B__min_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_int64
+GrB_Info GB_Adot2B__min_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14475,10 +25846,10 @@ GrB_Info GB_Adot2B__times_times_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_int64
+GrB_Info GB_Adot3B__min_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14486,33 +25857,32 @@ GrB_Info GB_Adot3B__times_times_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_int64
+GrB_Info GB_Asaxpy3B__min_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_times_uint64
+GrB_Info GB_Adot4B__min_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_uint64
+GrB_Info GB_Adot2B__min_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14520,10 +25890,10 @@ GrB_Info GB_Adot2B__times_times_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_uint64
+GrB_Info GB_Adot3B__min_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14531,32 +25901,32 @@ GrB_Info GB_Adot3B__times_times_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_uint64
+GrB_Info GB_Asaxpy3B__min_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_times_fp32
+GrB_Info GB_Adot4B__min_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_fp32
+GrB_Info GB_Adot2B__min_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14564,10 +25934,10 @@ GrB_Info GB_Adot2B__times_times_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_fp32
+GrB_Info GB_Adot3B__min_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14575,32 +25945,32 @@ GrB_Info GB_Adot3B__times_times_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_fp32
+GrB_Info GB_Asaxpy3B__min_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_times_fp64
+GrB_Info GB_Adot4B__min_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_times_fp64
+GrB_Info GB_Adot2B__min_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14608,10 +25978,10 @@ GrB_Info GB_Adot2B__times_times_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_times_fp64
+GrB_Info GB_Adot3B__min_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14619,33 +25989,32 @@ GrB_Info GB_Adot3B__times_times_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_times_fp64
+GrB_Info GB_Asaxpy3B__min_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_int8
+GrB_Info GB_Adot4B__min_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_int8
+GrB_Info GB_Adot2B__min_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14653,10 +26022,10 @@ GrB_Info GB_Adot2B__min_div_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_int8
+GrB_Info GB_Adot3B__min_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14664,33 +26033,32 @@ GrB_Info GB_Adot3B__min_div_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_int8
+GrB_Info GB_Asaxpy3B__min_isne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_int16
+GrB_Info GB_Adot4B__min_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_int16
+GrB_Info GB_Adot2B__max_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14698,10 +26066,10 @@ GrB_Info GB_Adot2B__min_div_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_int16
+GrB_Info GB_Adot3B__max_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14709,33 +26077,32 @@ GrB_Info GB_Adot3B__min_div_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_int16
+GrB_Info GB_Asaxpy3B__max_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_int32
+GrB_Info GB_Adot4B__max_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_int32
+GrB_Info GB_Adot2B__max_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14743,10 +26110,10 @@ GrB_Info GB_Adot2B__min_div_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_int32
+GrB_Info GB_Adot3B__max_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14754,33 +26121,32 @@ GrB_Info GB_Adot3B__min_div_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_int32
+GrB_Info GB_Asaxpy3B__max_isne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_int64
+GrB_Info GB_Adot4B__max_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_int64
+GrB_Info GB_Adot2B__max_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14788,10 +26154,10 @@ GrB_Info GB_Adot2B__min_div_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_int64
+GrB_Info GB_Adot3B__max_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14799,33 +26165,32 @@ GrB_Info GB_Adot3B__min_div_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_int64
+GrB_Info GB_Asaxpy3B__max_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_uint8
+GrB_Info GB_Adot4B__max_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_uint8
+GrB_Info GB_Adot2B__max_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14833,10 +26198,10 @@ GrB_Info GB_Adot2B__min_div_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_uint8
+GrB_Info GB_Adot3B__max_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14844,33 +26209,32 @@ GrB_Info GB_Adot3B__min_div_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_uint8
+GrB_Info GB_Asaxpy3B__max_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_uint16
+GrB_Info GB_Adot4B__max_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_uint16
+GrB_Info GB_Adot2B__max_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14878,10 +26242,10 @@ GrB_Info GB_Adot2B__min_div_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_uint16
+GrB_Info GB_Adot3B__max_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14889,33 +26253,32 @@ GrB_Info GB_Adot3B__min_div_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_uint16
+GrB_Info GB_Asaxpy3B__max_isne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_uint32
+GrB_Info GB_Adot4B__max_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_uint32
+GrB_Info GB_Adot2B__max_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14923,10 +26286,10 @@ GrB_Info GB_Adot2B__min_div_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_uint32
+GrB_Info GB_Adot3B__max_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14934,33 +26297,32 @@ GrB_Info GB_Adot3B__min_div_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_uint32
+GrB_Info GB_Asaxpy3B__max_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_div_uint64
+GrB_Info GB_Adot4B__max_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_uint64
+GrB_Info GB_Adot2B__max_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -14968,10 +26330,10 @@ GrB_Info GB_Adot2B__min_div_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_uint64
+GrB_Info GB_Adot3B__max_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -14979,32 +26341,32 @@ GrB_Info GB_Adot3B__min_div_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_uint64
+GrB_Info GB_Asaxpy3B__max_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_div_fp32
+GrB_Info GB_Adot4B__max_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_fp32
+GrB_Info GB_Adot2B__max_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15012,10 +26374,10 @@ GrB_Info GB_Adot2B__min_div_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_fp32
+GrB_Info GB_Adot3B__max_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15023,32 +26385,32 @@ GrB_Info GB_Adot3B__min_div_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_fp32
+GrB_Info GB_Asaxpy3B__max_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_div_fp64
+GrB_Info GB_Adot4B__max_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_div_fp64
+GrB_Info GB_Adot2B__max_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15056,10 +26418,10 @@ GrB_Info GB_Adot2B__min_div_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_div_fp64
+GrB_Info GB_Adot3B__max_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15067,33 +26429,32 @@ GrB_Info GB_Adot3B__min_div_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_div_fp64
+GrB_Info GB_Asaxpy3B__max_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_int8
+GrB_Info GB_Adot4B__max_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_int8
+GrB_Info GB_Adot2B__max_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15101,10 +26462,10 @@ GrB_Info GB_Adot2B__max_div_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_int8
+GrB_Info GB_Adot3B__max_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15112,33 +26473,32 @@ GrB_Info GB_Adot3B__max_div_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_int8
+GrB_Info GB_Asaxpy3B__max_isne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_int16
+GrB_Info GB_Adot4B__max_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_int16
+GrB_Info GB_Adot2B__any_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15146,10 +26506,10 @@ GrB_Info GB_Adot2B__max_div_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_int16
+GrB_Info GB_Adot3B__any_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15157,33 +26517,32 @@ GrB_Info GB_Adot3B__max_div_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_int16
+GrB_Info GB_Asaxpy3B__any_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_int32
+GrB_Info GB_Adot4B__any_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_int32
+GrB_Info GB_Adot2B__any_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15191,10 +26550,10 @@ GrB_Info GB_Adot2B__max_div_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_int32
+GrB_Info GB_Adot3B__any_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15202,33 +26561,32 @@ GrB_Info GB_Adot3B__max_div_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_int32
+GrB_Info GB_Asaxpy3B__any_isne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_int64
+GrB_Info GB_Adot4B__any_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_int64
+GrB_Info GB_Adot2B__any_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15236,10 +26594,10 @@ GrB_Info GB_Adot2B__max_div_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_int64
+GrB_Info GB_Adot3B__any_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15247,33 +26605,32 @@ GrB_Info GB_Adot3B__max_div_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_int64
+GrB_Info GB_Asaxpy3B__any_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_uint8
+GrB_Info GB_Adot4B__any_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_uint8
+GrB_Info GB_Adot2B__any_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15281,10 +26638,10 @@ GrB_Info GB_Adot2B__max_div_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_uint8
+GrB_Info GB_Adot3B__any_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15292,33 +26649,32 @@ GrB_Info GB_Adot3B__max_div_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_uint8
+GrB_Info GB_Asaxpy3B__any_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_uint16
+GrB_Info GB_Adot4B__any_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_uint16
+GrB_Info GB_Adot2B__any_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15326,10 +26682,10 @@ GrB_Info GB_Adot2B__max_div_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_uint16
+GrB_Info GB_Adot3B__any_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15337,33 +26693,32 @@ GrB_Info GB_Adot3B__max_div_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_uint16
+GrB_Info GB_Asaxpy3B__any_isne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_uint32
+GrB_Info GB_Adot4B__any_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_uint32
+GrB_Info GB_Adot2B__any_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15371,10 +26726,10 @@ GrB_Info GB_Adot2B__max_div_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_uint32
+GrB_Info GB_Adot3B__any_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15382,33 +26737,32 @@ GrB_Info GB_Adot3B__max_div_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_uint32
+GrB_Info GB_Asaxpy3B__any_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_div_uint64
+GrB_Info GB_Adot4B__any_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_uint64
+GrB_Info GB_Adot2B__any_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15416,10 +26770,10 @@ GrB_Info GB_Adot2B__max_div_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_uint64
+GrB_Info GB_Adot3B__any_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15427,32 +26781,32 @@ GrB_Info GB_Adot3B__max_div_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_uint64
+GrB_Info GB_Asaxpy3B__any_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_div_fp32
+GrB_Info GB_Adot4B__any_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_fp32
+GrB_Info GB_Adot2B__any_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15460,10 +26814,10 @@ GrB_Info GB_Adot2B__max_div_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_fp32
+GrB_Info GB_Adot3B__any_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15471,32 +26825,32 @@ GrB_Info GB_Adot3B__max_div_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_fp32
+GrB_Info GB_Asaxpy3B__any_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_div_fp64
+GrB_Info GB_Adot4B__any_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_div_fp64
+GrB_Info GB_Adot2B__any_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15504,10 +26858,10 @@ GrB_Info GB_Adot2B__max_div_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_div_fp64
+GrB_Info GB_Adot3B__any_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15515,33 +26869,76 @@ GrB_Info GB_Adot3B__max_div_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_div_fp64
+GrB_Info GB_Asaxpy3B__any_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__any_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_div_int8
+GrB_Info GB_Asaxpy3B__any_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_div_int8
+GrB_Info GB_Adot4B__any_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15549,10 +26946,10 @@ GrB_Info GB_Adot2B__plus_div_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_int8
+GrB_Info GB_Adot3B__plus_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15560,33 +26957,76 @@ GrB_Info GB_Adot3B__plus_div_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_int8
+GrB_Info GB_Asaxpy3B__plus_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_div_uint8
+GrB_Info GB_Asaxpy3B__plus_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_div_uint8
+GrB_Info GB_Adot4B__plus_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15594,10 +27034,10 @@ GrB_Info GB_Adot2B__plus_div_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_uint8
+GrB_Info GB_Adot3B__plus_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15605,33 +27045,76 @@ GrB_Info GB_Adot3B__plus_div_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_uint8
+GrB_Info GB_Asaxpy3B__plus_isne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_div_int16
+GrB_Info GB_Asaxpy3B__plus_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_div_int16
+GrB_Info GB_Adot4B__plus_isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15639,10 +27122,10 @@ GrB_Info GB_Adot2B__plus_div_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_int16
+GrB_Info GB_Adot3B__plus_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15650,33 +27133,76 @@ GrB_Info GB_Adot3B__plus_div_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_int16
+GrB_Info GB_Asaxpy3B__plus_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_div_uint16
+GrB_Info GB_Asaxpy3B__plus_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_div_uint16
+GrB_Info GB_Adot4B__plus_isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15684,10 +27210,10 @@ GrB_Info GB_Adot2B__plus_div_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_uint16
+GrB_Info GB_Adot3B__plus_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15695,33 +27221,76 @@ GrB_Info GB_Adot3B__plus_div_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_uint16
+GrB_Info GB_Asaxpy3B__plus_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_div_int32
+GrB_Info GB_Asaxpy3B__plus_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_div_int32
+GrB_Info GB_Adot4B__plus_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15729,10 +27298,10 @@ GrB_Info GB_Adot2B__plus_div_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_int32
+GrB_Info GB_Adot3B__plus_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15740,33 +27309,76 @@ GrB_Info GB_Adot3B__plus_div_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_int32
+GrB_Info GB_Asaxpy3B__plus_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_div_uint32
+GrB_Info GB_Asaxpy3B__plus_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_div_uint32
+GrB_Info GB_Adot4B__plus_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15774,10 +27386,10 @@ GrB_Info GB_Adot2B__plus_div_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_uint32
+GrB_Info GB_Adot3B__times_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15785,33 +27397,76 @@ GrB_Info GB_Adot3B__plus_div_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_uint32
+GrB_Info GB_Asaxpy3B__times_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__times_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__times_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_div_int64
+GrB_Info GB_Asaxpy3B__times_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_div_int64
+GrB_Info GB_Adot4B__times_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__times_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15819,10 +27474,10 @@ GrB_Info GB_Adot2B__plus_div_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_int64
+GrB_Info GB_Adot3B__times_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15830,33 +27485,32 @@ GrB_Info GB_Adot3B__plus_div_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_int64
+GrB_Info GB_Asaxpy3B__times_isne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_div_uint64
+GrB_Info GB_Adot4B__times_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_div_uint64
+GrB_Info GB_Adot2B__times_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15864,10 +27518,10 @@ GrB_Info GB_Adot2B__plus_div_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_uint64
+GrB_Info GB_Adot3B__times_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15875,32 +27529,32 @@ GrB_Info GB_Adot3B__plus_div_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_uint64
+GrB_Info GB_Asaxpy3B__times_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_div_fp32
+GrB_Info GB_Adot4B__times_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_div_fp32
+GrB_Info GB_Adot2B__times_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15908,10 +27562,10 @@ GrB_Info GB_Adot2B__plus_div_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_fp32
+GrB_Info GB_Adot3B__times_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15919,32 +27573,32 @@ GrB_Info GB_Adot3B__plus_div_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_fp32
+GrB_Info GB_Asaxpy3B__times_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_div_fp64
+GrB_Info GB_Adot4B__times_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_div_fp64
+GrB_Info GB_Adot2B__times_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15952,10 +27606,10 @@ GrB_Info GB_Adot2B__plus_div_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_div_fp64
+GrB_Info GB_Adot3B__times_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -15963,33 +27617,32 @@ GrB_Info GB_Adot3B__plus_div_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_div_fp64
+GrB_Info GB_Asaxpy3B__times_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_int8
+GrB_Info GB_Adot4B__times_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_int8
+GrB_Info GB_Adot2B__times_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -15997,10 +27650,10 @@ GrB_Info GB_Adot2B__times_div_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_int8
+GrB_Info GB_Adot3B__times_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16008,33 +27661,32 @@ GrB_Info GB_Adot3B__times_div_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_int8
+GrB_Info GB_Asaxpy3B__times_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_uint8
+GrB_Info GB_Adot4B__times_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_uint8
+GrB_Info GB_Adot2B__times_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16042,10 +27694,10 @@ GrB_Info GB_Adot2B__times_div_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_uint8
+GrB_Info GB_Adot3B__times_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16053,33 +27705,32 @@ GrB_Info GB_Adot3B__times_div_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_uint8
+GrB_Info GB_Asaxpy3B__times_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_int16
+GrB_Info GB_Adot4B__times_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_int16
+GrB_Info GB_Adot2B__times_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16087,10 +27738,10 @@ GrB_Info GB_Adot2B__times_div_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_int16
+GrB_Info GB_Adot3B__times_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16098,33 +27749,32 @@ GrB_Info GB_Adot3B__times_div_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_int16
+GrB_Info GB_Asaxpy3B__times_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_uint16
+GrB_Info GB_Adot4B__times_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_uint16
+GrB_Info GB_Adot2B__times_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16132,10 +27782,10 @@ GrB_Info GB_Adot2B__times_div_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_uint16
+GrB_Info GB_Adot3B__times_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16143,33 +27793,32 @@ GrB_Info GB_Adot3B__times_div_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_uint16
+GrB_Info GB_Asaxpy3B__times_isne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_int32
+GrB_Info GB_Adot4B__times_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_int32
+GrB_Info GB_Adot2B__min_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16177,10 +27826,10 @@ GrB_Info GB_Adot2B__times_div_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_int32
+GrB_Info GB_Adot3B__min_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16188,33 +27837,32 @@ GrB_Info GB_Adot3B__times_div_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_int32
+GrB_Info GB_Asaxpy3B__min_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_uint32
+GrB_Info GB_Adot4B__min_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_uint32
+GrB_Info GB_Adot2B__min_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16222,10 +27870,10 @@ GrB_Info GB_Adot2B__times_div_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_uint32
+GrB_Info GB_Adot3B__min_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16233,33 +27881,32 @@ GrB_Info GB_Adot3B__times_div_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_uint32
+GrB_Info GB_Asaxpy3B__min_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_int64
+GrB_Info GB_Adot4B__min_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_int64
+GrB_Info GB_Adot2B__min_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16267,10 +27914,10 @@ GrB_Info GB_Adot2B__times_div_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_int64
+GrB_Info GB_Adot3B__min_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16278,33 +27925,32 @@ GrB_Info GB_Adot3B__times_div_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_int64
+GrB_Info GB_Asaxpy3B__min_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_div_uint64
+GrB_Info GB_Adot4B__min_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_uint64
+GrB_Info GB_Adot2B__min_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16312,10 +27958,10 @@ GrB_Info GB_Adot2B__times_div_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_uint64
+GrB_Info GB_Adot3B__min_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16323,32 +27969,32 @@ GrB_Info GB_Adot3B__times_div_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_uint64
+GrB_Info GB_Asaxpy3B__min_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_div_fp32
+GrB_Info GB_Adot4B__min_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_fp32
+GrB_Info GB_Adot2B__min_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16356,10 +28002,10 @@ GrB_Info GB_Adot2B__times_div_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_fp32
+GrB_Info GB_Adot3B__min_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16367,32 +28013,32 @@ GrB_Info GB_Adot3B__times_div_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_fp32
+GrB_Info GB_Asaxpy3B__min_isgt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_div_fp64
+GrB_Info GB_Adot4B__min_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_div_fp64
+GrB_Info GB_Adot2B__min_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16400,10 +28046,10 @@ GrB_Info GB_Adot2B__times_div_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_div_fp64
+GrB_Info GB_Adot3B__min_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16411,33 +28057,32 @@ GrB_Info GB_Adot3B__times_div_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_div_fp64
+GrB_Info GB_Asaxpy3B__min_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_int8
+GrB_Info GB_Adot4B__min_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_int8
+GrB_Info GB_Adot2B__min_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16445,10 +28090,10 @@ GrB_Info GB_Adot2B__min_rdiv_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_int8
+GrB_Info GB_Adot3B__min_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16456,33 +28101,32 @@ GrB_Info GB_Adot3B__min_rdiv_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_int8
+GrB_Info GB_Asaxpy3B__min_isgt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_int16
+GrB_Info GB_Adot4B__min_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_int16
+GrB_Info GB_Adot2B__min_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16490,10 +28134,10 @@ GrB_Info GB_Adot2B__min_rdiv_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_int16
+GrB_Info GB_Adot3B__min_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16501,33 +28145,32 @@ GrB_Info GB_Adot3B__min_rdiv_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_int16
+GrB_Info GB_Asaxpy3B__min_isgt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_int32
+GrB_Info GB_Adot4B__min_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_int32
+GrB_Info GB_Adot2B__min_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16535,10 +28178,10 @@ GrB_Info GB_Adot2B__min_rdiv_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_int32
+GrB_Info GB_Adot3B__min_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16546,33 +28189,32 @@ GrB_Info GB_Adot3B__min_rdiv_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_int32
+GrB_Info GB_Asaxpy3B__min_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_int64
+GrB_Info GB_Adot4B__min_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_int64
+GrB_Info GB_Adot2B__min_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16580,10 +28222,10 @@ GrB_Info GB_Adot2B__min_rdiv_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_int64
+GrB_Info GB_Adot3B__min_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16591,33 +28233,32 @@ GrB_Info GB_Adot3B__min_rdiv_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_int64
+GrB_Info GB_Asaxpy3B__min_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_uint8
+GrB_Info GB_Adot4B__min_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_uint8
+GrB_Info GB_Adot2B__max_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16625,10 +28266,10 @@ GrB_Info GB_Adot2B__min_rdiv_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_uint8
+GrB_Info GB_Adot3B__max_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16636,33 +28277,32 @@ GrB_Info GB_Adot3B__min_rdiv_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_uint8
+GrB_Info GB_Asaxpy3B__max_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_uint16
+GrB_Info GB_Adot4B__max_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_uint16
+GrB_Info GB_Adot2B__max_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16670,10 +28310,10 @@ GrB_Info GB_Adot2B__min_rdiv_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_uint16
+GrB_Info GB_Adot3B__max_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16681,33 +28321,32 @@ GrB_Info GB_Adot3B__min_rdiv_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_uint16
+GrB_Info GB_Asaxpy3B__max_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_uint32
+GrB_Info GB_Adot4B__max_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_uint32
+GrB_Info GB_Adot2B__max_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16715,10 +28354,10 @@ GrB_Info GB_Adot2B__min_rdiv_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_uint32
+GrB_Info GB_Adot3B__max_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16726,33 +28365,32 @@ GrB_Info GB_Adot3B__min_rdiv_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_uint32
+GrB_Info GB_Asaxpy3B__max_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_rdiv_uint64
+GrB_Info GB_Adot4B__max_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_uint64
+GrB_Info GB_Adot2B__max_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16760,10 +28398,10 @@ GrB_Info GB_Adot2B__min_rdiv_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_uint64
+GrB_Info GB_Adot3B__max_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16771,32 +28409,32 @@ GrB_Info GB_Adot3B__min_rdiv_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_uint64
+GrB_Info GB_Asaxpy3B__max_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_rdiv_fp32
+GrB_Info GB_Adot4B__max_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_fp32
+GrB_Info GB_Adot2B__max_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16804,10 +28442,10 @@ GrB_Info GB_Adot2B__min_rdiv_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_fp32
+GrB_Info GB_Adot3B__max_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16815,32 +28453,32 @@ GrB_Info GB_Adot3B__min_rdiv_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_fp32
+GrB_Info GB_Asaxpy3B__max_isgt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_rdiv_fp64
+GrB_Info GB_Adot4B__max_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_rdiv_fp64
+GrB_Info GB_Adot2B__max_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16848,10 +28486,10 @@ GrB_Info GB_Adot2B__min_rdiv_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_rdiv_fp64
+GrB_Info GB_Adot3B__max_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16859,33 +28497,32 @@ GrB_Info GB_Adot3B__min_rdiv_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_rdiv_fp64
+GrB_Info GB_Asaxpy3B__max_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_int8
+GrB_Info GB_Adot4B__max_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_int8
+GrB_Info GB_Adot2B__max_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16893,10 +28530,10 @@ GrB_Info GB_Adot2B__max_rdiv_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_int8
+GrB_Info GB_Adot3B__max_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16904,33 +28541,32 @@ GrB_Info GB_Adot3B__max_rdiv_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_int8
+GrB_Info GB_Asaxpy3B__max_isgt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_int16
+GrB_Info GB_Adot4B__max_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_int16
+GrB_Info GB_Adot2B__max_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16938,10 +28574,10 @@ GrB_Info GB_Adot2B__max_rdiv_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_int16
+GrB_Info GB_Adot3B__max_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16949,33 +28585,32 @@ GrB_Info GB_Adot3B__max_rdiv_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_int16
+GrB_Info GB_Asaxpy3B__max_isgt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_int32
+GrB_Info GB_Adot4B__max_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_int32
+GrB_Info GB_Adot2B__max_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -16983,10 +28618,10 @@ GrB_Info GB_Adot2B__max_rdiv_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_int32
+GrB_Info GB_Adot3B__max_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -16994,33 +28629,32 @@ GrB_Info GB_Adot3B__max_rdiv_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_int32
+GrB_Info GB_Asaxpy3B__max_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_int64
+GrB_Info GB_Adot4B__max_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_int64
+GrB_Info GB_Adot2B__max_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17028,10 +28662,10 @@ GrB_Info GB_Adot2B__max_rdiv_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_int64
+GrB_Info GB_Adot3B__max_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17039,33 +28673,32 @@ GrB_Info GB_Adot3B__max_rdiv_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_int64
+GrB_Info GB_Asaxpy3B__max_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_uint8
+GrB_Info GB_Adot4B__max_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_uint8
+GrB_Info GB_Adot2B__any_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17073,10 +28706,10 @@ GrB_Info GB_Adot2B__max_rdiv_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_uint8
+GrB_Info GB_Adot3B__any_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17084,33 +28717,32 @@ GrB_Info GB_Adot3B__max_rdiv_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_uint8
+GrB_Info GB_Asaxpy3B__any_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_uint16
+GrB_Info GB_Adot4B__any_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_uint16
+GrB_Info GB_Adot2B__any_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17118,10 +28750,10 @@ GrB_Info GB_Adot2B__max_rdiv_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_uint16
+GrB_Info GB_Adot3B__any_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17129,33 +28761,32 @@ GrB_Info GB_Adot3B__max_rdiv_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_uint16
+GrB_Info GB_Asaxpy3B__any_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_uint32
+GrB_Info GB_Adot4B__any_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_uint32
+GrB_Info GB_Adot2B__any_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17163,10 +28794,10 @@ GrB_Info GB_Adot2B__max_rdiv_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_uint32
+GrB_Info GB_Adot3B__any_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17174,33 +28805,32 @@ GrB_Info GB_Adot3B__max_rdiv_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_uint32
+GrB_Info GB_Asaxpy3B__any_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_rdiv_uint64
+GrB_Info GB_Adot4B__any_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_uint64
+GrB_Info GB_Adot2B__any_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17208,10 +28838,10 @@ GrB_Info GB_Adot2B__max_rdiv_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_uint64
+GrB_Info GB_Adot3B__any_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17219,32 +28849,32 @@ GrB_Info GB_Adot3B__max_rdiv_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_uint64
+GrB_Info GB_Asaxpy3B__any_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_rdiv_fp32
+GrB_Info GB_Adot4B__any_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_fp32
+GrB_Info GB_Adot2B__any_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17252,10 +28882,10 @@ GrB_Info GB_Adot2B__max_rdiv_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_fp32
+GrB_Info GB_Adot3B__any_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17263,32 +28893,32 @@ GrB_Info GB_Adot3B__max_rdiv_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_fp32
+GrB_Info GB_Asaxpy3B__any_isgt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_rdiv_fp64
+GrB_Info GB_Adot4B__any_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_rdiv_fp64
+GrB_Info GB_Adot2B__any_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17296,10 +28926,10 @@ GrB_Info GB_Adot2B__max_rdiv_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_rdiv_fp64
+GrB_Info GB_Adot3B__any_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17307,33 +28937,32 @@ GrB_Info GB_Adot3B__max_rdiv_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_rdiv_fp64
+GrB_Info GB_Asaxpy3B__any_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_int8
+GrB_Info GB_Adot4B__any_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_int8
+GrB_Info GB_Adot2B__any_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17341,10 +28970,10 @@ GrB_Info GB_Adot2B__plus_rdiv_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_int8
+GrB_Info GB_Adot3B__any_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17352,33 +28981,32 @@ GrB_Info GB_Adot3B__plus_rdiv_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_int8
+GrB_Info GB_Asaxpy3B__any_isgt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_uint8
+GrB_Info GB_Adot4B__any_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_uint8
+GrB_Info GB_Adot2B__any_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17386,10 +29014,10 @@ GrB_Info GB_Adot2B__plus_rdiv_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_uint8
+GrB_Info GB_Adot3B__any_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17397,33 +29025,32 @@ GrB_Info GB_Adot3B__plus_rdiv_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_uint8
+GrB_Info GB_Asaxpy3B__any_isgt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_int16
+GrB_Info GB_Adot4B__any_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_int16
+GrB_Info GB_Adot2B__any_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17431,10 +29058,10 @@ GrB_Info GB_Adot2B__plus_rdiv_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_int16
+GrB_Info GB_Adot3B__any_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17442,33 +29069,32 @@ GrB_Info GB_Adot3B__plus_rdiv_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_int16
+GrB_Info GB_Asaxpy3B__any_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_uint16
+GrB_Info GB_Adot4B__any_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_uint16
+GrB_Info GB_Adot2B__any_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17476,10 +29102,10 @@ GrB_Info GB_Adot2B__plus_rdiv_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_uint16
+GrB_Info GB_Adot3B__any_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17487,33 +29113,32 @@ GrB_Info GB_Adot3B__plus_rdiv_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_uint16
+GrB_Info GB_Asaxpy3B__any_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_int32
+GrB_Info GB_Adot4B__any_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_int32
+GrB_Info GB_Adot2B__plus_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17521,10 +29146,10 @@ GrB_Info GB_Adot2B__plus_rdiv_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_int32
+GrB_Info GB_Adot3B__plus_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17532,33 +29157,32 @@ GrB_Info GB_Adot3B__plus_rdiv_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_int32
+GrB_Info GB_Asaxpy3B__plus_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_uint32
+GrB_Info GB_Adot4B__plus_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_uint32
+GrB_Info GB_Adot2B__plus_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17566,10 +29190,10 @@ GrB_Info GB_Adot2B__plus_rdiv_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_uint32
+GrB_Info GB_Adot3B__plus_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17577,33 +29201,32 @@ GrB_Info GB_Adot3B__plus_rdiv_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_uint32
+GrB_Info GB_Asaxpy3B__plus_isgt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_int64
+GrB_Info GB_Adot4B__plus_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_int64
+GrB_Info GB_Adot2B__plus_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17611,10 +29234,10 @@ GrB_Info GB_Adot2B__plus_rdiv_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_int64
+GrB_Info GB_Adot3B__plus_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17622,33 +29245,32 @@ GrB_Info GB_Adot3B__plus_rdiv_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_int64
+GrB_Info GB_Asaxpy3B__plus_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_rdiv_uint64
+GrB_Info GB_Adot4B__plus_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_uint64
+GrB_Info GB_Adot2B__plus_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17656,10 +29278,10 @@ GrB_Info GB_Adot2B__plus_rdiv_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_uint64
+GrB_Info GB_Adot3B__plus_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17667,32 +29289,32 @@ GrB_Info GB_Adot3B__plus_rdiv_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_uint64
+GrB_Info GB_Asaxpy3B__plus_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_rdiv_fp32
+GrB_Info GB_Adot4B__plus_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_fp32
+GrB_Info GB_Adot2B__plus_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17700,10 +29322,10 @@ GrB_Info GB_Adot2B__plus_rdiv_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_fp32
+GrB_Info GB_Adot3B__plus_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17711,32 +29333,32 @@ GrB_Info GB_Adot3B__plus_rdiv_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_fp32
+GrB_Info GB_Asaxpy3B__plus_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_rdiv_fp64
+GrB_Info GB_Adot4B__plus_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_rdiv_fp64
+GrB_Info GB_Adot2B__plus_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17744,10 +29366,10 @@ GrB_Info GB_Adot2B__plus_rdiv_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_rdiv_fp64
+GrB_Info GB_Adot3B__plus_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17755,33 +29377,32 @@ GrB_Info GB_Adot3B__plus_rdiv_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_rdiv_fp64
+GrB_Info GB_Asaxpy3B__plus_isgt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_int8
+GrB_Info GB_Adot4B__plus_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_int8
+GrB_Info GB_Adot2B__plus_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17789,10 +29410,10 @@ GrB_Info GB_Adot2B__times_rdiv_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_int8
+GrB_Info GB_Adot3B__plus_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17800,33 +29421,32 @@ GrB_Info GB_Adot3B__times_rdiv_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_int8
+GrB_Info GB_Asaxpy3B__plus_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_uint8
+GrB_Info GB_Adot4B__plus_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_uint8
+GrB_Info GB_Adot2B__plus_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17834,10 +29454,10 @@ GrB_Info GB_Adot2B__times_rdiv_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_uint8
+GrB_Info GB_Adot3B__plus_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17845,33 +29465,32 @@ GrB_Info GB_Adot3B__times_rdiv_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_uint8
+GrB_Info GB_Asaxpy3B__plus_isgt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_int16
+GrB_Info GB_Adot4B__plus_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_int16
+GrB_Info GB_Adot2B__plus_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17879,10 +29498,10 @@ GrB_Info GB_Adot2B__times_rdiv_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_int16
+GrB_Info GB_Adot3B__plus_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17890,33 +29509,32 @@ GrB_Info GB_Adot3B__times_rdiv_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_int16
+GrB_Info GB_Asaxpy3B__plus_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_uint16
+GrB_Info GB_Adot4B__plus_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_uint16
+GrB_Info GB_Adot2B__plus_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17924,10 +29542,10 @@ GrB_Info GB_Adot2B__times_rdiv_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_uint16
+GrB_Info GB_Adot3B__plus_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17935,33 +29553,32 @@ GrB_Info GB_Adot3B__times_rdiv_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_uint16
+GrB_Info GB_Asaxpy3B__plus_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_int32
+GrB_Info GB_Adot4B__plus_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_int32
+GrB_Info GB_Adot2B__times_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -17969,10 +29586,10 @@ GrB_Info GB_Adot2B__times_rdiv_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_int32
+GrB_Info GB_Adot3B__times_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -17980,33 +29597,32 @@ GrB_Info GB_Adot3B__times_rdiv_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_int32
+GrB_Info GB_Asaxpy3B__times_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_uint32
+GrB_Info GB_Adot4B__times_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_uint32
+GrB_Info GB_Adot2B__times_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18014,10 +29630,10 @@ GrB_Info GB_Adot2B__times_rdiv_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_uint32
+GrB_Info GB_Adot3B__times_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18025,33 +29641,32 @@ GrB_Info GB_Adot3B__times_rdiv_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_uint32
+GrB_Info GB_Asaxpy3B__times_isgt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_int64
+GrB_Info GB_Adot4B__times_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_int64
+GrB_Info GB_Adot2B__times_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18059,10 +29674,10 @@ GrB_Info GB_Adot2B__times_rdiv_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_int64
+GrB_Info GB_Adot3B__times_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18070,33 +29685,32 @@ GrB_Info GB_Adot3B__times_rdiv_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_int64
+GrB_Info GB_Asaxpy3B__times_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_rdiv_uint64
+GrB_Info GB_Adot4B__times_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_uint64
+GrB_Info GB_Adot2B__times_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18104,10 +29718,10 @@ GrB_Info GB_Adot2B__times_rdiv_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_uint64
+GrB_Info GB_Adot3B__times_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18115,32 +29729,32 @@ GrB_Info GB_Adot3B__times_rdiv_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_uint64
+GrB_Info GB_Asaxpy3B__times_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_rdiv_fp32
+GrB_Info GB_Adot4B__times_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_fp32
+GrB_Info GB_Adot2B__times_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18148,10 +29762,10 @@ GrB_Info GB_Adot2B__times_rdiv_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_fp32
+GrB_Info GB_Adot3B__times_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18159,32 +29773,32 @@ GrB_Info GB_Adot3B__times_rdiv_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_fp32
+GrB_Info GB_Asaxpy3B__times_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_rdiv_fp64
+GrB_Info GB_Adot4B__times_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_rdiv_fp64
+GrB_Info GB_Adot2B__times_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18192,10 +29806,10 @@ GrB_Info GB_Adot2B__times_rdiv_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_rdiv_fp64
+GrB_Info GB_Adot3B__times_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18203,32 +29817,32 @@ GrB_Info GB_Adot3B__times_rdiv_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_rdiv_fp64
+GrB_Info GB_Asaxpy3B__times_isgt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_int8
+GrB_Info GB_Adot4B__times_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_int8
+GrB_Info GB_Adot2B__times_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18236,10 +29850,10 @@ GrB_Info GB_Adot2B__min_iseq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_int8
+GrB_Info GB_Adot3B__times_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18247,32 +29861,32 @@ GrB_Info GB_Adot3B__min_iseq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_int8
+GrB_Info GB_Asaxpy3B__times_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_int16
+GrB_Info GB_Adot4B__times_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_int16
+GrB_Info GB_Adot2B__times_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18280,10 +29894,10 @@ GrB_Info GB_Adot2B__min_iseq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_int16
+GrB_Info GB_Adot3B__times_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18291,32 +29905,32 @@ GrB_Info GB_Adot3B__min_iseq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_int16
+GrB_Info GB_Asaxpy3B__times_isgt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_int32
+GrB_Info GB_Adot4B__times_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_int32
+GrB_Info GB_Adot2B__times_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18324,10 +29938,10 @@ GrB_Info GB_Adot2B__min_iseq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_int32
+GrB_Info GB_Adot3B__times_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18335,32 +29949,32 @@ GrB_Info GB_Adot3B__min_iseq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_int32
+GrB_Info GB_Asaxpy3B__times_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_int64
+GrB_Info GB_Adot4B__times_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_int64
+GrB_Info GB_Adot2B__times_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18368,10 +29982,10 @@ GrB_Info GB_Adot2B__min_iseq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_int64
+GrB_Info GB_Adot3B__times_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18379,32 +29993,32 @@ GrB_Info GB_Adot3B__min_iseq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_int64
+GrB_Info GB_Asaxpy3B__times_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_uint8
+GrB_Info GB_Adot4B__times_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_uint8
+GrB_Info GB_Adot2B__min_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18412,10 +30026,10 @@ GrB_Info GB_Adot2B__min_iseq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_uint8
+GrB_Info GB_Adot3B__min_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18423,32 +30037,32 @@ GrB_Info GB_Adot3B__min_iseq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_uint8
+GrB_Info GB_Asaxpy3B__min_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_uint16
+GrB_Info GB_Adot4B__min_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_uint16
+GrB_Info GB_Adot2B__min_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18456,10 +30070,10 @@ GrB_Info GB_Adot2B__min_iseq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_uint16
+GrB_Info GB_Adot3B__min_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18467,32 +30081,32 @@ GrB_Info GB_Adot3B__min_iseq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_uint16
+GrB_Info GB_Asaxpy3B__min_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_uint32
+GrB_Info GB_Adot4B__min_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_uint32
+GrB_Info GB_Adot2B__min_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18500,10 +30114,10 @@ GrB_Info GB_Adot2B__min_iseq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_uint32
+GrB_Info GB_Adot3B__min_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18511,32 +30125,32 @@ GrB_Info GB_Adot3B__min_iseq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_uint32
+GrB_Info GB_Asaxpy3B__min_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_uint64
+GrB_Info GB_Adot4B__min_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_uint64
+GrB_Info GB_Adot2B__min_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18544,10 +30158,10 @@ GrB_Info GB_Adot2B__min_iseq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_uint64
+GrB_Info GB_Adot3B__min_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18555,32 +30169,32 @@ GrB_Info GB_Adot3B__min_iseq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_uint64
+GrB_Info GB_Asaxpy3B__min_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_fp32
+GrB_Info GB_Adot4B__min_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_fp32
+GrB_Info GB_Adot2B__min_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18588,10 +30202,10 @@ GrB_Info GB_Adot2B__min_iseq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_fp32
+GrB_Info GB_Adot3B__min_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18599,32 +30213,32 @@ GrB_Info GB_Adot3B__min_iseq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_fp32
+GrB_Info GB_Asaxpy3B__min_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_iseq_fp64
+GrB_Info GB_Adot4B__min_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_iseq_fp64
+GrB_Info GB_Adot2B__min_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18632,10 +30246,10 @@ GrB_Info GB_Adot2B__min_iseq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_iseq_fp64
+GrB_Info GB_Adot3B__min_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18643,32 +30257,32 @@ GrB_Info GB_Adot3B__min_iseq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_iseq_fp64
+GrB_Info GB_Asaxpy3B__min_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_int8
+GrB_Info GB_Adot4B__min_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_int8
+GrB_Info GB_Adot2B__min_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18676,10 +30290,10 @@ GrB_Info GB_Adot2B__max_iseq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_int8
+GrB_Info GB_Adot3B__min_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18687,32 +30301,32 @@ GrB_Info GB_Adot3B__max_iseq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_int8
+GrB_Info GB_Asaxpy3B__min_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_int16
+GrB_Info GB_Adot4B__min_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_int16
+GrB_Info GB_Adot2B__min_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18720,10 +30334,10 @@ GrB_Info GB_Adot2B__max_iseq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_int16
+GrB_Info GB_Adot3B__min_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18731,32 +30345,32 @@ GrB_Info GB_Adot3B__max_iseq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_int16
+GrB_Info GB_Asaxpy3B__min_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_int32
+GrB_Info GB_Adot4B__min_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_int32
+GrB_Info GB_Adot2B__min_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18764,10 +30378,10 @@ GrB_Info GB_Adot2B__max_iseq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_int32
+GrB_Info GB_Adot3B__min_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18775,32 +30389,32 @@ GrB_Info GB_Adot3B__max_iseq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_int32
+GrB_Info GB_Asaxpy3B__min_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_int64
+GrB_Info GB_Adot4B__min_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_int64
+GrB_Info GB_Adot2B__min_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18808,10 +30422,10 @@ GrB_Info GB_Adot2B__max_iseq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_int64
+GrB_Info GB_Adot3B__min_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18819,32 +30433,32 @@ GrB_Info GB_Adot3B__max_iseq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_int64
+GrB_Info GB_Asaxpy3B__min_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_uint8
+GrB_Info GB_Adot4B__min_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_uint8
+GrB_Info GB_Adot2B__max_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18852,10 +30466,10 @@ GrB_Info GB_Adot2B__max_iseq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_uint8
+GrB_Info GB_Adot3B__max_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18863,32 +30477,32 @@ GrB_Info GB_Adot3B__max_iseq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_uint8
+GrB_Info GB_Asaxpy3B__max_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_uint16
+GrB_Info GB_Adot4B__max_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_uint16
+GrB_Info GB_Adot2B__max_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18896,10 +30510,10 @@ GrB_Info GB_Adot2B__max_iseq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_uint16
+GrB_Info GB_Adot3B__max_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18907,32 +30521,32 @@ GrB_Info GB_Adot3B__max_iseq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_uint16
+GrB_Info GB_Asaxpy3B__max_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_uint32
+GrB_Info GB_Adot4B__max_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_uint32
+GrB_Info GB_Adot2B__max_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18940,10 +30554,10 @@ GrB_Info GB_Adot2B__max_iseq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_uint32
+GrB_Info GB_Adot3B__max_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18951,32 +30565,32 @@ GrB_Info GB_Adot3B__max_iseq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_uint32
+GrB_Info GB_Asaxpy3B__max_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_uint64
+GrB_Info GB_Adot4B__max_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_uint64
+GrB_Info GB_Adot2B__max_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -18984,10 +30598,10 @@ GrB_Info GB_Adot2B__max_iseq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_uint64
+GrB_Info GB_Adot3B__max_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -18995,32 +30609,32 @@ GrB_Info GB_Adot3B__max_iseq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_uint64
+GrB_Info GB_Asaxpy3B__max_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_fp32
+GrB_Info GB_Adot4B__max_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_fp32
+GrB_Info GB_Adot2B__max_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19028,10 +30642,10 @@ GrB_Info GB_Adot2B__max_iseq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_fp32
+GrB_Info GB_Adot3B__max_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19039,32 +30653,32 @@ GrB_Info GB_Adot3B__max_iseq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_fp32
+GrB_Info GB_Asaxpy3B__max_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_iseq_fp64
+GrB_Info GB_Adot4B__max_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_iseq_fp64
+GrB_Info GB_Adot2B__max_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19072,10 +30686,10 @@ GrB_Info GB_Adot2B__max_iseq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_iseq_fp64
+GrB_Info GB_Adot3B__max_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19083,32 +30697,32 @@ GrB_Info GB_Adot3B__max_iseq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_iseq_fp64
+GrB_Info GB_Asaxpy3B__max_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_int8
+GrB_Info GB_Adot4B__max_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_int8
+GrB_Info GB_Adot2B__max_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19116,10 +30730,10 @@ GrB_Info GB_Adot2B__plus_iseq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_int8
+GrB_Info GB_Adot3B__max_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19127,32 +30741,32 @@ GrB_Info GB_Adot3B__plus_iseq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_int8
+GrB_Info GB_Asaxpy3B__max_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_uint8
+GrB_Info GB_Adot4B__max_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_uint8
+GrB_Info GB_Adot2B__max_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19160,10 +30774,10 @@ GrB_Info GB_Adot2B__plus_iseq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_uint8
+GrB_Info GB_Adot3B__max_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19171,32 +30785,32 @@ GrB_Info GB_Adot3B__plus_iseq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_uint8
+GrB_Info GB_Asaxpy3B__max_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_int16
+GrB_Info GB_Adot4B__max_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_int16
+GrB_Info GB_Adot2B__max_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19204,10 +30818,10 @@ GrB_Info GB_Adot2B__plus_iseq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_int16
+GrB_Info GB_Adot3B__max_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19215,32 +30829,32 @@ GrB_Info GB_Adot3B__plus_iseq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_int16
+GrB_Info GB_Asaxpy3B__max_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_uint16
+GrB_Info GB_Adot4B__max_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_uint16
+GrB_Info GB_Adot2B__max_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19248,10 +30862,10 @@ GrB_Info GB_Adot2B__plus_iseq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_uint16
+GrB_Info GB_Adot3B__max_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19259,32 +30873,32 @@ GrB_Info GB_Adot3B__plus_iseq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_uint16
+GrB_Info GB_Asaxpy3B__max_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_int32
+GrB_Info GB_Adot4B__max_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_int32
+GrB_Info GB_Adot2B__any_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19292,10 +30906,10 @@ GrB_Info GB_Adot2B__plus_iseq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_int32
+GrB_Info GB_Adot3B__any_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19303,32 +30917,32 @@ GrB_Info GB_Adot3B__plus_iseq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_int32
+GrB_Info GB_Asaxpy3B__any_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_uint32
+GrB_Info GB_Adot4B__any_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_uint32
+GrB_Info GB_Adot2B__any_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19336,10 +30950,10 @@ GrB_Info GB_Adot2B__plus_iseq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_uint32
+GrB_Info GB_Adot3B__any_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19347,32 +30961,32 @@ GrB_Info GB_Adot3B__plus_iseq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_uint32
+GrB_Info GB_Asaxpy3B__any_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_int64
+GrB_Info GB_Adot4B__any_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_int64
+GrB_Info GB_Adot2B__any_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19380,10 +30994,10 @@ GrB_Info GB_Adot2B__plus_iseq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_int64
+GrB_Info GB_Adot3B__any_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19391,32 +31005,32 @@ GrB_Info GB_Adot3B__plus_iseq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_int64
+GrB_Info GB_Asaxpy3B__any_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_uint64
+GrB_Info GB_Adot4B__any_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_uint64
+GrB_Info GB_Adot2B__any_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19424,10 +31038,10 @@ GrB_Info GB_Adot2B__plus_iseq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_uint64
+GrB_Info GB_Adot3B__any_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19435,32 +31049,32 @@ GrB_Info GB_Adot3B__plus_iseq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_uint64
+GrB_Info GB_Asaxpy3B__any_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_fp32
+GrB_Info GB_Adot4B__any_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_fp32
+GrB_Info GB_Adot2B__any_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19468,10 +31082,10 @@ GrB_Info GB_Adot2B__plus_iseq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_fp32
+GrB_Info GB_Adot3B__any_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19479,32 +31093,32 @@ GrB_Info GB_Adot3B__plus_iseq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_fp32
+GrB_Info GB_Asaxpy3B__any_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_iseq_fp64
+GrB_Info GB_Adot4B__any_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_iseq_fp64
+GrB_Info GB_Adot2B__any_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19512,10 +31126,10 @@ GrB_Info GB_Adot2B__plus_iseq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_iseq_fp64
+GrB_Info GB_Adot3B__any_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19523,32 +31137,32 @@ GrB_Info GB_Adot3B__plus_iseq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_iseq_fp64
+GrB_Info GB_Asaxpy3B__any_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_int8
+GrB_Info GB_Adot4B__any_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_int8
+GrB_Info GB_Adot2B__any_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19556,10 +31170,10 @@ GrB_Info GB_Adot2B__times_iseq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_int8
+GrB_Info GB_Adot3B__any_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19567,32 +31181,32 @@ GrB_Info GB_Adot3B__times_iseq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_int8
+GrB_Info GB_Asaxpy3B__any_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_uint8
+GrB_Info GB_Adot4B__any_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_uint8
+GrB_Info GB_Adot2B__any_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19600,10 +31214,10 @@ GrB_Info GB_Adot2B__times_iseq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_uint8
+GrB_Info GB_Adot3B__any_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19611,32 +31225,32 @@ GrB_Info GB_Adot3B__times_iseq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_uint8
+GrB_Info GB_Asaxpy3B__any_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_int16
+GrB_Info GB_Adot4B__any_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_int16
+GrB_Info GB_Adot2B__any_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19644,10 +31258,10 @@ GrB_Info GB_Adot2B__times_iseq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_int16
+GrB_Info GB_Adot3B__any_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19655,32 +31269,32 @@ GrB_Info GB_Adot3B__times_iseq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_int16
+GrB_Info GB_Asaxpy3B__any_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_uint16
+GrB_Info GB_Adot4B__any_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_uint16
+GrB_Info GB_Adot2B__any_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19688,10 +31302,10 @@ GrB_Info GB_Adot2B__times_iseq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_uint16
+GrB_Info GB_Adot3B__any_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19699,32 +31313,32 @@ GrB_Info GB_Adot3B__times_iseq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_uint16
+GrB_Info GB_Asaxpy3B__any_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_int32
+GrB_Info GB_Adot4B__any_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_int32
+GrB_Info GB_Adot2B__plus_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19732,10 +31346,10 @@ GrB_Info GB_Adot2B__times_iseq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_int32
+GrB_Info GB_Adot3B__plus_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19743,32 +31357,32 @@ GrB_Info GB_Adot3B__times_iseq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_int32
+GrB_Info GB_Asaxpy3B__plus_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_uint32
+GrB_Info GB_Adot4B__plus_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_uint32
+GrB_Info GB_Adot2B__plus_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19776,10 +31390,10 @@ GrB_Info GB_Adot2B__times_iseq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_uint32
+GrB_Info GB_Adot3B__plus_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19787,32 +31401,32 @@ GrB_Info GB_Adot3B__times_iseq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_uint32
+GrB_Info GB_Asaxpy3B__plus_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_int64
+GrB_Info GB_Adot4B__plus_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_int64
+GrB_Info GB_Adot2B__plus_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19820,10 +31434,10 @@ GrB_Info GB_Adot2B__times_iseq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_int64
+GrB_Info GB_Adot3B__plus_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19831,32 +31445,32 @@ GrB_Info GB_Adot3B__times_iseq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_int64
+GrB_Info GB_Asaxpy3B__plus_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_uint64
+GrB_Info GB_Adot4B__plus_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_uint64
+GrB_Info GB_Adot2B__plus_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19864,10 +31478,10 @@ GrB_Info GB_Adot2B__times_iseq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_uint64
+GrB_Info GB_Adot3B__plus_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19875,32 +31489,32 @@ GrB_Info GB_Adot3B__times_iseq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_uint64
+GrB_Info GB_Asaxpy3B__plus_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_fp32
+GrB_Info GB_Adot4B__plus_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_fp32
+GrB_Info GB_Adot2B__plus_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19908,10 +31522,10 @@ GrB_Info GB_Adot2B__times_iseq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_fp32
+GrB_Info GB_Adot3B__plus_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19919,32 +31533,32 @@ GrB_Info GB_Adot3B__times_iseq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_fp32
+GrB_Info GB_Asaxpy3B__plus_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_iseq_fp64
+GrB_Info GB_Adot4B__plus_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_iseq_fp64
+GrB_Info GB_Adot2B__plus_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19952,10 +31566,10 @@ GrB_Info GB_Adot2B__times_iseq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_iseq_fp64
+GrB_Info GB_Adot3B__plus_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -19963,32 +31577,32 @@ GrB_Info GB_Adot3B__times_iseq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_iseq_fp64
+GrB_Info GB_Asaxpy3B__plus_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_int8
+GrB_Info GB_Adot4B__plus_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_int8
+GrB_Info GB_Adot2B__plus_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -19996,10 +31610,10 @@ GrB_Info GB_Adot2B__min_isne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_int8
+GrB_Info GB_Adot3B__plus_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20007,32 +31621,32 @@ GrB_Info GB_Adot3B__min_isne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_int8
+GrB_Info GB_Asaxpy3B__plus_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_int16
+GrB_Info GB_Adot4B__plus_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_int16
+GrB_Info GB_Adot2B__plus_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20040,10 +31654,10 @@ GrB_Info GB_Adot2B__min_isne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_int16
+GrB_Info GB_Adot3B__plus_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20051,32 +31665,32 @@ GrB_Info GB_Adot3B__min_isne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_int16
+GrB_Info GB_Asaxpy3B__plus_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_int32
+GrB_Info GB_Adot4B__plus_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_int32
+GrB_Info GB_Adot2B__plus_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20084,10 +31698,10 @@ GrB_Info GB_Adot2B__min_isne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_int32
+GrB_Info GB_Adot3B__plus_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20095,32 +31709,32 @@ GrB_Info GB_Adot3B__min_isne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_int32
+GrB_Info GB_Asaxpy3B__plus_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_int64
+GrB_Info GB_Adot4B__plus_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_int64
+GrB_Info GB_Adot2B__plus_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20128,10 +31742,10 @@ GrB_Info GB_Adot2B__min_isne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_int64
+GrB_Info GB_Adot3B__plus_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20139,32 +31753,32 @@ GrB_Info GB_Adot3B__min_isne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_int64
+GrB_Info GB_Asaxpy3B__plus_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_uint8
+GrB_Info GB_Adot4B__plus_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_uint8
+GrB_Info GB_Adot2B__times_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20172,10 +31786,10 @@ GrB_Info GB_Adot2B__min_isne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_uint8
+GrB_Info GB_Adot3B__times_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20183,32 +31797,32 @@ GrB_Info GB_Adot3B__min_isne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_uint8
+GrB_Info GB_Asaxpy3B__times_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_uint16
+GrB_Info GB_Adot4B__times_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_uint16
+GrB_Info GB_Adot2B__times_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20216,10 +31830,10 @@ GrB_Info GB_Adot2B__min_isne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_uint16
+GrB_Info GB_Adot3B__times_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20227,32 +31841,32 @@ GrB_Info GB_Adot3B__min_isne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_uint16
+GrB_Info GB_Asaxpy3B__times_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_uint32
+GrB_Info GB_Adot4B__times_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_uint32
+GrB_Info GB_Adot2B__times_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20260,10 +31874,10 @@ GrB_Info GB_Adot2B__min_isne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_uint32
+GrB_Info GB_Adot3B__times_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20271,32 +31885,32 @@ GrB_Info GB_Adot3B__min_isne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_uint32
+GrB_Info GB_Asaxpy3B__times_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_uint64
+GrB_Info GB_Adot4B__times_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_uint64
+GrB_Info GB_Adot2B__times_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20304,10 +31918,10 @@ GrB_Info GB_Adot2B__min_isne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_uint64
+GrB_Info GB_Adot3B__times_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20315,32 +31929,32 @@ GrB_Info GB_Adot3B__min_isne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_uint64
+GrB_Info GB_Asaxpy3B__times_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_fp32
+GrB_Info GB_Adot4B__times_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_fp32
+GrB_Info GB_Adot2B__times_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20348,10 +31962,10 @@ GrB_Info GB_Adot2B__min_isne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_fp32
+GrB_Info GB_Adot3B__times_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20359,32 +31973,32 @@ GrB_Info GB_Adot3B__min_isne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_fp32
+GrB_Info GB_Asaxpy3B__times_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isne_fp64
+GrB_Info GB_Adot4B__times_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isne_fp64
+GrB_Info GB_Adot2B__times_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20392,10 +32006,10 @@ GrB_Info GB_Adot2B__min_isne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isne_fp64
+GrB_Info GB_Adot3B__times_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20403,32 +32017,32 @@ GrB_Info GB_Adot3B__min_isne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isne_fp64
+GrB_Info GB_Asaxpy3B__times_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_int8
+GrB_Info GB_Adot4B__times_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_int8
+GrB_Info GB_Adot2B__times_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20436,10 +32050,10 @@ GrB_Info GB_Adot2B__max_isne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_int8
+GrB_Info GB_Adot3B__times_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20447,32 +32061,32 @@ GrB_Info GB_Adot3B__max_isne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_int8
+GrB_Info GB_Asaxpy3B__times_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_int16
+GrB_Info GB_Adot4B__times_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_int16
+GrB_Info GB_Adot2B__times_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20480,10 +32094,10 @@ GrB_Info GB_Adot2B__max_isne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_int16
+GrB_Info GB_Adot3B__times_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20491,32 +32105,32 @@ GrB_Info GB_Adot3B__max_isne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_int16
+GrB_Info GB_Asaxpy3B__times_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_int32
+GrB_Info GB_Adot4B__times_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_int32
+GrB_Info GB_Adot2B__times_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20524,10 +32138,10 @@ GrB_Info GB_Adot2B__max_isne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_int32
+GrB_Info GB_Adot3B__times_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20535,32 +32149,32 @@ GrB_Info GB_Adot3B__max_isne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_int32
+GrB_Info GB_Asaxpy3B__times_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_int64
+GrB_Info GB_Adot4B__times_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_int64
+GrB_Info GB_Adot2B__times_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20568,10 +32182,10 @@ GrB_Info GB_Adot2B__max_isne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_int64
+GrB_Info GB_Adot3B__times_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20579,32 +32193,32 @@ GrB_Info GB_Adot3B__max_isne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_int64
+GrB_Info GB_Asaxpy3B__times_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_uint8
+GrB_Info GB_Adot4B__times_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_uint8
+GrB_Info GB_Adot2B__min_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20612,10 +32226,10 @@ GrB_Info GB_Adot2B__max_isne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_uint8
+GrB_Info GB_Adot3B__min_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20623,32 +32237,32 @@ GrB_Info GB_Adot3B__max_isne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_uint8
+GrB_Info GB_Asaxpy3B__min_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_uint16
+GrB_Info GB_Adot4B__min_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_uint16
+GrB_Info GB_Adot2B__min_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20656,10 +32270,10 @@ GrB_Info GB_Adot2B__max_isne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_uint16
+GrB_Info GB_Adot3B__min_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20667,32 +32281,32 @@ GrB_Info GB_Adot3B__max_isne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_uint16
+GrB_Info GB_Asaxpy3B__min_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_uint32
+GrB_Info GB_Adot4B__min_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_uint32
+GrB_Info GB_Adot2B__min_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20700,10 +32314,10 @@ GrB_Info GB_Adot2B__max_isne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_uint32
+GrB_Info GB_Adot3B__min_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20711,32 +32325,32 @@ GrB_Info GB_Adot3B__max_isne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_uint32
+GrB_Info GB_Asaxpy3B__min_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_uint64
+GrB_Info GB_Adot4B__min_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_uint64
+GrB_Info GB_Adot2B__min_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20744,10 +32358,10 @@ GrB_Info GB_Adot2B__max_isne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_uint64
+GrB_Info GB_Adot3B__min_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20755,32 +32369,32 @@ GrB_Info GB_Adot3B__max_isne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_uint64
+GrB_Info GB_Asaxpy3B__min_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_fp32
+GrB_Info GB_Adot4B__min_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_fp32
+GrB_Info GB_Adot2B__min_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20788,10 +32402,10 @@ GrB_Info GB_Adot2B__max_isne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_fp32
+GrB_Info GB_Adot3B__min_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20799,32 +32413,32 @@ GrB_Info GB_Adot3B__max_isne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_fp32
+GrB_Info GB_Asaxpy3B__min_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isne_fp64
+GrB_Info GB_Adot4B__min_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isne_fp64
+GrB_Info GB_Adot2B__min_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20832,10 +32446,10 @@ GrB_Info GB_Adot2B__max_isne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isne_fp64
+GrB_Info GB_Adot3B__min_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20843,32 +32457,32 @@ GrB_Info GB_Adot3B__max_isne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isne_fp64
+GrB_Info GB_Asaxpy3B__min_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_int8
+GrB_Info GB_Adot4B__min_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_int8
+GrB_Info GB_Adot2B__min_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20876,10 +32490,10 @@ GrB_Info GB_Adot2B__plus_isne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_int8
+GrB_Info GB_Adot3B__min_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20887,32 +32501,32 @@ GrB_Info GB_Adot3B__plus_isne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_int8
+GrB_Info GB_Asaxpy3B__min_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_uint8
+GrB_Info GB_Adot4B__min_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_uint8
+GrB_Info GB_Adot2B__min_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20920,10 +32534,10 @@ GrB_Info GB_Adot2B__plus_isne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_uint8
+GrB_Info GB_Adot3B__min_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20931,32 +32545,32 @@ GrB_Info GB_Adot3B__plus_isne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_uint8
+GrB_Info GB_Asaxpy3B__min_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_int16
+GrB_Info GB_Adot4B__min_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_int16
+GrB_Info GB_Adot2B__min_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -20964,10 +32578,10 @@ GrB_Info GB_Adot2B__plus_isne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_int16
+GrB_Info GB_Adot3B__min_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -20975,32 +32589,32 @@ GrB_Info GB_Adot3B__plus_isne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_int16
+GrB_Info GB_Asaxpy3B__min_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_uint16
+GrB_Info GB_Adot4B__min_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_uint16
+GrB_Info GB_Adot2B__min_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21008,10 +32622,10 @@ GrB_Info GB_Adot2B__plus_isne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_uint16
+GrB_Info GB_Adot3B__min_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21019,32 +32633,32 @@ GrB_Info GB_Adot3B__plus_isne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_uint16
+GrB_Info GB_Asaxpy3B__min_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_int32
+GrB_Info GB_Adot4B__min_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_int32
+GrB_Info GB_Adot2B__max_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21052,10 +32666,10 @@ GrB_Info GB_Adot2B__plus_isne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_int32
+GrB_Info GB_Adot3B__max_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21063,32 +32677,32 @@ GrB_Info GB_Adot3B__plus_isne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_int32
+GrB_Info GB_Asaxpy3B__max_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_uint32
+GrB_Info GB_Adot4B__max_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_uint32
+GrB_Info GB_Adot2B__max_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21096,10 +32710,10 @@ GrB_Info GB_Adot2B__plus_isne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_uint32
+GrB_Info GB_Adot3B__max_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21107,32 +32721,32 @@ GrB_Info GB_Adot3B__plus_isne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_uint32
+GrB_Info GB_Asaxpy3B__max_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_int64
+GrB_Info GB_Adot4B__max_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_int64
+GrB_Info GB_Adot2B__max_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21140,10 +32754,10 @@ GrB_Info GB_Adot2B__plus_isne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_int64
+GrB_Info GB_Adot3B__max_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21151,32 +32765,32 @@ GrB_Info GB_Adot3B__plus_isne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_int64
+GrB_Info GB_Asaxpy3B__max_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_uint64
+GrB_Info GB_Adot4B__max_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_uint64
+GrB_Info GB_Adot2B__max_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21184,10 +32798,10 @@ GrB_Info GB_Adot2B__plus_isne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_uint64
+GrB_Info GB_Adot3B__max_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21195,32 +32809,32 @@ GrB_Info GB_Adot3B__plus_isne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_uint64
+GrB_Info GB_Asaxpy3B__max_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_fp32
+GrB_Info GB_Adot4B__max_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_fp32
+GrB_Info GB_Adot2B__max_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21228,10 +32842,10 @@ GrB_Info GB_Adot2B__plus_isne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_fp32
+GrB_Info GB_Adot3B__max_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21239,32 +32853,32 @@ GrB_Info GB_Adot3B__plus_isne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_fp32
+GrB_Info GB_Asaxpy3B__max_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isne_fp64
+GrB_Info GB_Adot4B__max_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isne_fp64
+GrB_Info GB_Adot2B__max_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21272,10 +32886,10 @@ GrB_Info GB_Adot2B__plus_isne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isne_fp64
+GrB_Info GB_Adot3B__max_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21283,32 +32897,32 @@ GrB_Info GB_Adot3B__plus_isne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isne_fp64
+GrB_Info GB_Asaxpy3B__max_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_int8
+GrB_Info GB_Adot4B__max_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_int8
+GrB_Info GB_Adot2B__max_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21316,10 +32930,10 @@ GrB_Info GB_Adot2B__times_isne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_int8
+GrB_Info GB_Adot3B__max_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21327,32 +32941,32 @@ GrB_Info GB_Adot3B__times_isne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_int8
+GrB_Info GB_Asaxpy3B__max_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_uint8
+GrB_Info GB_Adot4B__max_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_uint8
+GrB_Info GB_Adot2B__max_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21360,10 +32974,10 @@ GrB_Info GB_Adot2B__times_isne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_uint8
+GrB_Info GB_Adot3B__max_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21371,32 +32985,32 @@ GrB_Info GB_Adot3B__times_isne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_uint8
+GrB_Info GB_Asaxpy3B__max_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_int16
+GrB_Info GB_Adot4B__max_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_int16
+GrB_Info GB_Adot2B__max_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21404,10 +33018,10 @@ GrB_Info GB_Adot2B__times_isne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_int16
+GrB_Info GB_Adot3B__max_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21415,32 +33029,32 @@ GrB_Info GB_Adot3B__times_isne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_int16
+GrB_Info GB_Asaxpy3B__max_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_uint16
+GrB_Info GB_Adot4B__max_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_uint16
+GrB_Info GB_Adot2B__max_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21448,10 +33062,10 @@ GrB_Info GB_Adot2B__times_isne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_uint16
+GrB_Info GB_Adot3B__max_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21459,32 +33073,32 @@ GrB_Info GB_Adot3B__times_isne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_uint16
+GrB_Info GB_Asaxpy3B__max_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_int32
+GrB_Info GB_Adot4B__max_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_int32
+GrB_Info GB_Adot2B__any_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21492,10 +33106,10 @@ GrB_Info GB_Adot2B__times_isne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_int32
+GrB_Info GB_Adot3B__any_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21503,32 +33117,32 @@ GrB_Info GB_Adot3B__times_isne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_int32
+GrB_Info GB_Asaxpy3B__any_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_uint32
+GrB_Info GB_Adot4B__any_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_uint32
+GrB_Info GB_Adot2B__any_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21536,10 +33150,10 @@ GrB_Info GB_Adot2B__times_isne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_uint32
+GrB_Info GB_Adot3B__any_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21547,32 +33161,32 @@ GrB_Info GB_Adot3B__times_isne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_uint32
+GrB_Info GB_Asaxpy3B__any_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_int64
+GrB_Info GB_Adot4B__any_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_int64
+GrB_Info GB_Adot2B__any_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21580,10 +33194,10 @@ GrB_Info GB_Adot2B__times_isne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_int64
+GrB_Info GB_Adot3B__any_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21591,32 +33205,32 @@ GrB_Info GB_Adot3B__times_isne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_int64
+GrB_Info GB_Asaxpy3B__any_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_uint64
+GrB_Info GB_Adot4B__any_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_uint64
+GrB_Info GB_Adot2B__any_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21624,10 +33238,10 @@ GrB_Info GB_Adot2B__times_isne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_uint64
+GrB_Info GB_Adot3B__any_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21635,32 +33249,32 @@ GrB_Info GB_Adot3B__times_isne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_uint64
+GrB_Info GB_Asaxpy3B__any_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_fp32
+GrB_Info GB_Adot4B__any_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_fp32
+GrB_Info GB_Adot2B__any_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21668,10 +33282,10 @@ GrB_Info GB_Adot2B__times_isne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_fp32
+GrB_Info GB_Adot3B__any_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21679,32 +33293,32 @@ GrB_Info GB_Adot3B__times_isne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_fp32
+GrB_Info GB_Asaxpy3B__any_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isne_fp64
+GrB_Info GB_Adot4B__any_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isne_fp64
+GrB_Info GB_Adot2B__any_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21712,10 +33326,10 @@ GrB_Info GB_Adot2B__times_isne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isne_fp64
+GrB_Info GB_Adot3B__any_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21723,32 +33337,32 @@ GrB_Info GB_Adot3B__times_isne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isne_fp64
+GrB_Info GB_Asaxpy3B__any_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_int8
+GrB_Info GB_Adot4B__any_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_int8
+GrB_Info GB_Adot2B__any_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21756,10 +33370,10 @@ GrB_Info GB_Adot2B__min_isgt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_int8
+GrB_Info GB_Adot3B__any_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21767,32 +33381,32 @@ GrB_Info GB_Adot3B__min_isgt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_int8
+GrB_Info GB_Asaxpy3B__any_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_int16
+GrB_Info GB_Adot4B__any_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_int16
+GrB_Info GB_Adot2B__any_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21800,10 +33414,10 @@ GrB_Info GB_Adot2B__min_isgt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_int16
+GrB_Info GB_Adot3B__any_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21811,32 +33425,32 @@ GrB_Info GB_Adot3B__min_isgt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_int16
+GrB_Info GB_Asaxpy3B__any_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_int32
+GrB_Info GB_Adot4B__any_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_int32
+GrB_Info GB_Adot2B__any_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21844,10 +33458,10 @@ GrB_Info GB_Adot2B__min_isgt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_int32
+GrB_Info GB_Adot3B__any_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21855,32 +33469,32 @@ GrB_Info GB_Adot3B__min_isgt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_int32
+GrB_Info GB_Asaxpy3B__any_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_int64
+GrB_Info GB_Adot4B__any_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_int64
+GrB_Info GB_Adot2B__any_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21888,10 +33502,10 @@ GrB_Info GB_Adot2B__min_isgt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_int64
+GrB_Info GB_Adot3B__any_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21899,32 +33513,32 @@ GrB_Info GB_Adot3B__min_isgt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_int64
+GrB_Info GB_Asaxpy3B__any_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_uint8
+GrB_Info GB_Adot4B__any_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_uint8
+GrB_Info GB_Adot2B__plus_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21932,10 +33546,10 @@ GrB_Info GB_Adot2B__min_isgt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_uint8
+GrB_Info GB_Adot3B__plus_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21943,32 +33557,32 @@ GrB_Info GB_Adot3B__min_isgt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_uint8
+GrB_Info GB_Asaxpy3B__plus_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_uint16
+GrB_Info GB_Adot4B__plus_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_uint16
+GrB_Info GB_Adot2B__plus_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21976,10 +33590,10 @@ GrB_Info GB_Adot2B__min_isgt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_uint16
+GrB_Info GB_Adot3B__plus_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -21987,32 +33601,32 @@ GrB_Info GB_Adot3B__min_isgt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_uint16
+GrB_Info GB_Asaxpy3B__plus_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_uint32
+GrB_Info GB_Adot4B__plus_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_uint32
+GrB_Info GB_Adot2B__plus_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22020,10 +33634,10 @@ GrB_Info GB_Adot2B__min_isgt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_uint32
+GrB_Info GB_Adot3B__plus_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22031,32 +33645,32 @@ GrB_Info GB_Adot3B__min_isgt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_uint32
+GrB_Info GB_Asaxpy3B__plus_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_uint64
+GrB_Info GB_Adot4B__plus_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_uint64
+GrB_Info GB_Adot2B__plus_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22064,10 +33678,10 @@ GrB_Info GB_Adot2B__min_isgt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_uint64
+GrB_Info GB_Adot3B__plus_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22075,32 +33689,32 @@ GrB_Info GB_Adot3B__min_isgt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_uint64
+GrB_Info GB_Asaxpy3B__plus_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_fp32
+GrB_Info GB_Adot4B__plus_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_fp32
+GrB_Info GB_Adot2B__plus_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22108,10 +33722,10 @@ GrB_Info GB_Adot2B__min_isgt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_fp32
+GrB_Info GB_Adot3B__plus_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22119,32 +33733,32 @@ GrB_Info GB_Adot3B__min_isgt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_fp32
+GrB_Info GB_Asaxpy3B__plus_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isgt_fp64
+GrB_Info GB_Adot4B__plus_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isgt_fp64
+GrB_Info GB_Adot2B__plus_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22152,10 +33766,10 @@ GrB_Info GB_Adot2B__min_isgt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isgt_fp64
+GrB_Info GB_Adot3B__plus_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22163,32 +33777,32 @@ GrB_Info GB_Adot3B__min_isgt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isgt_fp64
+GrB_Info GB_Asaxpy3B__plus_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_int8
+GrB_Info GB_Adot4B__plus_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_int8
+GrB_Info GB_Adot2B__plus_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22196,10 +33810,10 @@ GrB_Info GB_Adot2B__max_isgt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_int8
+GrB_Info GB_Adot3B__plus_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22207,32 +33821,32 @@ GrB_Info GB_Adot3B__max_isgt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_int8
+GrB_Info GB_Asaxpy3B__plus_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_int16
+GrB_Info GB_Adot4B__plus_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_int16
+GrB_Info GB_Adot2B__plus_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22240,10 +33854,10 @@ GrB_Info GB_Adot2B__max_isgt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_int16
+GrB_Info GB_Adot3B__plus_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22251,32 +33865,32 @@ GrB_Info GB_Adot3B__max_isgt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_int16
+GrB_Info GB_Asaxpy3B__plus_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_int32
+GrB_Info GB_Adot4B__plus_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_int32
+GrB_Info GB_Adot2B__plus_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22284,10 +33898,10 @@ GrB_Info GB_Adot2B__max_isgt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_int32
+GrB_Info GB_Adot3B__plus_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22295,32 +33909,32 @@ GrB_Info GB_Adot3B__max_isgt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_int32
+GrB_Info GB_Asaxpy3B__plus_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_int64
+GrB_Info GB_Adot4B__plus_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_int64
+GrB_Info GB_Adot2B__plus_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22328,10 +33942,10 @@ GrB_Info GB_Adot2B__max_isgt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_int64
+GrB_Info GB_Adot3B__plus_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22339,32 +33953,32 @@ GrB_Info GB_Adot3B__max_isgt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_int64
+GrB_Info GB_Asaxpy3B__plus_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_uint8
+GrB_Info GB_Adot4B__plus_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_uint8
+GrB_Info GB_Adot2B__times_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22372,10 +33986,10 @@ GrB_Info GB_Adot2B__max_isgt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_uint8
+GrB_Info GB_Adot3B__times_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22383,32 +33997,32 @@ GrB_Info GB_Adot3B__max_isgt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_uint8
+GrB_Info GB_Asaxpy3B__times_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_uint16
+GrB_Info GB_Adot4B__times_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_uint16
+GrB_Info GB_Adot2B__times_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22416,10 +34030,10 @@ GrB_Info GB_Adot2B__max_isgt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_uint16
+GrB_Info GB_Adot3B__times_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22427,32 +34041,32 @@ GrB_Info GB_Adot3B__max_isgt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_uint16
+GrB_Info GB_Asaxpy3B__times_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_uint32
+GrB_Info GB_Adot4B__times_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_uint32
+GrB_Info GB_Adot2B__times_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22460,10 +34074,10 @@ GrB_Info GB_Adot2B__max_isgt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_uint32
+GrB_Info GB_Adot3B__times_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22471,32 +34085,32 @@ GrB_Info GB_Adot3B__max_isgt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_uint32
+GrB_Info GB_Asaxpy3B__times_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_uint64
+GrB_Info GB_Adot4B__times_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_uint64
+GrB_Info GB_Adot2B__times_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22504,10 +34118,10 @@ GrB_Info GB_Adot2B__max_isgt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_uint64
+GrB_Info GB_Adot3B__times_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22515,32 +34129,32 @@ GrB_Info GB_Adot3B__max_isgt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_uint64
+GrB_Info GB_Asaxpy3B__times_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_fp32
+GrB_Info GB_Adot4B__times_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_fp32
+GrB_Info GB_Adot2B__times_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22548,10 +34162,10 @@ GrB_Info GB_Adot2B__max_isgt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_fp32
+GrB_Info GB_Adot3B__times_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22559,32 +34173,32 @@ GrB_Info GB_Adot3B__max_isgt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_fp32
+GrB_Info GB_Asaxpy3B__times_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isgt_fp64
+GrB_Info GB_Adot4B__times_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isgt_fp64
+GrB_Info GB_Adot2B__times_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22592,10 +34206,10 @@ GrB_Info GB_Adot2B__max_isgt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isgt_fp64
+GrB_Info GB_Adot3B__times_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22603,32 +34217,32 @@ GrB_Info GB_Adot3B__max_isgt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isgt_fp64
+GrB_Info GB_Asaxpy3B__times_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_int8
+GrB_Info GB_Adot4B__times_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_int8
+GrB_Info GB_Adot2B__times_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22636,10 +34250,10 @@ GrB_Info GB_Adot2B__plus_isgt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_int8
+GrB_Info GB_Adot3B__times_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22647,32 +34261,32 @@ GrB_Info GB_Adot3B__plus_isgt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_int8
+GrB_Info GB_Asaxpy3B__times_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_uint8
+GrB_Info GB_Adot4B__times_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_uint8
+GrB_Info GB_Adot2B__times_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22680,10 +34294,10 @@ GrB_Info GB_Adot2B__plus_isgt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_uint8
+GrB_Info GB_Adot3B__times_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22691,32 +34305,32 @@ GrB_Info GB_Adot3B__plus_isgt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_uint8
+GrB_Info GB_Asaxpy3B__times_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_int16
+GrB_Info GB_Adot4B__times_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_int16
+GrB_Info GB_Adot2B__times_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22724,10 +34338,10 @@ GrB_Info GB_Adot2B__plus_isgt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_int16
+GrB_Info GB_Adot3B__times_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22735,32 +34349,32 @@ GrB_Info GB_Adot3B__plus_isgt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_int16
+GrB_Info GB_Asaxpy3B__times_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_uint16
+GrB_Info GB_Adot4B__times_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_uint16
+GrB_Info GB_Adot2B__times_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22768,10 +34382,10 @@ GrB_Info GB_Adot2B__plus_isgt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_uint16
+GrB_Info GB_Adot3B__times_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22779,32 +34393,32 @@ GrB_Info GB_Adot3B__plus_isgt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_uint16
+GrB_Info GB_Asaxpy3B__times_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_int32
+GrB_Info GB_Adot4B__times_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_int32
+GrB_Info GB_Adot2B__min_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22812,10 +34426,10 @@ GrB_Info GB_Adot2B__plus_isgt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_int32
+GrB_Info GB_Adot3B__min_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22823,32 +34437,32 @@ GrB_Info GB_Adot3B__plus_isgt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_int32
+GrB_Info GB_Asaxpy3B__min_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_uint32
+GrB_Info GB_Adot4B__min_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_uint32
+GrB_Info GB_Adot2B__min_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22856,10 +34470,10 @@ GrB_Info GB_Adot2B__plus_isgt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_uint32
+GrB_Info GB_Adot3B__min_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22867,32 +34481,32 @@ GrB_Info GB_Adot3B__plus_isgt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_uint32
+GrB_Info GB_Asaxpy3B__min_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_int64
+GrB_Info GB_Adot4B__min_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_int64
+GrB_Info GB_Adot2B__min_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22900,10 +34514,10 @@ GrB_Info GB_Adot2B__plus_isgt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_int64
+GrB_Info GB_Adot3B__min_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22911,32 +34525,32 @@ GrB_Info GB_Adot3B__plus_isgt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_int64
+GrB_Info GB_Asaxpy3B__min_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_uint64
+GrB_Info GB_Adot4B__min_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_uint64
+GrB_Info GB_Adot2B__min_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22944,10 +34558,10 @@ GrB_Info GB_Adot2B__plus_isgt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_uint64
+GrB_Info GB_Adot3B__min_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22955,32 +34569,32 @@ GrB_Info GB_Adot3B__plus_isgt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_uint64
+GrB_Info GB_Asaxpy3B__min_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_fp32
+GrB_Info GB_Adot4B__min_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_fp32
+GrB_Info GB_Adot2B__min_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -22988,10 +34602,10 @@ GrB_Info GB_Adot2B__plus_isgt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_fp32
+GrB_Info GB_Adot3B__min_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -22999,32 +34613,32 @@ GrB_Info GB_Adot3B__plus_isgt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_fp32
+GrB_Info GB_Asaxpy3B__min_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isgt_fp64
+GrB_Info GB_Adot4B__min_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isgt_fp64
+GrB_Info GB_Adot2B__min_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23032,10 +34646,10 @@ GrB_Info GB_Adot2B__plus_isgt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isgt_fp64
+GrB_Info GB_Adot3B__min_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23043,32 +34657,32 @@ GrB_Info GB_Adot3B__plus_isgt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isgt_fp64
+GrB_Info GB_Asaxpy3B__min_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_int8
+GrB_Info GB_Adot4B__min_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_int8
+GrB_Info GB_Adot2B__min_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23076,10 +34690,10 @@ GrB_Info GB_Adot2B__times_isgt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_int8
+GrB_Info GB_Adot3B__min_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23087,32 +34701,32 @@ GrB_Info GB_Adot3B__times_isgt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_int8
+GrB_Info GB_Asaxpy3B__min_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_uint8
+GrB_Info GB_Adot4B__min_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_uint8
+GrB_Info GB_Adot2B__min_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23120,10 +34734,10 @@ GrB_Info GB_Adot2B__times_isgt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_uint8
+GrB_Info GB_Adot3B__min_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23131,32 +34745,32 @@ GrB_Info GB_Adot3B__times_isgt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_uint8
+GrB_Info GB_Asaxpy3B__min_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_int16
+GrB_Info GB_Adot4B__min_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_int16
+GrB_Info GB_Adot2B__min_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23164,10 +34778,10 @@ GrB_Info GB_Adot2B__times_isgt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_int16
+GrB_Info GB_Adot3B__min_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23175,32 +34789,32 @@ GrB_Info GB_Adot3B__times_isgt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_int16
+GrB_Info GB_Asaxpy3B__min_isle_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_uint16
+GrB_Info GB_Adot4B__min_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_uint16
+GrB_Info GB_Adot2B__min_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23208,10 +34822,10 @@ GrB_Info GB_Adot2B__times_isgt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_uint16
+GrB_Info GB_Adot3B__min_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23219,32 +34833,32 @@ GrB_Info GB_Adot3B__times_isgt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_uint16
+GrB_Info GB_Asaxpy3B__min_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_int32
+GrB_Info GB_Adot4B__min_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_int32
+GrB_Info GB_Adot2B__max_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23252,10 +34866,10 @@ GrB_Info GB_Adot2B__times_isgt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_int32
+GrB_Info GB_Adot3B__max_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23263,32 +34877,32 @@ GrB_Info GB_Adot3B__times_isgt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_int32
+GrB_Info GB_Asaxpy3B__max_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_uint32
+GrB_Info GB_Adot4B__max_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_uint32
+GrB_Info GB_Adot2B__max_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23296,10 +34910,10 @@ GrB_Info GB_Adot2B__times_isgt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_uint32
+GrB_Info GB_Adot3B__max_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23307,32 +34921,32 @@ GrB_Info GB_Adot3B__times_isgt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_uint32
+GrB_Info GB_Asaxpy3B__max_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_int64
+GrB_Info GB_Adot4B__max_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_int64
+GrB_Info GB_Adot2B__max_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23340,10 +34954,10 @@ GrB_Info GB_Adot2B__times_isgt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_int64
+GrB_Info GB_Adot3B__max_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23351,32 +34965,32 @@ GrB_Info GB_Adot3B__times_isgt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_int64
+GrB_Info GB_Asaxpy3B__max_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_uint64
+GrB_Info GB_Adot4B__max_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_uint64
+GrB_Info GB_Adot2B__max_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23384,10 +34998,10 @@ GrB_Info GB_Adot2B__times_isgt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_uint64
+GrB_Info GB_Adot3B__max_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23395,32 +35009,32 @@ GrB_Info GB_Adot3B__times_isgt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_uint64
+GrB_Info GB_Asaxpy3B__max_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_fp32
+GrB_Info GB_Adot4B__max_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_fp32
+GrB_Info GB_Adot2B__max_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23428,10 +35042,10 @@ GrB_Info GB_Adot2B__times_isgt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_fp32
+GrB_Info GB_Adot3B__max_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23439,32 +35053,32 @@ GrB_Info GB_Adot3B__times_isgt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_fp32
+GrB_Info GB_Asaxpy3B__max_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isgt_fp64
+GrB_Info GB_Adot4B__max_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isgt_fp64
+GrB_Info GB_Adot2B__max_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23472,10 +35086,10 @@ GrB_Info GB_Adot2B__times_isgt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isgt_fp64
+GrB_Info GB_Adot3B__max_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23483,32 +35097,32 @@ GrB_Info GB_Adot3B__times_isgt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isgt_fp64
+GrB_Info GB_Asaxpy3B__max_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_int8
+GrB_Info GB_Adot4B__max_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_int8
+GrB_Info GB_Adot2B__max_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23516,10 +35130,10 @@ GrB_Info GB_Adot2B__min_islt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_int8
+GrB_Info GB_Adot3B__max_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23527,32 +35141,32 @@ GrB_Info GB_Adot3B__min_islt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_int8
+GrB_Info GB_Asaxpy3B__max_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_int16
+GrB_Info GB_Adot4B__max_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_int16
+GrB_Info GB_Adot2B__max_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23560,10 +35174,10 @@ GrB_Info GB_Adot2B__min_islt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_int16
+GrB_Info GB_Adot3B__max_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23571,32 +35185,32 @@ GrB_Info GB_Adot3B__min_islt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_int16
+GrB_Info GB_Asaxpy3B__max_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_int32
+GrB_Info GB_Adot4B__max_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_int32
+GrB_Info GB_Adot2B__max_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23604,10 +35218,10 @@ GrB_Info GB_Adot2B__min_islt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_int32
+GrB_Info GB_Adot3B__max_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23615,32 +35229,32 @@ GrB_Info GB_Adot3B__min_islt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_int32
+GrB_Info GB_Asaxpy3B__max_isle_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_int64
+GrB_Info GB_Adot4B__max_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_int64
+GrB_Info GB_Adot2B__max_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23648,10 +35262,10 @@ GrB_Info GB_Adot2B__min_islt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_int64
+GrB_Info GB_Adot3B__max_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23659,32 +35273,32 @@ GrB_Info GB_Adot3B__min_islt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_int64
+GrB_Info GB_Asaxpy3B__max_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_uint8
+GrB_Info GB_Adot4B__max_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_uint8
+GrB_Info GB_Adot2B__any_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23692,10 +35306,10 @@ GrB_Info GB_Adot2B__min_islt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_uint8
+GrB_Info GB_Adot3B__any_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23703,32 +35317,32 @@ GrB_Info GB_Adot3B__min_islt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_uint8
+GrB_Info GB_Asaxpy3B__any_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_uint16
+GrB_Info GB_Adot4B__any_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_uint16
+GrB_Info GB_Adot2B__any_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23736,10 +35350,10 @@ GrB_Info GB_Adot2B__min_islt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_uint16
+GrB_Info GB_Adot3B__any_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23747,32 +35361,32 @@ GrB_Info GB_Adot3B__min_islt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_uint16
+GrB_Info GB_Asaxpy3B__any_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_uint32
+GrB_Info GB_Adot4B__any_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_uint32
+GrB_Info GB_Adot2B__any_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23780,10 +35394,10 @@ GrB_Info GB_Adot2B__min_islt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_uint32
+GrB_Info GB_Adot3B__any_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23791,32 +35405,32 @@ GrB_Info GB_Adot3B__min_islt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_uint32
+GrB_Info GB_Asaxpy3B__any_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_uint64
+GrB_Info GB_Adot4B__any_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_uint64
+GrB_Info GB_Adot2B__any_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23824,10 +35438,10 @@ GrB_Info GB_Adot2B__min_islt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_uint64
+GrB_Info GB_Adot3B__any_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23835,32 +35449,32 @@ GrB_Info GB_Adot3B__min_islt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_uint64
+GrB_Info GB_Asaxpy3B__any_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_fp32
+GrB_Info GB_Adot4B__any_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_fp32
+GrB_Info GB_Adot2B__any_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23868,10 +35482,10 @@ GrB_Info GB_Adot2B__min_islt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_fp32
+GrB_Info GB_Adot3B__any_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23879,32 +35493,32 @@ GrB_Info GB_Adot3B__min_islt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_fp32
+GrB_Info GB_Asaxpy3B__any_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_islt_fp64
+GrB_Info GB_Adot4B__any_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_islt_fp64
+GrB_Info GB_Adot2B__any_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23912,10 +35526,10 @@ GrB_Info GB_Adot2B__min_islt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_islt_fp64
+GrB_Info GB_Adot3B__any_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23923,32 +35537,32 @@ GrB_Info GB_Adot3B__min_islt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_islt_fp64
+GrB_Info GB_Asaxpy3B__any_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_int8
+GrB_Info GB_Adot4B__any_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_int8
+GrB_Info GB_Adot2B__any_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -23956,10 +35570,10 @@ GrB_Info GB_Adot2B__max_islt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_int8
+GrB_Info GB_Adot3B__any_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -23967,32 +35581,32 @@ GrB_Info GB_Adot3B__max_islt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_int8
+GrB_Info GB_Asaxpy3B__any_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_int16
+GrB_Info GB_Adot4B__any_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_int16
+GrB_Info GB_Adot2B__any_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24000,10 +35614,10 @@ GrB_Info GB_Adot2B__max_islt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_int16
+GrB_Info GB_Adot3B__any_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24011,32 +35625,32 @@ GrB_Info GB_Adot3B__max_islt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_int16
+GrB_Info GB_Asaxpy3B__any_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_int32
+GrB_Info GB_Adot4B__any_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_int32
+GrB_Info GB_Adot2B__any_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24044,10 +35658,10 @@ GrB_Info GB_Adot2B__max_islt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_int32
+GrB_Info GB_Adot3B__any_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24055,32 +35669,32 @@ GrB_Info GB_Adot3B__max_islt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_int32
+GrB_Info GB_Asaxpy3B__any_isle_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_int64
+GrB_Info GB_Adot4B__any_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_int64
+GrB_Info GB_Adot2B__any_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24088,10 +35702,10 @@ GrB_Info GB_Adot2B__max_islt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_int64
+GrB_Info GB_Adot3B__any_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24099,32 +35713,32 @@ GrB_Info GB_Adot3B__max_islt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_int64
+GrB_Info GB_Asaxpy3B__any_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_uint8
+GrB_Info GB_Adot4B__any_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_uint8
+GrB_Info GB_Adot2B__plus_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24132,10 +35746,10 @@ GrB_Info GB_Adot2B__max_islt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_uint8
+GrB_Info GB_Adot3B__plus_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24143,32 +35757,32 @@ GrB_Info GB_Adot3B__max_islt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_uint8
+GrB_Info GB_Asaxpy3B__plus_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_uint16
+GrB_Info GB_Adot4B__plus_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_uint16
+GrB_Info GB_Adot2B__plus_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24176,10 +35790,10 @@ GrB_Info GB_Adot2B__max_islt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_uint16
+GrB_Info GB_Adot3B__plus_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24187,32 +35801,32 @@ GrB_Info GB_Adot3B__max_islt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_uint16
+GrB_Info GB_Asaxpy3B__plus_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_uint32
+GrB_Info GB_Adot4B__plus_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_uint32
+GrB_Info GB_Adot2B__plus_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24220,10 +35834,10 @@ GrB_Info GB_Adot2B__max_islt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_uint32
+GrB_Info GB_Adot3B__plus_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24231,32 +35845,32 @@ GrB_Info GB_Adot3B__max_islt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_uint32
+GrB_Info GB_Asaxpy3B__plus_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_uint64
+GrB_Info GB_Adot4B__plus_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_uint64
+GrB_Info GB_Adot2B__plus_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24264,10 +35878,10 @@ GrB_Info GB_Adot2B__max_islt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_uint64
+GrB_Info GB_Adot3B__plus_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24275,32 +35889,32 @@ GrB_Info GB_Adot3B__max_islt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_uint64
+GrB_Info GB_Asaxpy3B__plus_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_fp32
+GrB_Info GB_Adot4B__plus_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_fp32
+GrB_Info GB_Adot2B__plus_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24308,10 +35922,10 @@ GrB_Info GB_Adot2B__max_islt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_fp32
+GrB_Info GB_Adot3B__plus_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24319,32 +35933,32 @@ GrB_Info GB_Adot3B__max_islt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_fp32
+GrB_Info GB_Asaxpy3B__plus_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_islt_fp64
+GrB_Info GB_Adot4B__plus_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_islt_fp64
+GrB_Info GB_Adot2B__plus_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24352,10 +35966,10 @@ GrB_Info GB_Adot2B__max_islt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_islt_fp64
+GrB_Info GB_Adot3B__plus_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24363,32 +35977,32 @@ GrB_Info GB_Adot3B__max_islt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_islt_fp64
+GrB_Info GB_Asaxpy3B__plus_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_int8
+GrB_Info GB_Adot4B__plus_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_int8
+GrB_Info GB_Adot2B__plus_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24396,10 +36010,10 @@ GrB_Info GB_Adot2B__plus_islt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_int8
+GrB_Info GB_Adot3B__plus_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24407,32 +36021,32 @@ GrB_Info GB_Adot3B__plus_islt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_int8
+GrB_Info GB_Asaxpy3B__plus_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_uint8
+GrB_Info GB_Adot4B__plus_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_uint8
+GrB_Info GB_Adot2B__plus_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24440,10 +36054,10 @@ GrB_Info GB_Adot2B__plus_islt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_uint8
+GrB_Info GB_Adot3B__plus_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24451,32 +36065,32 @@ GrB_Info GB_Adot3B__plus_islt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_uint8
+GrB_Info GB_Asaxpy3B__plus_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_int16
+GrB_Info GB_Adot4B__plus_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_int16
+GrB_Info GB_Adot2B__plus_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24484,10 +36098,10 @@ GrB_Info GB_Adot2B__plus_islt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_int16
+GrB_Info GB_Adot3B__plus_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24495,32 +36109,32 @@ GrB_Info GB_Adot3B__plus_islt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_int16
+GrB_Info GB_Asaxpy3B__plus_isle_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_uint16
+GrB_Info GB_Adot4B__plus_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_uint16
+GrB_Info GB_Adot2B__plus_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24528,10 +36142,10 @@ GrB_Info GB_Adot2B__plus_islt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_uint16
+GrB_Info GB_Adot3B__plus_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24539,32 +36153,32 @@ GrB_Info GB_Adot3B__plus_islt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_uint16
+GrB_Info GB_Asaxpy3B__plus_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_int32
+GrB_Info GB_Adot4B__plus_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_int32
+GrB_Info GB_Adot2B__times_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24572,10 +36186,10 @@ GrB_Info GB_Adot2B__plus_islt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_int32
+GrB_Info GB_Adot3B__times_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24583,32 +36197,32 @@ GrB_Info GB_Adot3B__plus_islt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_int32
+GrB_Info GB_Asaxpy3B__times_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_uint32
+GrB_Info GB_Adot4B__times_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_uint32
+GrB_Info GB_Adot2B__times_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24616,10 +36230,10 @@ GrB_Info GB_Adot2B__plus_islt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_uint32
+GrB_Info GB_Adot3B__times_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24627,32 +36241,32 @@ GrB_Info GB_Adot3B__plus_islt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_uint32
+GrB_Info GB_Asaxpy3B__times_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_int64
+GrB_Info GB_Adot4B__times_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_int64
+GrB_Info GB_Adot2B__times_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24660,10 +36274,10 @@ GrB_Info GB_Adot2B__plus_islt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_int64
+GrB_Info GB_Adot3B__times_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24671,32 +36285,32 @@ GrB_Info GB_Adot3B__plus_islt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_int64
+GrB_Info GB_Asaxpy3B__times_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_uint64
+GrB_Info GB_Adot4B__times_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_uint64
+GrB_Info GB_Adot2B__times_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24704,10 +36318,10 @@ GrB_Info GB_Adot2B__plus_islt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_uint64
+GrB_Info GB_Adot3B__times_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24715,32 +36329,32 @@ GrB_Info GB_Adot3B__plus_islt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_uint64
+GrB_Info GB_Asaxpy3B__times_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_fp32
+GrB_Info GB_Adot4B__times_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_fp32
+GrB_Info GB_Adot2B__times_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24748,10 +36362,10 @@ GrB_Info GB_Adot2B__plus_islt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_fp32
+GrB_Info GB_Adot3B__times_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24759,32 +36373,32 @@ GrB_Info GB_Adot3B__plus_islt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_fp32
+GrB_Info GB_Asaxpy3B__times_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_islt_fp64
+GrB_Info GB_Adot4B__times_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_islt_fp64
+GrB_Info GB_Adot2B__times_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24792,10 +36406,10 @@ GrB_Info GB_Adot2B__plus_islt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_islt_fp64
+GrB_Info GB_Adot3B__times_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24803,32 +36417,32 @@ GrB_Info GB_Adot3B__plus_islt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_islt_fp64
+GrB_Info GB_Asaxpy3B__times_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_int8
+GrB_Info GB_Adot4B__times_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_int8
+GrB_Info GB_Adot2B__times_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24836,10 +36450,10 @@ GrB_Info GB_Adot2B__times_islt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_int8
+GrB_Info GB_Adot3B__times_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24847,32 +36461,32 @@ GrB_Info GB_Adot3B__times_islt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_int8
+GrB_Info GB_Asaxpy3B__times_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_uint8
+GrB_Info GB_Adot4B__times_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_uint8
+GrB_Info GB_Adot2B__times_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24880,10 +36494,10 @@ GrB_Info GB_Adot2B__times_islt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_uint8
+GrB_Info GB_Adot3B__times_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24891,32 +36505,32 @@ GrB_Info GB_Adot3B__times_islt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_uint8
+GrB_Info GB_Asaxpy3B__times_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_int16
+GrB_Info GB_Adot4B__times_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_int16
+GrB_Info GB_Adot2B__times_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24924,10 +36538,10 @@ GrB_Info GB_Adot2B__times_islt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_int16
+GrB_Info GB_Adot3B__times_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24935,32 +36549,32 @@ GrB_Info GB_Adot3B__times_islt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_int16
+GrB_Info GB_Asaxpy3B__times_isle_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_uint16
+GrB_Info GB_Adot4B__times_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_uint16
+GrB_Info GB_Adot2B__times_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -24968,10 +36582,10 @@ GrB_Info GB_Adot2B__times_islt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_uint16
+GrB_Info GB_Adot3B__times_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -24979,32 +36593,32 @@ GrB_Info GB_Adot3B__times_islt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_uint16
+GrB_Info GB_Asaxpy3B__times_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_int32
+GrB_Info GB_Adot4B__times_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_int32
+GrB_Info GB_Adot2B__lor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25012,10 +36626,10 @@ GrB_Info GB_Adot2B__times_islt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_int32
+GrB_Info GB_Adot3B__lor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25023,32 +36637,32 @@ GrB_Info GB_Adot3B__times_islt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_int32
+GrB_Info GB_Asaxpy3B__lor_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_uint32
+GrB_Info GB_Adot4B__lor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_uint32
+GrB_Info GB_Adot2B__lor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25056,10 +36670,10 @@ GrB_Info GB_Adot2B__times_islt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_uint32
+GrB_Info GB_Adot3B__lor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25067,32 +36681,32 @@ GrB_Info GB_Adot3B__times_islt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_uint32
+GrB_Info GB_Asaxpy3B__lor_eq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_int64
+GrB_Info GB_Adot4B__lor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_int64
+GrB_Info GB_Adot2B__lor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25100,10 +36714,10 @@ GrB_Info GB_Adot2B__times_islt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_int64
+GrB_Info GB_Adot3B__lor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25111,32 +36725,32 @@ GrB_Info GB_Adot3B__times_islt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_int64
+GrB_Info GB_Asaxpy3B__lor_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_uint64
+GrB_Info GB_Adot4B__lor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_uint64
+GrB_Info GB_Adot2B__lor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25144,10 +36758,10 @@ GrB_Info GB_Adot2B__times_islt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_uint64
+GrB_Info GB_Adot3B__lor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25155,32 +36769,32 @@ GrB_Info GB_Adot3B__times_islt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_uint64
+GrB_Info GB_Asaxpy3B__lor_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_fp32
+GrB_Info GB_Adot4B__lor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_fp32
+GrB_Info GB_Adot2B__lor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25188,10 +36802,10 @@ GrB_Info GB_Adot2B__times_islt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_fp32
+GrB_Info GB_Adot3B__lor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25199,32 +36813,32 @@ GrB_Info GB_Adot3B__times_islt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_fp32
+GrB_Info GB_Asaxpy3B__lor_eq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_islt_fp64
+GrB_Info GB_Adot4B__lor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_islt_fp64
+GrB_Info GB_Adot2B__lor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25232,10 +36846,10 @@ GrB_Info GB_Adot2B__times_islt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_islt_fp64
+GrB_Info GB_Adot3B__lor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25243,32 +36857,32 @@ GrB_Info GB_Adot3B__times_islt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_islt_fp64
+GrB_Info GB_Asaxpy3B__lor_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_int8
+GrB_Info GB_Adot4B__lor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_int8
+GrB_Info GB_Adot2B__lor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25276,10 +36890,10 @@ GrB_Info GB_Adot2B__min_isge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_int8
+GrB_Info GB_Adot3B__lor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25287,32 +36901,32 @@ GrB_Info GB_Adot3B__min_isge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_int8
+GrB_Info GB_Asaxpy3B__lor_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_int16
+GrB_Info GB_Adot4B__lor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_int16
+GrB_Info GB_Adot2B__lor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25320,10 +36934,10 @@ GrB_Info GB_Adot2B__min_isge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_int16
+GrB_Info GB_Adot3B__lor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25331,32 +36945,32 @@ GrB_Info GB_Adot3B__min_isge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_int16
+GrB_Info GB_Asaxpy3B__lor_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_int32
+GrB_Info GB_Adot4B__lor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_int32
+GrB_Info GB_Adot2B__lor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25364,10 +36978,10 @@ GrB_Info GB_Adot2B__min_isge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_int32
+GrB_Info GB_Adot3B__lor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25375,32 +36989,32 @@ GrB_Info GB_Adot3B__min_isge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_int32
+GrB_Info GB_Asaxpy3B__lor_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_int64
+GrB_Info GB_Adot4B__lor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_int64
+GrB_Info GB_Adot2B__lor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25408,10 +37022,10 @@ GrB_Info GB_Adot2B__min_isge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_int64
+GrB_Info GB_Adot3B__lor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25419,32 +37033,32 @@ GrB_Info GB_Adot3B__min_isge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_int64
+GrB_Info GB_Asaxpy3B__lor_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_uint8
+GrB_Info GB_Adot4B__lor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_uint8
+GrB_Info GB_Adot2B__lor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25452,10 +37066,10 @@ GrB_Info GB_Adot2B__min_isge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_uint8
+GrB_Info GB_Adot3B__lor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25463,32 +37077,32 @@ GrB_Info GB_Adot3B__min_isge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_uint8
+GrB_Info GB_Asaxpy3B__lor_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_uint16
+GrB_Info GB_Adot4B__lor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_uint16
+GrB_Info GB_Adot2B__any_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25496,10 +37110,10 @@ GrB_Info GB_Adot2B__min_isge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_uint16
+GrB_Info GB_Adot3B__any_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25507,32 +37121,32 @@ GrB_Info GB_Adot3B__min_isge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_uint16
+GrB_Info GB_Asaxpy3B__any_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_uint32
+GrB_Info GB_Adot4B__any_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_uint32
+GrB_Info GB_Adot2B__any_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25540,10 +37154,10 @@ GrB_Info GB_Adot2B__min_isge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_uint32
+GrB_Info GB_Adot3B__any_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25551,32 +37165,32 @@ GrB_Info GB_Adot3B__min_isge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_uint32
+GrB_Info GB_Asaxpy3B__any_eq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_uint64
+GrB_Info GB_Adot4B__any_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_uint64
+GrB_Info GB_Adot2B__any_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25584,10 +37198,10 @@ GrB_Info GB_Adot2B__min_isge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_uint64
+GrB_Info GB_Adot3B__any_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25595,32 +37209,32 @@ GrB_Info GB_Adot3B__min_isge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_uint64
+GrB_Info GB_Asaxpy3B__any_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_fp32
+GrB_Info GB_Adot4B__any_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_fp32
+GrB_Info GB_Adot2B__any_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25628,10 +37242,10 @@ GrB_Info GB_Adot2B__min_isge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_fp32
+GrB_Info GB_Adot3B__any_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25639,32 +37253,32 @@ GrB_Info GB_Adot3B__min_isge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_fp32
+GrB_Info GB_Asaxpy3B__any_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isge_fp64
+GrB_Info GB_Adot4B__any_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isge_fp64
+GrB_Info GB_Adot2B__any_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25672,10 +37286,10 @@ GrB_Info GB_Adot2B__min_isge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isge_fp64
+GrB_Info GB_Adot3B__any_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25683,32 +37297,32 @@ GrB_Info GB_Adot3B__min_isge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isge_fp64
+GrB_Info GB_Asaxpy3B__any_eq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_int8
+GrB_Info GB_Adot4B__any_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_int8
+GrB_Info GB_Adot2B__any_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25716,10 +37330,10 @@ GrB_Info GB_Adot2B__max_isge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_int8
+GrB_Info GB_Adot3B__any_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25727,32 +37341,32 @@ GrB_Info GB_Adot3B__max_isge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_int8
+GrB_Info GB_Asaxpy3B__any_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_int16
+GrB_Info GB_Adot4B__any_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_int16
+GrB_Info GB_Adot2B__any_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25760,10 +37374,10 @@ GrB_Info GB_Adot2B__max_isge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_int16
+GrB_Info GB_Adot3B__any_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25771,32 +37385,32 @@ GrB_Info GB_Adot3B__max_isge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_int16
+GrB_Info GB_Asaxpy3B__any_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_int32
+GrB_Info GB_Adot4B__any_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_int32
+GrB_Info GB_Adot2B__any_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25804,10 +37418,10 @@ GrB_Info GB_Adot2B__max_isge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_int32
+GrB_Info GB_Adot3B__any_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25815,32 +37429,32 @@ GrB_Info GB_Adot3B__max_isge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_int32
+GrB_Info GB_Asaxpy3B__any_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_int64
+GrB_Info GB_Adot4B__any_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_int64
+GrB_Info GB_Adot2B__any_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25848,10 +37462,10 @@ GrB_Info GB_Adot2B__max_isge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_int64
+GrB_Info GB_Adot3B__any_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25859,32 +37473,32 @@ GrB_Info GB_Adot3B__max_isge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_int64
+GrB_Info GB_Asaxpy3B__any_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_uint8
+GrB_Info GB_Adot4B__any_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_uint8
+GrB_Info GB_Adot2B__any_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25892,10 +37506,10 @@ GrB_Info GB_Adot2B__max_isge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_uint8
+GrB_Info GB_Adot3B__any_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25903,32 +37517,32 @@ GrB_Info GB_Adot3B__max_isge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_uint8
+GrB_Info GB_Asaxpy3B__any_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_uint16
+GrB_Info GB_Adot4B__any_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_uint16
+GrB_Info GB_Adot2B__any_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25936,10 +37550,10 @@ GrB_Info GB_Adot2B__max_isge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_uint16
+GrB_Info GB_Adot3B__any_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25947,32 +37561,32 @@ GrB_Info GB_Adot3B__max_isge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_uint16
+GrB_Info GB_Asaxpy3B__any_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_uint32
+GrB_Info GB_Adot4B__any_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_uint32
+GrB_Info GB_Adot2B__land_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -25980,10 +37594,10 @@ GrB_Info GB_Adot2B__max_isge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_uint32
+GrB_Info GB_Adot3B__land_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -25991,32 +37605,32 @@ GrB_Info GB_Adot3B__max_isge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_uint32
+GrB_Info GB_Asaxpy3B__land_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_uint64
+GrB_Info GB_Adot4B__land_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_uint64
+GrB_Info GB_Adot2B__land_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26024,10 +37638,10 @@ GrB_Info GB_Adot2B__max_isge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_uint64
+GrB_Info GB_Adot3B__land_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26035,32 +37649,32 @@ GrB_Info GB_Adot3B__max_isge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_uint64
+GrB_Info GB_Asaxpy3B__land_eq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_fp32
+GrB_Info GB_Adot4B__land_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_fp32
+GrB_Info GB_Adot2B__land_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26068,10 +37682,10 @@ GrB_Info GB_Adot2B__max_isge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_fp32
+GrB_Info GB_Adot3B__land_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26079,32 +37693,32 @@ GrB_Info GB_Adot3B__max_isge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_fp32
+GrB_Info GB_Asaxpy3B__land_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isge_fp64
+GrB_Info GB_Adot4B__land_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isge_fp64
+GrB_Info GB_Adot2B__land_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26112,10 +37726,10 @@ GrB_Info GB_Adot2B__max_isge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isge_fp64
+GrB_Info GB_Adot3B__land_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26123,32 +37737,32 @@ GrB_Info GB_Adot3B__max_isge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isge_fp64
+GrB_Info GB_Asaxpy3B__land_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_int8
+GrB_Info GB_Adot4B__land_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_int8
+GrB_Info GB_Adot2B__land_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26156,10 +37770,10 @@ GrB_Info GB_Adot2B__plus_isge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_int8
+GrB_Info GB_Adot3B__land_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26167,32 +37781,32 @@ GrB_Info GB_Adot3B__plus_isge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_int8
+GrB_Info GB_Asaxpy3B__land_eq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_uint8
+GrB_Info GB_Adot4B__land_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_uint8
+GrB_Info GB_Adot2B__land_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26200,10 +37814,10 @@ GrB_Info GB_Adot2B__plus_isge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_uint8
+GrB_Info GB_Adot3B__land_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26211,32 +37825,32 @@ GrB_Info GB_Adot3B__plus_isge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_uint8
+GrB_Info GB_Asaxpy3B__land_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_int16
+GrB_Info GB_Adot4B__land_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_int16
+GrB_Info GB_Adot2B__land_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26244,10 +37858,10 @@ GrB_Info GB_Adot2B__plus_isge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_int16
+GrB_Info GB_Adot3B__land_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26255,32 +37869,32 @@ GrB_Info GB_Adot3B__plus_isge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_int16
+GrB_Info GB_Asaxpy3B__land_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_uint16
+GrB_Info GB_Adot4B__land_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_uint16
+GrB_Info GB_Adot2B__land_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26288,10 +37902,10 @@ GrB_Info GB_Adot2B__plus_isge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_uint16
+GrB_Info GB_Adot3B__land_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26299,32 +37913,32 @@ GrB_Info GB_Adot3B__plus_isge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_uint16
+GrB_Info GB_Asaxpy3B__land_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_int32
+GrB_Info GB_Adot4B__land_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_int32
+GrB_Info GB_Adot2B__land_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26332,10 +37946,10 @@ GrB_Info GB_Adot2B__plus_isge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_int32
+GrB_Info GB_Adot3B__land_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26343,32 +37957,32 @@ GrB_Info GB_Adot3B__plus_isge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_int32
+GrB_Info GB_Asaxpy3B__land_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_uint32
+GrB_Info GB_Adot4B__land_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_uint32
+GrB_Info GB_Adot2B__land_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26376,10 +37990,10 @@ GrB_Info GB_Adot2B__plus_isge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_uint32
+GrB_Info GB_Adot3B__land_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26387,32 +38001,32 @@ GrB_Info GB_Adot3B__plus_isge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_uint32
+GrB_Info GB_Asaxpy3B__land_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_int64
+GrB_Info GB_Adot4B__land_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_int64
+GrB_Info GB_Adot2B__land_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26420,10 +38034,10 @@ GrB_Info GB_Adot2B__plus_isge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_int64
+GrB_Info GB_Adot3B__land_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26431,32 +38045,32 @@ GrB_Info GB_Adot3B__plus_isge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_int64
+GrB_Info GB_Asaxpy3B__land_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_uint64
+GrB_Info GB_Adot4B__land_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_uint64
+GrB_Info GB_Adot2B__lxor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26464,10 +38078,10 @@ GrB_Info GB_Adot2B__plus_isge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_uint64
+GrB_Info GB_Adot3B__lxor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26475,32 +38089,32 @@ GrB_Info GB_Adot3B__plus_isge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_uint64
+GrB_Info GB_Asaxpy3B__lxor_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_fp32
+GrB_Info GB_Adot4B__lxor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_fp32
+GrB_Info GB_Adot2B__lxor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26508,10 +38122,10 @@ GrB_Info GB_Adot2B__plus_isge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_fp32
+GrB_Info GB_Adot3B__lxor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26519,32 +38133,32 @@ GrB_Info GB_Adot3B__plus_isge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_fp32
+GrB_Info GB_Asaxpy3B__lxor_eq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isge_fp64
+GrB_Info GB_Adot4B__lxor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isge_fp64
+GrB_Info GB_Adot2B__lxor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26552,10 +38166,10 @@ GrB_Info GB_Adot2B__plus_isge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isge_fp64
+GrB_Info GB_Adot3B__lxor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26563,32 +38177,32 @@ GrB_Info GB_Adot3B__plus_isge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isge_fp64
+GrB_Info GB_Asaxpy3B__lxor_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_int8
+GrB_Info GB_Adot4B__lxor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_int8
+GrB_Info GB_Adot2B__lxor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26596,10 +38210,10 @@ GrB_Info GB_Adot2B__times_isge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_int8
+GrB_Info GB_Adot3B__lxor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26607,32 +38221,32 @@ GrB_Info GB_Adot3B__times_isge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_int8
+GrB_Info GB_Asaxpy3B__lxor_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_uint8
+GrB_Info GB_Adot4B__lxor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_uint8
+GrB_Info GB_Adot2B__lxor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26640,10 +38254,10 @@ GrB_Info GB_Adot2B__times_isge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_uint8
+GrB_Info GB_Adot3B__lxor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26651,32 +38265,32 @@ GrB_Info GB_Adot3B__times_isge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_uint8
+GrB_Info GB_Asaxpy3B__lxor_eq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_int16
+GrB_Info GB_Adot4B__lxor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_int16
+GrB_Info GB_Adot2B__lxor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26684,10 +38298,10 @@ GrB_Info GB_Adot2B__times_isge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_int16
+GrB_Info GB_Adot3B__lxor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26695,32 +38309,32 @@ GrB_Info GB_Adot3B__times_isge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_int16
+GrB_Info GB_Asaxpy3B__lxor_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_uint16
+GrB_Info GB_Adot4B__lxor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_uint16
+GrB_Info GB_Adot2B__lxor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26728,10 +38342,10 @@ GrB_Info GB_Adot2B__times_isge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_uint16
+GrB_Info GB_Adot3B__lxor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26739,32 +38353,32 @@ GrB_Info GB_Adot3B__times_isge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_uint16
+GrB_Info GB_Asaxpy3B__lxor_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_int32
+GrB_Info GB_Adot4B__lxor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_int32
+GrB_Info GB_Adot2B__lxor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26772,10 +38386,10 @@ GrB_Info GB_Adot2B__times_isge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_int32
+GrB_Info GB_Adot3B__lxor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26783,32 +38397,32 @@ GrB_Info GB_Adot3B__times_isge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_int32
+GrB_Info GB_Asaxpy3B__lxor_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_uint32
+GrB_Info GB_Adot4B__lxor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_uint32
+GrB_Info GB_Adot2B__lxor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26816,10 +38430,10 @@ GrB_Info GB_Adot2B__times_isge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_uint32
+GrB_Info GB_Adot3B__lxor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26827,32 +38441,32 @@ GrB_Info GB_Adot3B__times_isge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_uint32
+GrB_Info GB_Asaxpy3B__lxor_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_int64
+GrB_Info GB_Adot4B__lxor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_int64
+GrB_Info GB_Adot2B__lxor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26860,10 +38474,10 @@ GrB_Info GB_Adot2B__times_isge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_int64
+GrB_Info GB_Adot3B__lxor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26871,32 +38485,32 @@ GrB_Info GB_Adot3B__times_isge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_int64
+GrB_Info GB_Asaxpy3B__lxor_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_uint64
+GrB_Info GB_Adot4B__lxor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_uint64
+GrB_Info GB_Adot2B__lxor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26904,10 +38518,10 @@ GrB_Info GB_Adot2B__times_isge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_uint64
+GrB_Info GB_Adot3B__lxor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26915,32 +38529,32 @@ GrB_Info GB_Adot3B__times_isge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_uint64
+GrB_Info GB_Asaxpy3B__lxor_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_fp32
+GrB_Info GB_Adot4B__lxor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_fp32
+GrB_Info GB_Adot2B__eq_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26948,10 +38562,10 @@ GrB_Info GB_Adot2B__times_isge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_fp32
+GrB_Info GB_Adot3B__eq_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -26959,32 +38573,32 @@ GrB_Info GB_Adot3B__times_isge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_fp32
+GrB_Info GB_Asaxpy3B__eq_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isge_fp64
+GrB_Info GB_Adot4B__eq_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isge_fp64
+GrB_Info GB_Adot2B__eq_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -26992,10 +38606,10 @@ GrB_Info GB_Adot2B__times_isge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isge_fp64
+GrB_Info GB_Adot3B__eq_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27003,32 +38617,32 @@ GrB_Info GB_Adot3B__times_isge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isge_fp64
+GrB_Info GB_Asaxpy3B__eq_eq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_int8
+GrB_Info GB_Adot4B__eq_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_int8
+GrB_Info GB_Adot2B__eq_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27036,10 +38650,10 @@ GrB_Info GB_Adot2B__min_isle_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_int8
+GrB_Info GB_Adot3B__eq_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27047,32 +38661,32 @@ GrB_Info GB_Adot3B__min_isle_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_int8
+GrB_Info GB_Asaxpy3B__eq_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_int16
+GrB_Info GB_Adot4B__eq_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_int16
+GrB_Info GB_Adot2B__eq_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27080,10 +38694,10 @@ GrB_Info GB_Adot2B__min_isle_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_int16
+GrB_Info GB_Adot3B__eq_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27091,32 +38705,32 @@ GrB_Info GB_Adot3B__min_isle_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_int16
+GrB_Info GB_Asaxpy3B__eq_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_int32
+GrB_Info GB_Adot4B__eq_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_int32
+GrB_Info GB_Adot2B__eq_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27124,10 +38738,10 @@ GrB_Info GB_Adot2B__min_isle_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_int32
+GrB_Info GB_Adot3B__eq_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27135,32 +38749,32 @@ GrB_Info GB_Adot3B__min_isle_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_int32
+GrB_Info GB_Asaxpy3B__eq_eq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_int64
+GrB_Info GB_Adot4B__eq_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_int64
+GrB_Info GB_Adot2B__eq_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27168,10 +38782,10 @@ GrB_Info GB_Adot2B__min_isle_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_int64
+GrB_Info GB_Adot3B__eq_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27179,32 +38793,32 @@ GrB_Info GB_Adot3B__min_isle_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_int64
+GrB_Info GB_Asaxpy3B__eq_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_uint8
+GrB_Info GB_Adot4B__eq_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_uint8
+GrB_Info GB_Adot2B__eq_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27212,10 +38826,10 @@ GrB_Info GB_Adot2B__min_isle_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_uint8
+GrB_Info GB_Adot3B__eq_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27223,32 +38837,32 @@ GrB_Info GB_Adot3B__min_isle_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_uint8
+GrB_Info GB_Asaxpy3B__eq_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_uint16
+GrB_Info GB_Adot4B__eq_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_uint16
+GrB_Info GB_Adot2B__eq_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27256,10 +38870,10 @@ GrB_Info GB_Adot2B__min_isle_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_uint16
+GrB_Info GB_Adot3B__eq_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27267,32 +38881,32 @@ GrB_Info GB_Adot3B__min_isle_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_uint16
+GrB_Info GB_Asaxpy3B__eq_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_uint32
+GrB_Info GB_Adot4B__eq_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_uint32
+GrB_Info GB_Adot2B__eq_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27300,10 +38914,10 @@ GrB_Info GB_Adot2B__min_isle_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_uint32
+GrB_Info GB_Adot3B__eq_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27311,32 +38925,32 @@ GrB_Info GB_Adot3B__min_isle_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_uint32
+GrB_Info GB_Asaxpy3B__eq_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_uint64
+GrB_Info GB_Adot4B__eq_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_uint64
+GrB_Info GB_Adot2B__eq_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27344,10 +38958,10 @@ GrB_Info GB_Adot2B__min_isle_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_uint64
+GrB_Info GB_Adot3B__eq_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27355,32 +38969,32 @@ GrB_Info GB_Adot3B__min_isle_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_uint64
+GrB_Info GB_Asaxpy3B__eq_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_fp32
+GrB_Info GB_Adot4B__eq_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_fp32
+GrB_Info GB_Adot2B__eq_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27388,10 +39002,10 @@ GrB_Info GB_Adot2B__min_isle_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_fp32
+GrB_Info GB_Adot3B__eq_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27399,32 +39013,32 @@ GrB_Info GB_Adot3B__min_isle_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_fp32
+GrB_Info GB_Asaxpy3B__eq_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_isle_fp64
+GrB_Info GB_Adot4B__eq_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_isle_fp64
+GrB_Info GB_Adot2B__lor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27432,10 +39046,10 @@ GrB_Info GB_Adot2B__min_isle_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_isle_fp64
+GrB_Info GB_Adot3B__lor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27443,32 +39057,32 @@ GrB_Info GB_Adot3B__min_isle_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_isle_fp64
+GrB_Info GB_Asaxpy3B__lor_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_int8
+GrB_Info GB_Adot4B__lor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_int8
+GrB_Info GB_Adot2B__lor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27476,10 +39090,10 @@ GrB_Info GB_Adot2B__max_isle_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_int8
+GrB_Info GB_Adot3B__lor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27487,32 +39101,32 @@ GrB_Info GB_Adot3B__max_isle_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_int8
+GrB_Info GB_Asaxpy3B__lor_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_int16
+GrB_Info GB_Adot4B__lor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_int16
+GrB_Info GB_Adot2B__lor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27520,10 +39134,10 @@ GrB_Info GB_Adot2B__max_isle_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_int16
+GrB_Info GB_Adot3B__lor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27531,32 +39145,32 @@ GrB_Info GB_Adot3B__max_isle_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_int16
+GrB_Info GB_Asaxpy3B__lor_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_int32
+GrB_Info GB_Adot4B__lor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_int32
+GrB_Info GB_Adot2B__lor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27564,10 +39178,10 @@ GrB_Info GB_Adot2B__max_isle_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_int32
+GrB_Info GB_Adot3B__lor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27575,32 +39189,32 @@ GrB_Info GB_Adot3B__max_isle_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_int32
+GrB_Info GB_Asaxpy3B__lor_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_int64
+GrB_Info GB_Adot4B__lor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_int64
+GrB_Info GB_Adot2B__lor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27608,10 +39222,10 @@ GrB_Info GB_Adot2B__max_isle_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_int64
+GrB_Info GB_Adot3B__lor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27619,32 +39233,32 @@ GrB_Info GB_Adot3B__max_isle_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_int64
+GrB_Info GB_Asaxpy3B__lor_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_uint8
+GrB_Info GB_Adot4B__lor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_uint8
+GrB_Info GB_Adot2B__lor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27652,10 +39266,10 @@ GrB_Info GB_Adot2B__max_isle_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_uint8
+GrB_Info GB_Adot3B__lor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27663,32 +39277,32 @@ GrB_Info GB_Adot3B__max_isle_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_uint8
+GrB_Info GB_Asaxpy3B__lor_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_uint16
+GrB_Info GB_Adot4B__lor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_uint16
+GrB_Info GB_Adot2B__lor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27696,10 +39310,10 @@ GrB_Info GB_Adot2B__max_isle_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_uint16
+GrB_Info GB_Adot3B__lor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27707,32 +39321,32 @@ GrB_Info GB_Adot3B__max_isle_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_uint16
+GrB_Info GB_Asaxpy3B__lor_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_uint32
+GrB_Info GB_Adot4B__lor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_uint32
+GrB_Info GB_Adot2B__lor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27740,10 +39354,10 @@ GrB_Info GB_Adot2B__max_isle_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_uint32
+GrB_Info GB_Adot3B__lor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27751,32 +39365,32 @@ GrB_Info GB_Adot3B__max_isle_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_uint32
+GrB_Info GB_Asaxpy3B__lor_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_uint64
+GrB_Info GB_Adot4B__lor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_uint64
+GrB_Info GB_Adot2B__lor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27784,10 +39398,10 @@ GrB_Info GB_Adot2B__max_isle_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_uint64
+GrB_Info GB_Adot3B__lor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27795,32 +39409,32 @@ GrB_Info GB_Adot3B__max_isle_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_uint64
+GrB_Info GB_Asaxpy3B__lor_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_fp32
+GrB_Info GB_Adot4B__lor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_fp32
+GrB_Info GB_Adot2B__lor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27828,10 +39442,10 @@ GrB_Info GB_Adot2B__max_isle_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_fp32
+GrB_Info GB_Adot3B__lor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27839,32 +39453,32 @@ GrB_Info GB_Adot3B__max_isle_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_fp32
+GrB_Info GB_Asaxpy3B__lor_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_isle_fp64
+GrB_Info GB_Adot4B__lor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_isle_fp64
+GrB_Info GB_Adot2B__any_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27872,10 +39486,10 @@ GrB_Info GB_Adot2B__max_isle_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_isle_fp64
+GrB_Info GB_Adot3B__any_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27883,32 +39497,32 @@ GrB_Info GB_Adot3B__max_isle_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_isle_fp64
+GrB_Info GB_Asaxpy3B__any_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_int8
+GrB_Info GB_Adot4B__any_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_int8
+GrB_Info GB_Adot2B__any_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27916,10 +39530,10 @@ GrB_Info GB_Adot2B__plus_isle_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_int8
+GrB_Info GB_Adot3B__any_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27927,32 +39541,32 @@ GrB_Info GB_Adot3B__plus_isle_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_int8
+GrB_Info GB_Asaxpy3B__any_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_uint8
+GrB_Info GB_Adot4B__any_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_uint8
+GrB_Info GB_Adot2B__any_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -27960,10 +39574,10 @@ GrB_Info GB_Adot2B__plus_isle_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_uint8
+GrB_Info GB_Adot3B__any_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -27971,32 +39585,32 @@ GrB_Info GB_Adot3B__plus_isle_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_uint8
+GrB_Info GB_Asaxpy3B__any_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_int16
+GrB_Info GB_Adot4B__any_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_int16
+GrB_Info GB_Adot2B__any_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28004,10 +39618,10 @@ GrB_Info GB_Adot2B__plus_isle_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_int16
+GrB_Info GB_Adot3B__any_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28015,32 +39629,32 @@ GrB_Info GB_Adot3B__plus_isle_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_int16
+GrB_Info GB_Asaxpy3B__any_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_uint16
+GrB_Info GB_Adot4B__any_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_uint16
+GrB_Info GB_Adot2B__any_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28048,10 +39662,10 @@ GrB_Info GB_Adot2B__plus_isle_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_uint16
+GrB_Info GB_Adot3B__any_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28059,32 +39673,32 @@ GrB_Info GB_Adot3B__plus_isle_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_uint16
+GrB_Info GB_Asaxpy3B__any_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_int32
+GrB_Info GB_Adot4B__any_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_int32
+GrB_Info GB_Adot2B__any_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28092,10 +39706,10 @@ GrB_Info GB_Adot2B__plus_isle_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_int32
+GrB_Info GB_Adot3B__any_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28103,32 +39717,32 @@ GrB_Info GB_Adot3B__plus_isle_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_int32
+GrB_Info GB_Asaxpy3B__any_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_uint32
+GrB_Info GB_Adot4B__any_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_uint32
+GrB_Info GB_Adot2B__any_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28136,10 +39750,10 @@ GrB_Info GB_Adot2B__plus_isle_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_uint32
+GrB_Info GB_Adot3B__any_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28147,32 +39761,32 @@ GrB_Info GB_Adot3B__plus_isle_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_uint32
+GrB_Info GB_Asaxpy3B__any_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_int64
+GrB_Info GB_Adot4B__any_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_int64
+GrB_Info GB_Adot2B__any_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28180,10 +39794,10 @@ GrB_Info GB_Adot2B__plus_isle_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_int64
+GrB_Info GB_Adot3B__any_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28191,32 +39805,32 @@ GrB_Info GB_Adot3B__plus_isle_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_int64
+GrB_Info GB_Asaxpy3B__any_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_uint64
+GrB_Info GB_Adot4B__any_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_uint64
+GrB_Info GB_Adot2B__any_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28224,10 +39838,10 @@ GrB_Info GB_Adot2B__plus_isle_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_uint64
+GrB_Info GB_Adot3B__any_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28235,32 +39849,32 @@ GrB_Info GB_Adot3B__plus_isle_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_uint64
+GrB_Info GB_Asaxpy3B__any_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_fp32
+GrB_Info GB_Adot4B__any_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_fp32
+GrB_Info GB_Adot2B__any_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28268,10 +39882,10 @@ GrB_Info GB_Adot2B__plus_isle_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_fp32
+GrB_Info GB_Adot3B__any_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28279,32 +39893,32 @@ GrB_Info GB_Adot3B__plus_isle_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_fp32
+GrB_Info GB_Asaxpy3B__any_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_isle_fp64
+GrB_Info GB_Adot4B__any_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_isle_fp64
+GrB_Info GB_Adot2B__land_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28312,10 +39926,10 @@ GrB_Info GB_Adot2B__plus_isle_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_isle_fp64
+GrB_Info GB_Adot3B__land_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28323,32 +39937,32 @@ GrB_Info GB_Adot3B__plus_isle_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_isle_fp64
+GrB_Info GB_Asaxpy3B__land_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_int8
+GrB_Info GB_Adot4B__land_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_int8
+GrB_Info GB_Adot2B__land_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28356,10 +39970,10 @@ GrB_Info GB_Adot2B__times_isle_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_int8
+GrB_Info GB_Adot3B__land_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28367,32 +39981,32 @@ GrB_Info GB_Adot3B__times_isle_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_int8
+GrB_Info GB_Asaxpy3B__land_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_uint8
+GrB_Info GB_Adot4B__land_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_uint8
+GrB_Info GB_Adot2B__land_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28400,10 +40014,10 @@ GrB_Info GB_Adot2B__times_isle_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_uint8
+GrB_Info GB_Adot3B__land_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28411,32 +40025,32 @@ GrB_Info GB_Adot3B__times_isle_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_uint8
+GrB_Info GB_Asaxpy3B__land_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_int16
+GrB_Info GB_Adot4B__land_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_int16
+GrB_Info GB_Adot2B__land_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28444,10 +40058,10 @@ GrB_Info GB_Adot2B__times_isle_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_int16
+GrB_Info GB_Adot3B__land_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28455,32 +40069,32 @@ GrB_Info GB_Adot3B__times_isle_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_int16
+GrB_Info GB_Asaxpy3B__land_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_uint16
+GrB_Info GB_Adot4B__land_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_uint16
+GrB_Info GB_Adot2B__land_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28488,10 +40102,10 @@ GrB_Info GB_Adot2B__times_isle_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_uint16
+GrB_Info GB_Adot3B__land_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28499,32 +40113,32 @@ GrB_Info GB_Adot3B__times_isle_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_uint16
+GrB_Info GB_Asaxpy3B__land_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_int32
+GrB_Info GB_Adot4B__land_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_int32
+GrB_Info GB_Adot2B__land_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28532,10 +40146,10 @@ GrB_Info GB_Adot2B__times_isle_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_int32
+GrB_Info GB_Adot3B__land_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28543,32 +40157,32 @@ GrB_Info GB_Adot3B__times_isle_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_int32
+GrB_Info GB_Asaxpy3B__land_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_uint32
+GrB_Info GB_Adot4B__land_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_uint32
+GrB_Info GB_Adot2B__land_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28576,10 +40190,10 @@ GrB_Info GB_Adot2B__times_isle_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_uint32
+GrB_Info GB_Adot3B__land_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28587,32 +40201,32 @@ GrB_Info GB_Adot3B__times_isle_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_uint32
+GrB_Info GB_Asaxpy3B__land_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_int64
+GrB_Info GB_Adot4B__land_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_int64
+GrB_Info GB_Adot2B__land_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28620,10 +40234,10 @@ GrB_Info GB_Adot2B__times_isle_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_int64
+GrB_Info GB_Adot3B__land_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28631,32 +40245,32 @@ GrB_Info GB_Adot3B__times_isle_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_int64
+GrB_Info GB_Asaxpy3B__land_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_uint64
+GrB_Info GB_Adot4B__land_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_uint64
+GrB_Info GB_Adot2B__land_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28664,10 +40278,10 @@ GrB_Info GB_Adot2B__times_isle_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_uint64
+GrB_Info GB_Adot3B__land_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28675,32 +40289,32 @@ GrB_Info GB_Adot3B__times_isle_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_uint64
+GrB_Info GB_Asaxpy3B__land_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_fp32
+GrB_Info GB_Adot4B__land_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_fp32
+GrB_Info GB_Adot2B__land_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28708,10 +40322,10 @@ GrB_Info GB_Adot2B__times_isle_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_fp32
+GrB_Info GB_Adot3B__land_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28719,32 +40333,32 @@ GrB_Info GB_Adot3B__times_isle_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_fp32
+GrB_Info GB_Asaxpy3B__land_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_isle_fp64
+GrB_Info GB_Adot4B__land_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_isle_fp64
+GrB_Info GB_Adot2B__lxor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28752,10 +40366,10 @@ GrB_Info GB_Adot2B__times_isle_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_isle_fp64
+GrB_Info GB_Adot3B__lxor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28763,32 +40377,32 @@ GrB_Info GB_Adot3B__times_isle_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_isle_fp64
+GrB_Info GB_Asaxpy3B__lxor_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_bool
+GrB_Info GB_Adot4B__lxor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_bool
+GrB_Info GB_Adot2B__lxor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28796,10 +40410,10 @@ GrB_Info GB_Adot2B__lor_eq_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_bool
+GrB_Info GB_Adot3B__lxor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28807,32 +40421,32 @@ GrB_Info GB_Adot3B__lor_eq_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_bool
+GrB_Info GB_Asaxpy3B__lxor_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_int8
+GrB_Info GB_Adot4B__lxor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_int8
+GrB_Info GB_Adot2B__lxor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28840,10 +40454,10 @@ GrB_Info GB_Adot2B__lor_eq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_int8
+GrB_Info GB_Adot3B__lxor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28851,32 +40465,32 @@ GrB_Info GB_Adot3B__lor_eq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_int8
+GrB_Info GB_Asaxpy3B__lxor_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_uint8
+GrB_Info GB_Adot4B__lxor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_uint8
+GrB_Info GB_Adot2B__lxor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28884,10 +40498,10 @@ GrB_Info GB_Adot2B__lor_eq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_uint8
+GrB_Info GB_Adot3B__lxor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28895,32 +40509,32 @@ GrB_Info GB_Adot3B__lor_eq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_uint8
+GrB_Info GB_Asaxpy3B__lxor_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_int16
+GrB_Info GB_Adot4B__lxor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_int16
+GrB_Info GB_Adot2B__lxor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28928,10 +40542,10 @@ GrB_Info GB_Adot2B__lor_eq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_int16
+GrB_Info GB_Adot3B__lxor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28939,32 +40553,32 @@ GrB_Info GB_Adot3B__lor_eq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_int16
+GrB_Info GB_Asaxpy3B__lxor_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_uint16
+GrB_Info GB_Adot4B__lxor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_uint16
+GrB_Info GB_Adot2B__lxor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -28972,10 +40586,10 @@ GrB_Info GB_Adot2B__lor_eq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_uint16
+GrB_Info GB_Adot3B__lxor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -28983,32 +40597,32 @@ GrB_Info GB_Adot3B__lor_eq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_uint16
+GrB_Info GB_Asaxpy3B__lxor_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_int32
+GrB_Info GB_Adot4B__lxor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_int32
+GrB_Info GB_Adot2B__lxor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29016,10 +40630,10 @@ GrB_Info GB_Adot2B__lor_eq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_int32
+GrB_Info GB_Adot3B__lxor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29027,32 +40641,32 @@ GrB_Info GB_Adot3B__lor_eq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_int32
+GrB_Info GB_Asaxpy3B__lxor_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_uint32
+GrB_Info GB_Adot4B__lxor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_uint32
+GrB_Info GB_Adot2B__lxor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29060,10 +40674,10 @@ GrB_Info GB_Adot2B__lor_eq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_uint32
+GrB_Info GB_Adot3B__lxor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29071,32 +40685,32 @@ GrB_Info GB_Adot3B__lor_eq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_uint32
+GrB_Info GB_Asaxpy3B__lxor_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_int64
+GrB_Info GB_Adot4B__lxor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_int64
+GrB_Info GB_Adot2B__lxor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29104,10 +40718,10 @@ GrB_Info GB_Adot2B__lor_eq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_int64
+GrB_Info GB_Adot3B__lxor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29115,32 +40729,32 @@ GrB_Info GB_Adot3B__lor_eq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_int64
+GrB_Info GB_Asaxpy3B__lxor_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_uint64
+GrB_Info GB_Adot4B__lxor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_uint64
+GrB_Info GB_Adot2B__lxor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29148,10 +40762,10 @@ GrB_Info GB_Adot2B__lor_eq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_uint64
+GrB_Info GB_Adot3B__lxor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29159,32 +40773,32 @@ GrB_Info GB_Adot3B__lor_eq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_uint64
+GrB_Info GB_Asaxpy3B__lxor_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_fp32
+GrB_Info GB_Adot4B__lxor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_fp32
+GrB_Info GB_Adot2B__eq_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29192,10 +40806,10 @@ GrB_Info GB_Adot2B__lor_eq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_fp32
+GrB_Info GB_Adot3B__eq_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29203,32 +40817,32 @@ GrB_Info GB_Adot3B__lor_eq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_fp32
+GrB_Info GB_Asaxpy3B__eq_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_eq_fp64
+GrB_Info GB_Adot4B__eq_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_eq_fp64
+GrB_Info GB_Adot2B__eq_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29236,10 +40850,10 @@ GrB_Info GB_Adot2B__lor_eq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_eq_fp64
+GrB_Info GB_Adot3B__eq_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29247,32 +40861,32 @@ GrB_Info GB_Adot3B__lor_eq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_eq_fp64
+GrB_Info GB_Asaxpy3B__eq_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_bool
+GrB_Info GB_Adot4B__eq_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_bool
+GrB_Info GB_Adot2B__eq_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29280,10 +40894,10 @@ GrB_Info GB_Adot2B__land_eq_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_bool
+GrB_Info GB_Adot3B__eq_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29291,32 +40905,32 @@ GrB_Info GB_Adot3B__land_eq_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_bool
+GrB_Info GB_Asaxpy3B__eq_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_int8
+GrB_Info GB_Adot4B__eq_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_int8
+GrB_Info GB_Adot2B__eq_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29324,10 +40938,10 @@ GrB_Info GB_Adot2B__land_eq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_int8
+GrB_Info GB_Adot3B__eq_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29335,32 +40949,32 @@ GrB_Info GB_Adot3B__land_eq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_int8
+GrB_Info GB_Asaxpy3B__eq_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_uint8
+GrB_Info GB_Adot4B__eq_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_uint8
+GrB_Info GB_Adot2B__eq_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29368,10 +40982,10 @@ GrB_Info GB_Adot2B__land_eq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_uint8
+GrB_Info GB_Adot3B__eq_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29379,32 +40993,32 @@ GrB_Info GB_Adot3B__land_eq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_uint8
+GrB_Info GB_Asaxpy3B__eq_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_int16
+GrB_Info GB_Adot4B__eq_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_int16
+GrB_Info GB_Adot2B__eq_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29412,10 +41026,10 @@ GrB_Info GB_Adot2B__land_eq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_int16
+GrB_Info GB_Adot3B__eq_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29423,32 +41037,32 @@ GrB_Info GB_Adot3B__land_eq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_int16
+GrB_Info GB_Asaxpy3B__eq_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_uint16
+GrB_Info GB_Adot4B__eq_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_uint16
+GrB_Info GB_Adot2B__eq_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29456,10 +41070,10 @@ GrB_Info GB_Adot2B__land_eq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_uint16
+GrB_Info GB_Adot3B__eq_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29467,32 +41081,32 @@ GrB_Info GB_Adot3B__land_eq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_uint16
+GrB_Info GB_Asaxpy3B__eq_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_int32
+GrB_Info GB_Adot4B__eq_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_int32
+GrB_Info GB_Adot2B__eq_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29500,10 +41114,10 @@ GrB_Info GB_Adot2B__land_eq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_int32
+GrB_Info GB_Adot3B__eq_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29511,32 +41125,32 @@ GrB_Info GB_Adot3B__land_eq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_int32
+GrB_Info GB_Asaxpy3B__eq_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_uint32
+GrB_Info GB_Adot4B__eq_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_uint32
+GrB_Info GB_Adot2B__eq_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29544,10 +41158,10 @@ GrB_Info GB_Adot2B__land_eq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_uint32
+GrB_Info GB_Adot3B__eq_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29555,32 +41169,32 @@ GrB_Info GB_Adot3B__land_eq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_uint32
+GrB_Info GB_Asaxpy3B__eq_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_int64
+GrB_Info GB_Adot4B__eq_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_int64
+GrB_Info GB_Adot2B__eq_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29588,10 +41202,10 @@ GrB_Info GB_Adot2B__land_eq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_int64
+GrB_Info GB_Adot3B__eq_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29599,32 +41213,32 @@ GrB_Info GB_Adot3B__land_eq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_int64
+GrB_Info GB_Asaxpy3B__eq_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_uint64
+GrB_Info GB_Adot4B__eq_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_uint64
+GrB_Info GB_Adot2B__lor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29632,10 +41246,10 @@ GrB_Info GB_Adot2B__land_eq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_uint64
+GrB_Info GB_Adot3B__lor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29643,32 +41257,32 @@ GrB_Info GB_Adot3B__land_eq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_uint64
+GrB_Info GB_Asaxpy3B__lor_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_fp32
+GrB_Info GB_Adot4B__lor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_fp32
+GrB_Info GB_Adot2B__lor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29676,10 +41290,10 @@ GrB_Info GB_Adot2B__land_eq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_fp32
+GrB_Info GB_Adot3B__lor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29687,32 +41301,32 @@ GrB_Info GB_Adot3B__land_eq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_fp32
+GrB_Info GB_Asaxpy3B__lor_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_eq_fp64
+GrB_Info GB_Adot4B__lor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_eq_fp64
+GrB_Info GB_Adot2B__lor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29720,10 +41334,10 @@ GrB_Info GB_Adot2B__land_eq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_eq_fp64
+GrB_Info GB_Adot3B__lor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29731,32 +41345,32 @@ GrB_Info GB_Adot3B__land_eq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_eq_fp64
+GrB_Info GB_Asaxpy3B__lor_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_bool
+GrB_Info GB_Adot4B__lor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_bool
+GrB_Info GB_Adot2B__lor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29764,10 +41378,10 @@ GrB_Info GB_Adot2B__lxor_eq_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_bool
+GrB_Info GB_Adot3B__lor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29775,32 +41389,32 @@ GrB_Info GB_Adot3B__lxor_eq_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_bool
+GrB_Info GB_Asaxpy3B__lor_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_int8
+GrB_Info GB_Adot4B__lor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_int8
+GrB_Info GB_Adot2B__lor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29808,10 +41422,10 @@ GrB_Info GB_Adot2B__lxor_eq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_int8
+GrB_Info GB_Adot3B__lor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29819,32 +41433,32 @@ GrB_Info GB_Adot3B__lxor_eq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_int8
+GrB_Info GB_Asaxpy3B__lor_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_uint8
+GrB_Info GB_Adot4B__lor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_uint8
+GrB_Info GB_Adot2B__lor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29852,10 +41466,10 @@ GrB_Info GB_Adot2B__lxor_eq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_uint8
+GrB_Info GB_Adot3B__lor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29863,32 +41477,32 @@ GrB_Info GB_Adot3B__lxor_eq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_uint8
+GrB_Info GB_Asaxpy3B__lor_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_int16
+GrB_Info GB_Adot4B__lor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_int16
+GrB_Info GB_Adot2B__lor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29896,10 +41510,10 @@ GrB_Info GB_Adot2B__lxor_eq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_int16
+GrB_Info GB_Adot3B__lor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29907,32 +41521,32 @@ GrB_Info GB_Adot3B__lxor_eq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_int16
+GrB_Info GB_Asaxpy3B__lor_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_uint16
+GrB_Info GB_Adot4B__lor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_uint16
+GrB_Info GB_Adot2B__lor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29940,10 +41554,10 @@ GrB_Info GB_Adot2B__lxor_eq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_uint16
+GrB_Info GB_Adot3B__lor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29951,32 +41565,32 @@ GrB_Info GB_Adot3B__lxor_eq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_uint16
+GrB_Info GB_Asaxpy3B__lor_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_int32
+GrB_Info GB_Adot4B__lor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_int32
+GrB_Info GB_Adot2B__lor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -29984,10 +41598,10 @@ GrB_Info GB_Adot2B__lxor_eq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_int32
+GrB_Info GB_Adot3B__lor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29995,32 +41609,32 @@ GrB_Info GB_Adot3B__lxor_eq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_int32
+GrB_Info GB_Asaxpy3B__lor_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_uint32
+GrB_Info GB_Adot4B__lor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_uint32
+GrB_Info GB_Adot2B__lor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30028,10 +41642,10 @@ GrB_Info GB_Adot2B__lxor_eq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_uint32
+GrB_Info GB_Adot3B__lor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30039,32 +41653,32 @@ GrB_Info GB_Adot3B__lxor_eq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_uint32
+GrB_Info GB_Asaxpy3B__lor_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_int64
+GrB_Info GB_Adot4B__lor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_int64
+GrB_Info GB_Adot2B__lor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30072,10 +41686,10 @@ GrB_Info GB_Adot2B__lxor_eq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_int64
+GrB_Info GB_Adot3B__lor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30083,32 +41697,32 @@ GrB_Info GB_Adot3B__lxor_eq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_int64
+GrB_Info GB_Asaxpy3B__lor_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_uint64
+GrB_Info GB_Adot4B__lor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_uint64
+GrB_Info GB_Adot2B__any_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30116,10 +41730,10 @@ GrB_Info GB_Adot2B__lxor_eq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_uint64
+GrB_Info GB_Adot3B__any_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30127,32 +41741,32 @@ GrB_Info GB_Adot3B__lxor_eq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_uint64
+GrB_Info GB_Asaxpy3B__any_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_fp32
+GrB_Info GB_Adot4B__any_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_fp32
+GrB_Info GB_Adot2B__any_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30160,10 +41774,10 @@ GrB_Info GB_Adot2B__lxor_eq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_fp32
+GrB_Info GB_Adot3B__any_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30171,32 +41785,32 @@ GrB_Info GB_Adot3B__lxor_eq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_fp32
+GrB_Info GB_Asaxpy3B__any_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_eq_fp64
+GrB_Info GB_Adot4B__any_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_eq_fp64
+GrB_Info GB_Adot2B__any_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30204,10 +41818,10 @@ GrB_Info GB_Adot2B__lxor_eq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_eq_fp64
+GrB_Info GB_Adot3B__any_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30215,32 +41829,32 @@ GrB_Info GB_Adot3B__lxor_eq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_eq_fp64
+GrB_Info GB_Asaxpy3B__any_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_bool
+GrB_Info GB_Adot4B__any_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_bool
+GrB_Info GB_Adot2B__any_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30248,10 +41862,10 @@ GrB_Info GB_Adot2B__eq_eq_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_bool
+GrB_Info GB_Adot3B__any_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30259,32 +41873,32 @@ GrB_Info GB_Adot3B__eq_eq_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_bool
+GrB_Info GB_Asaxpy3B__any_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_int8
+GrB_Info GB_Adot4B__any_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_int8
+GrB_Info GB_Adot2B__any_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30292,10 +41906,10 @@ GrB_Info GB_Adot2B__eq_eq_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_int8
+GrB_Info GB_Adot3B__any_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30303,32 +41917,32 @@ GrB_Info GB_Adot3B__eq_eq_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_int8
+GrB_Info GB_Asaxpy3B__any_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_uint8
+GrB_Info GB_Adot4B__any_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_uint8
+GrB_Info GB_Adot2B__any_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30336,10 +41950,10 @@ GrB_Info GB_Adot2B__eq_eq_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_uint8
+GrB_Info GB_Adot3B__any_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30347,32 +41961,32 @@ GrB_Info GB_Adot3B__eq_eq_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_uint8
+GrB_Info GB_Asaxpy3B__any_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_int16
+GrB_Info GB_Adot4B__any_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_int16
+GrB_Info GB_Adot2B__any_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30380,10 +41994,10 @@ GrB_Info GB_Adot2B__eq_eq_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_int16
+GrB_Info GB_Adot3B__any_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30391,32 +42005,32 @@ GrB_Info GB_Adot3B__eq_eq_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_int16
+GrB_Info GB_Asaxpy3B__any_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_uint16
+GrB_Info GB_Adot4B__any_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_uint16
+GrB_Info GB_Adot2B__any_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30424,10 +42038,10 @@ GrB_Info GB_Adot2B__eq_eq_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_uint16
+GrB_Info GB_Adot3B__any_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30435,32 +42049,32 @@ GrB_Info GB_Adot3B__eq_eq_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_uint16
+GrB_Info GB_Asaxpy3B__any_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_int32
+GrB_Info GB_Adot4B__any_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_int32
+GrB_Info GB_Adot2B__any_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30468,10 +42082,10 @@ GrB_Info GB_Adot2B__eq_eq_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_int32
+GrB_Info GB_Adot3B__any_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30479,32 +42093,32 @@ GrB_Info GB_Adot3B__eq_eq_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_int32
+GrB_Info GB_Asaxpy3B__any_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_uint32
+GrB_Info GB_Adot4B__any_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_uint32
+GrB_Info GB_Adot2B__any_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30512,10 +42126,10 @@ GrB_Info GB_Adot2B__eq_eq_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_uint32
+GrB_Info GB_Adot3B__any_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30523,32 +42137,32 @@ GrB_Info GB_Adot3B__eq_eq_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_uint32
+GrB_Info GB_Asaxpy3B__any_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_int64
+GrB_Info GB_Adot4B__any_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_int64
+GrB_Info GB_Adot2B__any_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30556,10 +42170,10 @@ GrB_Info GB_Adot2B__eq_eq_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_int64
+GrB_Info GB_Adot3B__any_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30567,32 +42181,32 @@ GrB_Info GB_Adot3B__eq_eq_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_int64
+GrB_Info GB_Asaxpy3B__any_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_uint64
+GrB_Info GB_Adot4B__any_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_uint64
+GrB_Info GB_Adot2B__land_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30600,10 +42214,10 @@ GrB_Info GB_Adot2B__eq_eq_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_uint64
+GrB_Info GB_Adot3B__land_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30611,32 +42225,32 @@ GrB_Info GB_Adot3B__eq_eq_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_uint64
+GrB_Info GB_Asaxpy3B__land_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_fp32
+GrB_Info GB_Adot4B__land_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_fp32
+GrB_Info GB_Adot2B__land_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30644,10 +42258,10 @@ GrB_Info GB_Adot2B__eq_eq_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_fp32
+GrB_Info GB_Adot3B__land_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30655,32 +42269,32 @@ GrB_Info GB_Adot3B__eq_eq_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_fp32
+GrB_Info GB_Asaxpy3B__land_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_eq_fp64
+GrB_Info GB_Adot4B__land_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_eq_fp64
+GrB_Info GB_Adot2B__land_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30688,10 +42302,10 @@ GrB_Info GB_Adot2B__eq_eq_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_eq_fp64
+GrB_Info GB_Adot3B__land_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30699,32 +42313,32 @@ GrB_Info GB_Adot3B__eq_eq_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_eq_fp64
+GrB_Info GB_Asaxpy3B__land_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_int8
+GrB_Info GB_Adot4B__land_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_int8
+GrB_Info GB_Adot2B__land_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30732,10 +42346,10 @@ GrB_Info GB_Adot2B__lor_ne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_int8
+GrB_Info GB_Adot3B__land_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30743,32 +42357,32 @@ GrB_Info GB_Adot3B__lor_ne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_int8
+GrB_Info GB_Asaxpy3B__land_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_uint8
+GrB_Info GB_Adot4B__land_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_uint8
+GrB_Info GB_Adot2B__land_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30776,10 +42390,10 @@ GrB_Info GB_Adot2B__lor_ne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_uint8
+GrB_Info GB_Adot3B__land_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30787,32 +42401,32 @@ GrB_Info GB_Adot3B__lor_ne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_uint8
+GrB_Info GB_Asaxpy3B__land_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_int16
+GrB_Info GB_Adot4B__land_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_int16
+GrB_Info GB_Adot2B__land_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30820,10 +42434,10 @@ GrB_Info GB_Adot2B__lor_ne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_int16
+GrB_Info GB_Adot3B__land_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30831,32 +42445,32 @@ GrB_Info GB_Adot3B__lor_ne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_int16
+GrB_Info GB_Asaxpy3B__land_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_uint16
+GrB_Info GB_Adot4B__land_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_uint16
+GrB_Info GB_Adot2B__land_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30864,10 +42478,10 @@ GrB_Info GB_Adot2B__lor_ne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_uint16
+GrB_Info GB_Adot3B__land_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30875,32 +42489,32 @@ GrB_Info GB_Adot3B__lor_ne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_uint16
+GrB_Info GB_Asaxpy3B__land_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_int32
+GrB_Info GB_Adot4B__land_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_int32
+GrB_Info GB_Adot2B__land_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30908,10 +42522,10 @@ GrB_Info GB_Adot2B__lor_ne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_int32
+GrB_Info GB_Adot3B__land_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30919,32 +42533,32 @@ GrB_Info GB_Adot3B__lor_ne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_int32
+GrB_Info GB_Asaxpy3B__land_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_uint32
+GrB_Info GB_Adot4B__land_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_uint32
+GrB_Info GB_Adot2B__land_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30952,10 +42566,10 @@ GrB_Info GB_Adot2B__lor_ne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_uint32
+GrB_Info GB_Adot3B__land_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -30963,32 +42577,32 @@ GrB_Info GB_Adot3B__lor_ne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_uint32
+GrB_Info GB_Asaxpy3B__land_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_int64
+GrB_Info GB_Adot4B__land_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_int64
+GrB_Info GB_Adot2B__land_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -30996,10 +42610,10 @@ GrB_Info GB_Adot2B__lor_ne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_int64
+GrB_Info GB_Adot3B__land_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31007,32 +42621,32 @@ GrB_Info GB_Adot3B__lor_ne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_int64
+GrB_Info GB_Asaxpy3B__land_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_uint64
+GrB_Info GB_Adot4B__land_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_uint64
+GrB_Info GB_Adot2B__land_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31040,10 +42654,10 @@ GrB_Info GB_Adot2B__lor_ne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_uint64
+GrB_Info GB_Adot3B__land_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31051,32 +42665,32 @@ GrB_Info GB_Adot3B__lor_ne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_uint64
+GrB_Info GB_Asaxpy3B__land_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_fp32
+GrB_Info GB_Adot4B__land_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_fp32
+GrB_Info GB_Adot2B__lxor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31084,10 +42698,10 @@ GrB_Info GB_Adot2B__lor_ne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_fp32
+GrB_Info GB_Adot3B__lxor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31095,32 +42709,32 @@ GrB_Info GB_Adot3B__lor_ne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_fp32
+GrB_Info GB_Asaxpy3B__lxor_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ne_fp64
+GrB_Info GB_Adot4B__lxor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ne_fp64
+GrB_Info GB_Adot2B__lxor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31128,10 +42742,10 @@ GrB_Info GB_Adot2B__lor_ne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ne_fp64
+GrB_Info GB_Adot3B__lxor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31139,32 +42753,32 @@ GrB_Info GB_Adot3B__lor_ne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ne_fp64
+GrB_Info GB_Asaxpy3B__lxor_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_int8
+GrB_Info GB_Adot4B__lxor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_int8
+GrB_Info GB_Adot2B__lxor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31172,10 +42786,10 @@ GrB_Info GB_Adot2B__land_ne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_int8
+GrB_Info GB_Adot3B__lxor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31183,32 +42797,32 @@ GrB_Info GB_Adot3B__land_ne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_int8
+GrB_Info GB_Asaxpy3B__lxor_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_uint8
+GrB_Info GB_Adot4B__lxor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_uint8
+GrB_Info GB_Adot2B__lxor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31216,10 +42830,10 @@ GrB_Info GB_Adot2B__land_ne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_uint8
+GrB_Info GB_Adot3B__lxor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31227,32 +42841,32 @@ GrB_Info GB_Adot3B__land_ne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_uint8
+GrB_Info GB_Asaxpy3B__lxor_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_int16
+GrB_Info GB_Adot4B__lxor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_int16
+GrB_Info GB_Adot2B__lxor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31260,10 +42874,10 @@ GrB_Info GB_Adot2B__land_ne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_int16
+GrB_Info GB_Adot3B__lxor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31271,32 +42885,32 @@ GrB_Info GB_Adot3B__land_ne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_int16
+GrB_Info GB_Asaxpy3B__lxor_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_uint16
+GrB_Info GB_Adot4B__lxor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_uint16
+GrB_Info GB_Adot2B__lxor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31304,10 +42918,10 @@ GrB_Info GB_Adot2B__land_ne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_uint16
+GrB_Info GB_Adot3B__lxor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31315,32 +42929,32 @@ GrB_Info GB_Adot3B__land_ne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_uint16
+GrB_Info GB_Asaxpy3B__lxor_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_int32
+GrB_Info GB_Adot4B__lxor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_int32
+GrB_Info GB_Adot2B__lxor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31348,10 +42962,10 @@ GrB_Info GB_Adot2B__land_ne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_int32
+GrB_Info GB_Adot3B__lxor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31359,32 +42973,32 @@ GrB_Info GB_Adot3B__land_ne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_int32
+GrB_Info GB_Asaxpy3B__lxor_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_uint32
+GrB_Info GB_Adot4B__lxor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_uint32
+GrB_Info GB_Adot2B__lxor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31392,10 +43006,10 @@ GrB_Info GB_Adot2B__land_ne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_uint32
+GrB_Info GB_Adot3B__lxor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31403,32 +43017,32 @@ GrB_Info GB_Adot3B__land_ne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_uint32
+GrB_Info GB_Asaxpy3B__lxor_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_int64
+GrB_Info GB_Adot4B__lxor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_int64
+GrB_Info GB_Adot2B__lxor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31436,10 +43050,10 @@ GrB_Info GB_Adot2B__land_ne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_int64
+GrB_Info GB_Adot3B__lxor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31447,32 +43061,32 @@ GrB_Info GB_Adot3B__land_ne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_int64
+GrB_Info GB_Asaxpy3B__lxor_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_uint64
+GrB_Info GB_Adot4B__lxor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_uint64
+GrB_Info GB_Adot2B__lxor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31480,10 +43094,10 @@ GrB_Info GB_Adot2B__land_ne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_uint64
+GrB_Info GB_Adot3B__lxor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31491,32 +43105,32 @@ GrB_Info GB_Adot3B__land_ne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_uint64
+GrB_Info GB_Asaxpy3B__lxor_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_fp32
+GrB_Info GB_Adot4B__lxor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_fp32
+GrB_Info GB_Adot2B__lxor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31524,10 +43138,10 @@ GrB_Info GB_Adot2B__land_ne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_fp32
+GrB_Info GB_Adot3B__lxor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31535,32 +43149,32 @@ GrB_Info GB_Adot3B__land_ne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_fp32
+GrB_Info GB_Asaxpy3B__lxor_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ne_fp64
+GrB_Info GB_Adot4B__lxor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ne_fp64
+GrB_Info GB_Adot2B__eq_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31568,10 +43182,10 @@ GrB_Info GB_Adot2B__land_ne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ne_fp64
+GrB_Info GB_Adot3B__eq_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31579,32 +43193,32 @@ GrB_Info GB_Adot3B__land_ne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ne_fp64
+GrB_Info GB_Asaxpy3B__eq_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_int8
+GrB_Info GB_Adot4B__eq_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_int8
+GrB_Info GB_Adot2B__eq_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31612,10 +43226,10 @@ GrB_Info GB_Adot2B__lxor_ne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_int8
+GrB_Info GB_Adot3B__eq_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31623,32 +43237,32 @@ GrB_Info GB_Adot3B__lxor_ne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_int8
+GrB_Info GB_Asaxpy3B__eq_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_uint8
+GrB_Info GB_Adot4B__eq_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_uint8
+GrB_Info GB_Adot2B__eq_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31656,10 +43270,10 @@ GrB_Info GB_Adot2B__lxor_ne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_uint8
+GrB_Info GB_Adot3B__eq_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31667,32 +43281,32 @@ GrB_Info GB_Adot3B__lxor_ne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_uint8
+GrB_Info GB_Asaxpy3B__eq_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_int16
+GrB_Info GB_Adot4B__eq_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_int16
+GrB_Info GB_Adot2B__eq_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31700,10 +43314,10 @@ GrB_Info GB_Adot2B__lxor_ne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_int16
+GrB_Info GB_Adot3B__eq_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31711,32 +43325,32 @@ GrB_Info GB_Adot3B__lxor_ne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_int16
+GrB_Info GB_Asaxpy3B__eq_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_uint16
+GrB_Info GB_Adot4B__eq_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_uint16
+GrB_Info GB_Adot2B__eq_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31744,10 +43358,10 @@ GrB_Info GB_Adot2B__lxor_ne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_uint16
+GrB_Info GB_Adot3B__eq_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31755,32 +43369,32 @@ GrB_Info GB_Adot3B__lxor_ne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_uint16
+GrB_Info GB_Asaxpy3B__eq_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_int32
+GrB_Info GB_Adot4B__eq_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_int32
+GrB_Info GB_Adot2B__eq_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31788,10 +43402,10 @@ GrB_Info GB_Adot2B__lxor_ne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_int32
+GrB_Info GB_Adot3B__eq_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31799,32 +43413,32 @@ GrB_Info GB_Adot3B__lxor_ne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_int32
+GrB_Info GB_Asaxpy3B__eq_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_uint32
+GrB_Info GB_Adot4B__eq_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_uint32
+GrB_Info GB_Adot2B__eq_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31832,10 +43446,10 @@ GrB_Info GB_Adot2B__lxor_ne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_uint32
+GrB_Info GB_Adot3B__eq_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31843,32 +43457,32 @@ GrB_Info GB_Adot3B__lxor_ne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_uint32
+GrB_Info GB_Asaxpy3B__eq_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_int64
+GrB_Info GB_Adot4B__eq_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_int64
+GrB_Info GB_Adot2B__eq_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31876,10 +43490,10 @@ GrB_Info GB_Adot2B__lxor_ne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_int64
+GrB_Info GB_Adot3B__eq_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31887,32 +43501,32 @@ GrB_Info GB_Adot3B__lxor_ne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_int64
+GrB_Info GB_Asaxpy3B__eq_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_uint64
+GrB_Info GB_Adot4B__eq_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_uint64
+GrB_Info GB_Adot2B__eq_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31920,10 +43534,10 @@ GrB_Info GB_Adot2B__lxor_ne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_uint64
+GrB_Info GB_Adot3B__eq_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31931,32 +43545,32 @@ GrB_Info GB_Adot3B__lxor_ne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_uint64
+GrB_Info GB_Asaxpy3B__eq_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_fp32
+GrB_Info GB_Adot4B__eq_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_fp32
+GrB_Info GB_Adot2B__eq_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -31964,10 +43578,10 @@ GrB_Info GB_Adot2B__lxor_ne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_fp32
+GrB_Info GB_Adot3B__eq_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -31975,32 +43589,32 @@ GrB_Info GB_Adot3B__lxor_ne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_fp32
+GrB_Info GB_Asaxpy3B__eq_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ne_fp64
+GrB_Info GB_Adot4B__eq_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ne_fp64
+GrB_Info GB_Adot2B__eq_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32008,10 +43622,10 @@ GrB_Info GB_Adot2B__lxor_ne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ne_fp64
+GrB_Info GB_Adot3B__eq_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32019,32 +43633,32 @@ GrB_Info GB_Adot3B__lxor_ne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ne_fp64
+GrB_Info GB_Asaxpy3B__eq_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_int8
+GrB_Info GB_Adot4B__eq_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_int8
+GrB_Info GB_Adot2B__lor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32052,10 +43666,10 @@ GrB_Info GB_Adot2B__eq_ne_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_int8
+GrB_Info GB_Adot3B__lor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32063,32 +43677,32 @@ GrB_Info GB_Adot3B__eq_ne_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_int8
+GrB_Info GB_Asaxpy3B__lor_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_uint8
+GrB_Info GB_Adot4B__lor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_uint8
+GrB_Info GB_Adot2B__lor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32096,10 +43710,10 @@ GrB_Info GB_Adot2B__eq_ne_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_uint8
+GrB_Info GB_Adot3B__lor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32107,32 +43721,32 @@ GrB_Info GB_Adot3B__eq_ne_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_uint8
+GrB_Info GB_Asaxpy3B__lor_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_int16
+GrB_Info GB_Adot4B__lor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_int16
+GrB_Info GB_Adot2B__lor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32140,10 +43754,10 @@ GrB_Info GB_Adot2B__eq_ne_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_int16
+GrB_Info GB_Adot3B__lor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32151,32 +43765,32 @@ GrB_Info GB_Adot3B__eq_ne_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_int16
+GrB_Info GB_Asaxpy3B__lor_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_uint16
+GrB_Info GB_Adot4B__lor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_uint16
+GrB_Info GB_Adot2B__lor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32184,10 +43798,10 @@ GrB_Info GB_Adot2B__eq_ne_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_uint16
+GrB_Info GB_Adot3B__lor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32195,32 +43809,32 @@ GrB_Info GB_Adot3B__eq_ne_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_uint16
+GrB_Info GB_Asaxpy3B__lor_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_int32
+GrB_Info GB_Adot4B__lor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_int32
+GrB_Info GB_Adot2B__lor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32228,10 +43842,10 @@ GrB_Info GB_Adot2B__eq_ne_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_int32
+GrB_Info GB_Adot3B__lor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32239,32 +43853,32 @@ GrB_Info GB_Adot3B__eq_ne_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_int32
+GrB_Info GB_Asaxpy3B__lor_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_uint32
+GrB_Info GB_Adot4B__lor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_uint32
+GrB_Info GB_Adot2B__lor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32272,10 +43886,10 @@ GrB_Info GB_Adot2B__eq_ne_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_uint32
+GrB_Info GB_Adot3B__lor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32283,32 +43897,32 @@ GrB_Info GB_Adot3B__eq_ne_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_uint32
+GrB_Info GB_Asaxpy3B__lor_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_int64
+GrB_Info GB_Adot4B__lor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_int64
+GrB_Info GB_Adot2B__lor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32316,10 +43930,10 @@ GrB_Info GB_Adot2B__eq_ne_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_int64
+GrB_Info GB_Adot3B__lor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32327,32 +43941,32 @@ GrB_Info GB_Adot3B__eq_ne_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_int64
+GrB_Info GB_Asaxpy3B__lor_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_uint64
+GrB_Info GB_Adot4B__lor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_uint64
+GrB_Info GB_Adot2B__lor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32360,10 +43974,10 @@ GrB_Info GB_Adot2B__eq_ne_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_uint64
+GrB_Info GB_Adot3B__lor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32371,32 +43985,32 @@ GrB_Info GB_Adot3B__eq_ne_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_uint64
+GrB_Info GB_Asaxpy3B__lor_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_fp32
+GrB_Info GB_Adot4B__lor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_fp32
+GrB_Info GB_Adot2B__lor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32404,10 +44018,10 @@ GrB_Info GB_Adot2B__eq_ne_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_fp32
+GrB_Info GB_Adot3B__lor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32415,32 +44029,32 @@ GrB_Info GB_Adot3B__eq_ne_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_fp32
+GrB_Info GB_Asaxpy3B__lor_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ne_fp64
+GrB_Info GB_Adot4B__lor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ne_fp64
+GrB_Info GB_Adot2B__lor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32448,10 +44062,10 @@ GrB_Info GB_Adot2B__eq_ne_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ne_fp64
+GrB_Info GB_Adot3B__lor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32459,32 +44073,32 @@ GrB_Info GB_Adot3B__eq_ne_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ne_fp64
+GrB_Info GB_Asaxpy3B__lor_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_bool
+GrB_Info GB_Adot4B__lor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_bool
+GrB_Info GB_Adot2B__lor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32492,10 +44106,10 @@ GrB_Info GB_Adot2B__lor_gt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_bool
+GrB_Info GB_Adot3B__lor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32503,32 +44117,32 @@ GrB_Info GB_Adot3B__lor_gt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_bool
+GrB_Info GB_Asaxpy3B__lor_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_int8
+GrB_Info GB_Adot4B__lor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_int8
+GrB_Info GB_Adot2B__any_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32536,10 +44150,10 @@ GrB_Info GB_Adot2B__lor_gt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_int8
+GrB_Info GB_Adot3B__any_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32547,32 +44161,32 @@ GrB_Info GB_Adot3B__lor_gt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_int8
+GrB_Info GB_Asaxpy3B__any_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_uint8
+GrB_Info GB_Adot4B__any_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_uint8
+GrB_Info GB_Adot2B__any_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32580,10 +44194,10 @@ GrB_Info GB_Adot2B__lor_gt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_uint8
+GrB_Info GB_Adot3B__any_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32591,32 +44205,32 @@ GrB_Info GB_Adot3B__lor_gt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_uint8
+GrB_Info GB_Asaxpy3B__any_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_int16
+GrB_Info GB_Adot4B__any_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_int16
+GrB_Info GB_Adot2B__any_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32624,10 +44238,10 @@ GrB_Info GB_Adot2B__lor_gt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_int16
+GrB_Info GB_Adot3B__any_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32635,32 +44249,32 @@ GrB_Info GB_Adot3B__lor_gt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_int16
+GrB_Info GB_Asaxpy3B__any_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_uint16
+GrB_Info GB_Adot4B__any_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_uint16
+GrB_Info GB_Adot2B__any_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32668,10 +44282,10 @@ GrB_Info GB_Adot2B__lor_gt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_uint16
+GrB_Info GB_Adot3B__any_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32679,32 +44293,32 @@ GrB_Info GB_Adot3B__lor_gt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_uint16
+GrB_Info GB_Asaxpy3B__any_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_int32
+GrB_Info GB_Adot4B__any_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_int32
+GrB_Info GB_Adot2B__any_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32712,10 +44326,10 @@ GrB_Info GB_Adot2B__lor_gt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_int32
+GrB_Info GB_Adot3B__any_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32723,32 +44337,32 @@ GrB_Info GB_Adot3B__lor_gt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_int32
+GrB_Info GB_Asaxpy3B__any_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_uint32
+GrB_Info GB_Adot4B__any_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_uint32
+GrB_Info GB_Adot2B__any_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32756,10 +44370,10 @@ GrB_Info GB_Adot2B__lor_gt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_uint32
+GrB_Info GB_Adot3B__any_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32767,32 +44381,32 @@ GrB_Info GB_Adot3B__lor_gt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_uint32
+GrB_Info GB_Asaxpy3B__any_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_int64
+GrB_Info GB_Adot4B__any_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_int64
+GrB_Info GB_Adot2B__any_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32800,10 +44414,10 @@ GrB_Info GB_Adot2B__lor_gt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_int64
+GrB_Info GB_Adot3B__any_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32811,32 +44425,32 @@ GrB_Info GB_Adot3B__lor_gt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_int64
+GrB_Info GB_Asaxpy3B__any_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_uint64
+GrB_Info GB_Adot4B__any_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_uint64
+GrB_Info GB_Adot2B__any_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32844,10 +44458,10 @@ GrB_Info GB_Adot2B__lor_gt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_uint64
+GrB_Info GB_Adot3B__any_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32855,32 +44469,32 @@ GrB_Info GB_Adot3B__lor_gt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_uint64
+GrB_Info GB_Asaxpy3B__any_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_fp32
+GrB_Info GB_Adot4B__any_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_fp32
+GrB_Info GB_Adot2B__any_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32888,10 +44502,10 @@ GrB_Info GB_Adot2B__lor_gt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_fp32
+GrB_Info GB_Adot3B__any_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32899,32 +44513,32 @@ GrB_Info GB_Adot3B__lor_gt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_fp32
+GrB_Info GB_Asaxpy3B__any_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_gt_fp64
+GrB_Info GB_Adot4B__any_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_gt_fp64
+GrB_Info GB_Adot2B__any_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32932,10 +44546,10 @@ GrB_Info GB_Adot2B__lor_gt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_gt_fp64
+GrB_Info GB_Adot3B__any_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32943,32 +44557,32 @@ GrB_Info GB_Adot3B__lor_gt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_gt_fp64
+GrB_Info GB_Asaxpy3B__any_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_bool
+GrB_Info GB_Adot4B__any_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_bool
+GrB_Info GB_Adot2B__any_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -32976,10 +44590,10 @@ GrB_Info GB_Adot2B__land_gt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_bool
+GrB_Info GB_Adot3B__any_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -32987,32 +44601,32 @@ GrB_Info GB_Adot3B__land_gt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_bool
+GrB_Info GB_Asaxpy3B__any_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_int8
+GrB_Info GB_Adot4B__any_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_int8
+GrB_Info GB_Adot2B__land_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33020,10 +44634,10 @@ GrB_Info GB_Adot2B__land_gt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_int8
+GrB_Info GB_Adot3B__land_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33031,32 +44645,32 @@ GrB_Info GB_Adot3B__land_gt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_int8
+GrB_Info GB_Asaxpy3B__land_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_uint8
+GrB_Info GB_Adot4B__land_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_uint8
+GrB_Info GB_Adot2B__land_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33064,10 +44678,10 @@ GrB_Info GB_Adot2B__land_gt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_uint8
+GrB_Info GB_Adot3B__land_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33075,32 +44689,32 @@ GrB_Info GB_Adot3B__land_gt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_uint8
+GrB_Info GB_Asaxpy3B__land_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_int16
+GrB_Info GB_Adot4B__land_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_int16
+GrB_Info GB_Adot2B__land_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33108,10 +44722,10 @@ GrB_Info GB_Adot2B__land_gt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_int16
+GrB_Info GB_Adot3B__land_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33119,32 +44733,32 @@ GrB_Info GB_Adot3B__land_gt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_int16
+GrB_Info GB_Asaxpy3B__land_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_uint16
+GrB_Info GB_Adot4B__land_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_uint16
+GrB_Info GB_Adot2B__land_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33152,10 +44766,10 @@ GrB_Info GB_Adot2B__land_gt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_uint16
+GrB_Info GB_Adot3B__land_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33163,32 +44777,32 @@ GrB_Info GB_Adot3B__land_gt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_uint16
+GrB_Info GB_Asaxpy3B__land_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_int32
+GrB_Info GB_Adot4B__land_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_int32
+GrB_Info GB_Adot2B__land_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33196,10 +44810,10 @@ GrB_Info GB_Adot2B__land_gt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_int32
+GrB_Info GB_Adot3B__land_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33207,32 +44821,32 @@ GrB_Info GB_Adot3B__land_gt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_int32
+GrB_Info GB_Asaxpy3B__land_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_uint32
+GrB_Info GB_Adot4B__land_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_uint32
+GrB_Info GB_Adot2B__land_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33240,10 +44854,10 @@ GrB_Info GB_Adot2B__land_gt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_uint32
+GrB_Info GB_Adot3B__land_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33251,32 +44865,32 @@ GrB_Info GB_Adot3B__land_gt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_uint32
+GrB_Info GB_Asaxpy3B__land_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_int64
+GrB_Info GB_Adot4B__land_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_int64
+GrB_Info GB_Adot2B__land_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33284,10 +44898,10 @@ GrB_Info GB_Adot2B__land_gt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_int64
+GrB_Info GB_Adot3B__land_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33295,32 +44909,32 @@ GrB_Info GB_Adot3B__land_gt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_int64
+GrB_Info GB_Asaxpy3B__land_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_uint64
+GrB_Info GB_Adot4B__land_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_uint64
+GrB_Info GB_Adot2B__land_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33328,10 +44942,10 @@ GrB_Info GB_Adot2B__land_gt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_uint64
+GrB_Info GB_Adot3B__land_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33339,32 +44953,32 @@ GrB_Info GB_Adot3B__land_gt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_uint64
+GrB_Info GB_Asaxpy3B__land_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_fp32
+GrB_Info GB_Adot4B__land_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_fp32
+GrB_Info GB_Adot2B__land_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33372,10 +44986,10 @@ GrB_Info GB_Adot2B__land_gt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_fp32
+GrB_Info GB_Adot3B__land_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33383,32 +44997,32 @@ GrB_Info GB_Adot3B__land_gt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_fp32
+GrB_Info GB_Asaxpy3B__land_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_gt_fp64
+GrB_Info GB_Adot4B__land_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_gt_fp64
+GrB_Info GB_Adot2B__land_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33416,10 +45030,10 @@ GrB_Info GB_Adot2B__land_gt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_gt_fp64
+GrB_Info GB_Adot3B__land_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33427,32 +45041,32 @@ GrB_Info GB_Adot3B__land_gt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_gt_fp64
+GrB_Info GB_Asaxpy3B__land_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_bool
+GrB_Info GB_Adot4B__land_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_bool
+GrB_Info GB_Adot2B__land_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33460,10 +45074,10 @@ GrB_Info GB_Adot2B__lxor_gt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_bool
+GrB_Info GB_Adot3B__land_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33471,32 +45085,32 @@ GrB_Info GB_Adot3B__lxor_gt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_bool
+GrB_Info GB_Asaxpy3B__land_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_int8
+GrB_Info GB_Adot4B__land_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_int8
+GrB_Info GB_Adot2B__lxor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33504,10 +45118,10 @@ GrB_Info GB_Adot2B__lxor_gt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_int8
+GrB_Info GB_Adot3B__lxor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33515,32 +45129,32 @@ GrB_Info GB_Adot3B__lxor_gt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_int8
+GrB_Info GB_Asaxpy3B__lxor_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_uint8
+GrB_Info GB_Adot4B__lxor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_uint8
+GrB_Info GB_Adot2B__lxor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33548,10 +45162,10 @@ GrB_Info GB_Adot2B__lxor_gt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_uint8
+GrB_Info GB_Adot3B__lxor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33559,32 +45173,32 @@ GrB_Info GB_Adot3B__lxor_gt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_uint8
+GrB_Info GB_Asaxpy3B__lxor_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_int16
+GrB_Info GB_Adot4B__lxor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_int16
+GrB_Info GB_Adot2B__lxor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33592,10 +45206,10 @@ GrB_Info GB_Adot2B__lxor_gt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_int16
+GrB_Info GB_Adot3B__lxor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33603,32 +45217,32 @@ GrB_Info GB_Adot3B__lxor_gt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_int16
+GrB_Info GB_Asaxpy3B__lxor_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_uint16
+GrB_Info GB_Adot4B__lxor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_uint16
+GrB_Info GB_Adot2B__lxor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33636,10 +45250,10 @@ GrB_Info GB_Adot2B__lxor_gt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_uint16
+GrB_Info GB_Adot3B__lxor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33647,32 +45261,32 @@ GrB_Info GB_Adot3B__lxor_gt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_uint16
+GrB_Info GB_Asaxpy3B__lxor_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_int32
+GrB_Info GB_Adot4B__lxor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_int32
+GrB_Info GB_Adot2B__lxor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33680,10 +45294,10 @@ GrB_Info GB_Adot2B__lxor_gt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_int32
+GrB_Info GB_Adot3B__lxor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33691,32 +45305,32 @@ GrB_Info GB_Adot3B__lxor_gt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_int32
+GrB_Info GB_Asaxpy3B__lxor_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_uint32
+GrB_Info GB_Adot4B__lxor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_uint32
+GrB_Info GB_Adot2B__lxor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33724,10 +45338,10 @@ GrB_Info GB_Adot2B__lxor_gt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_uint32
+GrB_Info GB_Adot3B__lxor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33735,32 +45349,32 @@ GrB_Info GB_Adot3B__lxor_gt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_uint32
+GrB_Info GB_Asaxpy3B__lxor_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_int64
+GrB_Info GB_Adot4B__lxor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_int64
+GrB_Info GB_Adot2B__lxor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33768,10 +45382,10 @@ GrB_Info GB_Adot2B__lxor_gt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_int64
+GrB_Info GB_Adot3B__lxor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33779,32 +45393,32 @@ GrB_Info GB_Adot3B__lxor_gt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_int64
+GrB_Info GB_Asaxpy3B__lxor_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_uint64
+GrB_Info GB_Adot4B__lxor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_uint64
+GrB_Info GB_Adot2B__lxor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33812,10 +45426,10 @@ GrB_Info GB_Adot2B__lxor_gt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_uint64
+GrB_Info GB_Adot3B__lxor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33823,32 +45437,32 @@ GrB_Info GB_Adot3B__lxor_gt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_uint64
+GrB_Info GB_Asaxpy3B__lxor_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_fp32
+GrB_Info GB_Adot4B__lxor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_fp32
+GrB_Info GB_Adot2B__lxor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33856,10 +45470,10 @@ GrB_Info GB_Adot2B__lxor_gt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_fp32
+GrB_Info GB_Adot3B__lxor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33867,32 +45481,32 @@ GrB_Info GB_Adot3B__lxor_gt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_fp32
+GrB_Info GB_Asaxpy3B__lxor_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_gt_fp64
+GrB_Info GB_Adot4B__lxor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_gt_fp64
+GrB_Info GB_Adot2B__lxor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33900,10 +45514,10 @@ GrB_Info GB_Adot2B__lxor_gt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_gt_fp64
+GrB_Info GB_Adot3B__lxor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33911,32 +45525,32 @@ GrB_Info GB_Adot3B__lxor_gt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_gt_fp64
+GrB_Info GB_Asaxpy3B__lxor_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_bool
+GrB_Info GB_Adot4B__lxor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_bool
+GrB_Info GB_Adot2B__lxor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33944,10 +45558,10 @@ GrB_Info GB_Adot2B__eq_gt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_bool
+GrB_Info GB_Adot3B__lxor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33955,32 +45569,32 @@ GrB_Info GB_Adot3B__eq_gt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_bool
+GrB_Info GB_Asaxpy3B__lxor_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_int8
+GrB_Info GB_Adot4B__lxor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_int8
+GrB_Info GB_Adot2B__eq_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -33988,10 +45602,10 @@ GrB_Info GB_Adot2B__eq_gt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_int8
+GrB_Info GB_Adot3B__eq_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -33999,32 +45613,32 @@ GrB_Info GB_Adot3B__eq_gt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_int8
+GrB_Info GB_Asaxpy3B__eq_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_uint8
+GrB_Info GB_Adot4B__eq_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_uint8
+GrB_Info GB_Adot2B__eq_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34032,10 +45646,10 @@ GrB_Info GB_Adot2B__eq_gt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_uint8
+GrB_Info GB_Adot3B__eq_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34043,32 +45657,32 @@ GrB_Info GB_Adot3B__eq_gt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_uint8
+GrB_Info GB_Asaxpy3B__eq_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_int16
+GrB_Info GB_Adot4B__eq_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_int16
+GrB_Info GB_Adot2B__eq_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34076,10 +45690,10 @@ GrB_Info GB_Adot2B__eq_gt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_int16
+GrB_Info GB_Adot3B__eq_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34087,32 +45701,32 @@ GrB_Info GB_Adot3B__eq_gt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_int16
+GrB_Info GB_Asaxpy3B__eq_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_uint16
+GrB_Info GB_Adot4B__eq_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_uint16
+GrB_Info GB_Adot2B__eq_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34120,10 +45734,10 @@ GrB_Info GB_Adot2B__eq_gt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_uint16
+GrB_Info GB_Adot3B__eq_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34131,32 +45745,32 @@ GrB_Info GB_Adot3B__eq_gt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_uint16
+GrB_Info GB_Asaxpy3B__eq_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_int32
+GrB_Info GB_Adot4B__eq_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_int32
+GrB_Info GB_Adot2B__eq_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34164,10 +45778,10 @@ GrB_Info GB_Adot2B__eq_gt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_int32
+GrB_Info GB_Adot3B__eq_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34175,32 +45789,32 @@ GrB_Info GB_Adot3B__eq_gt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_int32
+GrB_Info GB_Asaxpy3B__eq_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_uint32
+GrB_Info GB_Adot4B__eq_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_uint32
+GrB_Info GB_Adot2B__eq_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34208,10 +45822,10 @@ GrB_Info GB_Adot2B__eq_gt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_uint32
+GrB_Info GB_Adot3B__eq_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34219,32 +45833,32 @@ GrB_Info GB_Adot3B__eq_gt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_uint32
+GrB_Info GB_Asaxpy3B__eq_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_int64
+GrB_Info GB_Adot4B__eq_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_int64
+GrB_Info GB_Adot2B__eq_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34252,10 +45866,10 @@ GrB_Info GB_Adot2B__eq_gt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_int64
+GrB_Info GB_Adot3B__eq_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34263,32 +45877,32 @@ GrB_Info GB_Adot3B__eq_gt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_int64
+GrB_Info GB_Asaxpy3B__eq_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_uint64
+GrB_Info GB_Adot4B__eq_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_uint64
+GrB_Info GB_Adot2B__eq_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34296,10 +45910,10 @@ GrB_Info GB_Adot2B__eq_gt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_uint64
+GrB_Info GB_Adot3B__eq_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34307,32 +45921,32 @@ GrB_Info GB_Adot3B__eq_gt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_uint64
+GrB_Info GB_Asaxpy3B__eq_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_fp32
+GrB_Info GB_Adot4B__eq_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_fp32
+GrB_Info GB_Adot2B__eq_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34340,10 +45954,10 @@ GrB_Info GB_Adot2B__eq_gt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_fp32
+GrB_Info GB_Adot3B__eq_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34351,32 +45965,32 @@ GrB_Info GB_Adot3B__eq_gt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_fp32
+GrB_Info GB_Asaxpy3B__eq_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_gt_fp64
+GrB_Info GB_Adot4B__eq_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_gt_fp64
+GrB_Info GB_Adot2B__eq_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34384,10 +45998,10 @@ GrB_Info GB_Adot2B__eq_gt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_gt_fp64
+GrB_Info GB_Adot3B__eq_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34395,32 +46009,32 @@ GrB_Info GB_Adot3B__eq_gt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_gt_fp64
+GrB_Info GB_Asaxpy3B__eq_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_bool
+GrB_Info GB_Adot4B__eq_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_bool
+GrB_Info GB_Adot2B__eq_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34428,10 +46042,10 @@ GrB_Info GB_Adot2B__lor_lt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_bool
+GrB_Info GB_Adot3B__eq_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34439,32 +46053,32 @@ GrB_Info GB_Adot3B__lor_lt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_bool
+GrB_Info GB_Asaxpy3B__eq_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_int8
+GrB_Info GB_Adot4B__eq_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_int8
+GrB_Info GB_Adot2B__lor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34472,10 +46086,10 @@ GrB_Info GB_Adot2B__lor_lt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_int8
+GrB_Info GB_Adot3B__lor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34483,32 +46097,32 @@ GrB_Info GB_Adot3B__lor_lt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_int8
+GrB_Info GB_Asaxpy3B__lor_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_uint8
+GrB_Info GB_Adot4B__lor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_uint8
+GrB_Info GB_Adot2B__lor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34516,10 +46130,10 @@ GrB_Info GB_Adot2B__lor_lt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_uint8
+GrB_Info GB_Adot3B__lor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34527,32 +46141,32 @@ GrB_Info GB_Adot3B__lor_lt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_uint8
+GrB_Info GB_Asaxpy3B__lor_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_int16
+GrB_Info GB_Adot4B__lor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_int16
+GrB_Info GB_Adot2B__lor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34560,10 +46174,10 @@ GrB_Info GB_Adot2B__lor_lt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_int16
+GrB_Info GB_Adot3B__lor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34571,32 +46185,32 @@ GrB_Info GB_Adot3B__lor_lt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_int16
+GrB_Info GB_Asaxpy3B__lor_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_uint16
+GrB_Info GB_Adot4B__lor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_uint16
+GrB_Info GB_Adot2B__lor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34604,10 +46218,10 @@ GrB_Info GB_Adot2B__lor_lt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_uint16
+GrB_Info GB_Adot3B__lor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34615,32 +46229,32 @@ GrB_Info GB_Adot3B__lor_lt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_uint16
+GrB_Info GB_Asaxpy3B__lor_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_int32
+GrB_Info GB_Adot4B__lor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_int32
+GrB_Info GB_Adot2B__lor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34648,10 +46262,10 @@ GrB_Info GB_Adot2B__lor_lt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_int32
+GrB_Info GB_Adot3B__lor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34659,32 +46273,32 @@ GrB_Info GB_Adot3B__lor_lt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_int32
+GrB_Info GB_Asaxpy3B__lor_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_uint32
+GrB_Info GB_Adot4B__lor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_uint32
+GrB_Info GB_Adot2B__lor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34692,10 +46306,10 @@ GrB_Info GB_Adot2B__lor_lt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_uint32
+GrB_Info GB_Adot3B__lor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34703,32 +46317,32 @@ GrB_Info GB_Adot3B__lor_lt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_uint32
+GrB_Info GB_Asaxpy3B__lor_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_int64
+GrB_Info GB_Adot4B__lor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_int64
+GrB_Info GB_Adot2B__lor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34736,10 +46350,10 @@ GrB_Info GB_Adot2B__lor_lt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_int64
+GrB_Info GB_Adot3B__lor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34747,32 +46361,32 @@ GrB_Info GB_Adot3B__lor_lt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_int64
+GrB_Info GB_Asaxpy3B__lor_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_uint64
+GrB_Info GB_Adot4B__lor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_uint64
+GrB_Info GB_Adot2B__lor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34780,10 +46394,10 @@ GrB_Info GB_Adot2B__lor_lt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_uint64
+GrB_Info GB_Adot3B__lor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34791,32 +46405,32 @@ GrB_Info GB_Adot3B__lor_lt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_uint64
+GrB_Info GB_Asaxpy3B__lor_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_fp32
+GrB_Info GB_Adot4B__lor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_fp32
+GrB_Info GB_Adot2B__lor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34824,10 +46438,10 @@ GrB_Info GB_Adot2B__lor_lt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_fp32
+GrB_Info GB_Adot3B__lor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34835,32 +46449,32 @@ GrB_Info GB_Adot3B__lor_lt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_fp32
+GrB_Info GB_Asaxpy3B__lor_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lt_fp64
+GrB_Info GB_Adot4B__lor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lt_fp64
+GrB_Info GB_Adot2B__lor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34868,10 +46482,10 @@ GrB_Info GB_Adot2B__lor_lt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lt_fp64
+GrB_Info GB_Adot3B__lor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34879,32 +46493,32 @@ GrB_Info GB_Adot3B__lor_lt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lt_fp64
+GrB_Info GB_Asaxpy3B__lor_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_bool
+GrB_Info GB_Adot4B__lor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_bool
+GrB_Info GB_Adot2B__lor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34912,10 +46526,10 @@ GrB_Info GB_Adot2B__land_lt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_bool
+GrB_Info GB_Adot3B__lor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34923,32 +46537,32 @@ GrB_Info GB_Adot3B__land_lt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_bool
+GrB_Info GB_Asaxpy3B__lor_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_int8
+GrB_Info GB_Adot4B__lor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_int8
+GrB_Info GB_Adot2B__any_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -34956,10 +46570,10 @@ GrB_Info GB_Adot2B__land_lt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_int8
+GrB_Info GB_Adot3B__any_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -34967,32 +46581,32 @@ GrB_Info GB_Adot3B__land_lt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_int8
+GrB_Info GB_Asaxpy3B__any_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_uint8
+GrB_Info GB_Adot4B__any_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_uint8
+GrB_Info GB_Adot2B__any_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35000,10 +46614,10 @@ GrB_Info GB_Adot2B__land_lt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_uint8
+GrB_Info GB_Adot3B__any_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35011,32 +46625,32 @@ GrB_Info GB_Adot3B__land_lt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_uint8
+GrB_Info GB_Asaxpy3B__any_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_int16
+GrB_Info GB_Adot4B__any_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_int16
+GrB_Info GB_Adot2B__any_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35044,10 +46658,10 @@ GrB_Info GB_Adot2B__land_lt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_int16
+GrB_Info GB_Adot3B__any_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35055,32 +46669,32 @@ GrB_Info GB_Adot3B__land_lt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_int16
+GrB_Info GB_Asaxpy3B__any_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_uint16
+GrB_Info GB_Adot4B__any_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_uint16
+GrB_Info GB_Adot2B__any_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35088,10 +46702,10 @@ GrB_Info GB_Adot2B__land_lt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_uint16
+GrB_Info GB_Adot3B__any_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35099,32 +46713,32 @@ GrB_Info GB_Adot3B__land_lt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_uint16
+GrB_Info GB_Asaxpy3B__any_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_int32
+GrB_Info GB_Adot4B__any_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_int32
+GrB_Info GB_Adot2B__any_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35132,10 +46746,10 @@ GrB_Info GB_Adot2B__land_lt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_int32
+GrB_Info GB_Adot3B__any_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35143,32 +46757,32 @@ GrB_Info GB_Adot3B__land_lt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_int32
+GrB_Info GB_Asaxpy3B__any_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_uint32
+GrB_Info GB_Adot4B__any_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_uint32
+GrB_Info GB_Adot2B__any_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35176,10 +46790,10 @@ GrB_Info GB_Adot2B__land_lt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_uint32
+GrB_Info GB_Adot3B__any_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35187,32 +46801,32 @@ GrB_Info GB_Adot3B__land_lt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_uint32
+GrB_Info GB_Asaxpy3B__any_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_int64
+GrB_Info GB_Adot4B__any_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_int64
+GrB_Info GB_Adot2B__any_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35220,10 +46834,10 @@ GrB_Info GB_Adot2B__land_lt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_int64
+GrB_Info GB_Adot3B__any_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35231,32 +46845,32 @@ GrB_Info GB_Adot3B__land_lt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_int64
+GrB_Info GB_Asaxpy3B__any_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_uint64
+GrB_Info GB_Adot4B__any_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_uint64
+GrB_Info GB_Adot2B__any_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35264,10 +46878,10 @@ GrB_Info GB_Adot2B__land_lt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_uint64
+GrB_Info GB_Adot3B__any_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35275,32 +46889,32 @@ GrB_Info GB_Adot3B__land_lt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_uint64
+GrB_Info GB_Asaxpy3B__any_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_fp32
+GrB_Info GB_Adot4B__any_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_fp32
+GrB_Info GB_Adot2B__any_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35308,10 +46922,10 @@ GrB_Info GB_Adot2B__land_lt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_fp32
+GrB_Info GB_Adot3B__any_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35319,32 +46933,32 @@ GrB_Info GB_Adot3B__land_lt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_fp32
+GrB_Info GB_Asaxpy3B__any_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lt_fp64
+GrB_Info GB_Adot4B__any_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lt_fp64
+GrB_Info GB_Adot2B__any_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35352,10 +46966,10 @@ GrB_Info GB_Adot2B__land_lt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lt_fp64
+GrB_Info GB_Adot3B__any_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35363,32 +46977,32 @@ GrB_Info GB_Adot3B__land_lt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lt_fp64
+GrB_Info GB_Asaxpy3B__any_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_bool
+GrB_Info GB_Adot4B__any_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_bool
+GrB_Info GB_Adot2B__any_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35396,10 +47010,10 @@ GrB_Info GB_Adot2B__lxor_lt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_bool
+GrB_Info GB_Adot3B__any_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35407,32 +47021,32 @@ GrB_Info GB_Adot3B__lxor_lt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_bool
+GrB_Info GB_Asaxpy3B__any_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_int8
+GrB_Info GB_Adot4B__any_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_int8
+GrB_Info GB_Adot2B__land_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35440,10 +47054,10 @@ GrB_Info GB_Adot2B__lxor_lt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_int8
+GrB_Info GB_Adot3B__land_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35451,32 +47065,32 @@ GrB_Info GB_Adot3B__lxor_lt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_int8
+GrB_Info GB_Asaxpy3B__land_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_uint8
+GrB_Info GB_Adot4B__land_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_uint8
+GrB_Info GB_Adot2B__land_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35484,10 +47098,10 @@ GrB_Info GB_Adot2B__lxor_lt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_uint8
+GrB_Info GB_Adot3B__land_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35495,32 +47109,32 @@ GrB_Info GB_Adot3B__lxor_lt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_uint8
+GrB_Info GB_Asaxpy3B__land_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_int16
+GrB_Info GB_Adot4B__land_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_int16
+GrB_Info GB_Adot2B__land_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35528,10 +47142,10 @@ GrB_Info GB_Adot2B__lxor_lt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_int16
+GrB_Info GB_Adot3B__land_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35539,32 +47153,32 @@ GrB_Info GB_Adot3B__lxor_lt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_int16
+GrB_Info GB_Asaxpy3B__land_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_uint16
+GrB_Info GB_Adot4B__land_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_uint16
+GrB_Info GB_Adot2B__land_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35572,10 +47186,10 @@ GrB_Info GB_Adot2B__lxor_lt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_uint16
+GrB_Info GB_Adot3B__land_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35583,32 +47197,32 @@ GrB_Info GB_Adot3B__lxor_lt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_uint16
+GrB_Info GB_Asaxpy3B__land_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_int32
+GrB_Info GB_Adot4B__land_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_int32
+GrB_Info GB_Adot2B__land_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35616,10 +47230,10 @@ GrB_Info GB_Adot2B__lxor_lt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_int32
+GrB_Info GB_Adot3B__land_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35627,32 +47241,32 @@ GrB_Info GB_Adot3B__lxor_lt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_int32
+GrB_Info GB_Asaxpy3B__land_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_uint32
+GrB_Info GB_Adot4B__land_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_uint32
+GrB_Info GB_Adot2B__land_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35660,10 +47274,10 @@ GrB_Info GB_Adot2B__lxor_lt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_uint32
+GrB_Info GB_Adot3B__land_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35671,32 +47285,32 @@ GrB_Info GB_Adot3B__lxor_lt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_uint32
+GrB_Info GB_Asaxpy3B__land_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_int64
+GrB_Info GB_Adot4B__land_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_int64
+GrB_Info GB_Adot2B__land_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35704,10 +47318,10 @@ GrB_Info GB_Adot2B__lxor_lt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_int64
+GrB_Info GB_Adot3B__land_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35715,32 +47329,32 @@ GrB_Info GB_Adot3B__lxor_lt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_int64
+GrB_Info GB_Asaxpy3B__land_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_uint64
+GrB_Info GB_Adot4B__land_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_uint64
+GrB_Info GB_Adot2B__land_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35748,10 +47362,10 @@ GrB_Info GB_Adot2B__lxor_lt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_uint64
+GrB_Info GB_Adot3B__land_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35759,32 +47373,32 @@ GrB_Info GB_Adot3B__lxor_lt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_uint64
+GrB_Info GB_Asaxpy3B__land_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_fp32
+GrB_Info GB_Adot4B__land_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_fp32
+GrB_Info GB_Adot2B__land_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35792,10 +47406,10 @@ GrB_Info GB_Adot2B__lxor_lt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_fp32
+GrB_Info GB_Adot3B__land_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35803,32 +47417,32 @@ GrB_Info GB_Adot3B__lxor_lt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_fp32
+GrB_Info GB_Asaxpy3B__land_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lt_fp64
+GrB_Info GB_Adot4B__land_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lt_fp64
+GrB_Info GB_Adot2B__land_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35836,10 +47450,10 @@ GrB_Info GB_Adot2B__lxor_lt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lt_fp64
+GrB_Info GB_Adot3B__land_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35847,32 +47461,32 @@ GrB_Info GB_Adot3B__lxor_lt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lt_fp64
+GrB_Info GB_Asaxpy3B__land_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_bool
+GrB_Info GB_Adot4B__land_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_bool
+GrB_Info GB_Adot2B__land_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35880,10 +47494,10 @@ GrB_Info GB_Adot2B__eq_lt_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_bool
+GrB_Info GB_Adot3B__land_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35891,32 +47505,32 @@ GrB_Info GB_Adot3B__eq_lt_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_bool
+GrB_Info GB_Asaxpy3B__land_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_int8
+GrB_Info GB_Adot4B__land_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_int8
+GrB_Info GB_Adot2B__lxor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35924,10 +47538,10 @@ GrB_Info GB_Adot2B__eq_lt_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_int8
+GrB_Info GB_Adot3B__lxor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35935,32 +47549,32 @@ GrB_Info GB_Adot3B__eq_lt_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_int8
+GrB_Info GB_Asaxpy3B__lxor_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_uint8
+GrB_Info GB_Adot4B__lxor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_uint8
+GrB_Info GB_Adot2B__lxor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -35968,10 +47582,10 @@ GrB_Info GB_Adot2B__eq_lt_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_uint8
+GrB_Info GB_Adot3B__lxor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -35979,32 +47593,32 @@ GrB_Info GB_Adot3B__eq_lt_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_uint8
+GrB_Info GB_Asaxpy3B__lxor_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_int16
+GrB_Info GB_Adot4B__lxor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_int16
+GrB_Info GB_Adot2B__lxor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36012,10 +47626,10 @@ GrB_Info GB_Adot2B__eq_lt_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_int16
+GrB_Info GB_Adot3B__lxor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36023,32 +47637,32 @@ GrB_Info GB_Adot3B__eq_lt_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_int16
+GrB_Info GB_Asaxpy3B__lxor_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_uint16
+GrB_Info GB_Adot4B__lxor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_uint16
+GrB_Info GB_Adot2B__lxor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36056,10 +47670,10 @@ GrB_Info GB_Adot2B__eq_lt_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_uint16
+GrB_Info GB_Adot3B__lxor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36067,32 +47681,32 @@ GrB_Info GB_Adot3B__eq_lt_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_uint16
+GrB_Info GB_Asaxpy3B__lxor_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_int32
+GrB_Info GB_Adot4B__lxor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_int32
+GrB_Info GB_Adot2B__lxor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36100,10 +47714,10 @@ GrB_Info GB_Adot2B__eq_lt_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_int32
+GrB_Info GB_Adot3B__lxor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36111,32 +47725,32 @@ GrB_Info GB_Adot3B__eq_lt_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_int32
+GrB_Info GB_Asaxpy3B__lxor_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_uint32
+GrB_Info GB_Adot4B__lxor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_uint32
+GrB_Info GB_Adot2B__lxor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36144,10 +47758,10 @@ GrB_Info GB_Adot2B__eq_lt_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_uint32
+GrB_Info GB_Adot3B__lxor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36155,32 +47769,32 @@ GrB_Info GB_Adot3B__eq_lt_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_uint32
+GrB_Info GB_Asaxpy3B__lxor_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_int64
+GrB_Info GB_Adot4B__lxor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_int64
+GrB_Info GB_Adot2B__lxor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36188,10 +47802,10 @@ GrB_Info GB_Adot2B__eq_lt_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_int64
+GrB_Info GB_Adot3B__lxor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36199,32 +47813,32 @@ GrB_Info GB_Adot3B__eq_lt_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_int64
+GrB_Info GB_Asaxpy3B__lxor_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_uint64
+GrB_Info GB_Adot4B__lxor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_uint64
+GrB_Info GB_Adot2B__lxor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36232,10 +47846,10 @@ GrB_Info GB_Adot2B__eq_lt_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_uint64
+GrB_Info GB_Adot3B__lxor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36243,32 +47857,32 @@ GrB_Info GB_Adot3B__eq_lt_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_uint64
+GrB_Info GB_Asaxpy3B__lxor_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_fp32
+GrB_Info GB_Adot4B__lxor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_fp32
+GrB_Info GB_Adot2B__lxor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36276,10 +47890,10 @@ GrB_Info GB_Adot2B__eq_lt_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_fp32
+GrB_Info GB_Adot3B__lxor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36287,32 +47901,32 @@ GrB_Info GB_Adot3B__eq_lt_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_fp32
+GrB_Info GB_Asaxpy3B__lxor_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lt_fp64
+GrB_Info GB_Adot4B__lxor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lt_fp64
+GrB_Info GB_Adot2B__lxor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36320,10 +47934,10 @@ GrB_Info GB_Adot2B__eq_lt_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lt_fp64
+GrB_Info GB_Adot3B__lxor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36331,32 +47945,32 @@ GrB_Info GB_Adot3B__eq_lt_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lt_fp64
+GrB_Info GB_Asaxpy3B__lxor_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_bool
+GrB_Info GB_Adot4B__lxor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_bool
+GrB_Info GB_Adot2B__lxor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36364,10 +47978,10 @@ GrB_Info GB_Adot2B__lor_ge_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_bool
+GrB_Info GB_Adot3B__lxor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36375,32 +47989,32 @@ GrB_Info GB_Adot3B__lor_ge_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_bool
+GrB_Info GB_Asaxpy3B__lxor_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_int8
+GrB_Info GB_Adot4B__lxor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_int8
+GrB_Info GB_Adot2B__eq_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36408,10 +48022,10 @@ GrB_Info GB_Adot2B__lor_ge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_int8
+GrB_Info GB_Adot3B__eq_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36419,32 +48033,32 @@ GrB_Info GB_Adot3B__lor_ge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_int8
+GrB_Info GB_Asaxpy3B__eq_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_uint8
+GrB_Info GB_Adot4B__eq_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_uint8
+GrB_Info GB_Adot2B__eq_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36452,10 +48066,10 @@ GrB_Info GB_Adot2B__lor_ge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_uint8
+GrB_Info GB_Adot3B__eq_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36463,32 +48077,32 @@ GrB_Info GB_Adot3B__lor_ge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_uint8
+GrB_Info GB_Asaxpy3B__eq_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_int16
+GrB_Info GB_Adot4B__eq_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_int16
+GrB_Info GB_Adot2B__eq_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36496,10 +48110,10 @@ GrB_Info GB_Adot2B__lor_ge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_int16
+GrB_Info GB_Adot3B__eq_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36507,32 +48121,32 @@ GrB_Info GB_Adot3B__lor_ge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_int16
+GrB_Info GB_Asaxpy3B__eq_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_uint16
+GrB_Info GB_Adot4B__eq_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_uint16
+GrB_Info GB_Adot2B__eq_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36540,10 +48154,10 @@ GrB_Info GB_Adot2B__lor_ge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_uint16
+GrB_Info GB_Adot3B__eq_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36551,32 +48165,32 @@ GrB_Info GB_Adot3B__lor_ge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_uint16
+GrB_Info GB_Asaxpy3B__eq_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_int32
+GrB_Info GB_Adot4B__eq_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_int32
+GrB_Info GB_Adot2B__eq_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36584,10 +48198,10 @@ GrB_Info GB_Adot2B__lor_ge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_int32
+GrB_Info GB_Adot3B__eq_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36595,32 +48209,32 @@ GrB_Info GB_Adot3B__lor_ge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_int32
+GrB_Info GB_Asaxpy3B__eq_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_uint32
+GrB_Info GB_Adot4B__eq_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_uint32
+GrB_Info GB_Adot2B__eq_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36628,10 +48242,10 @@ GrB_Info GB_Adot2B__lor_ge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_uint32
+GrB_Info GB_Adot3B__eq_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36639,32 +48253,32 @@ GrB_Info GB_Adot3B__lor_ge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_uint32
+GrB_Info GB_Asaxpy3B__eq_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_int64
+GrB_Info GB_Adot4B__eq_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_int64
+GrB_Info GB_Adot2B__eq_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36672,10 +48286,10 @@ GrB_Info GB_Adot2B__lor_ge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_int64
+GrB_Info GB_Adot3B__eq_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36683,32 +48297,32 @@ GrB_Info GB_Adot3B__lor_ge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_int64
+GrB_Info GB_Asaxpy3B__eq_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_uint64
+GrB_Info GB_Adot4B__eq_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_uint64
+GrB_Info GB_Adot2B__eq_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36716,10 +48330,10 @@ GrB_Info GB_Adot2B__lor_ge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_uint64
+GrB_Info GB_Adot3B__eq_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36727,32 +48341,32 @@ GrB_Info GB_Adot3B__lor_ge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_uint64
+GrB_Info GB_Asaxpy3B__eq_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_fp32
+GrB_Info GB_Adot4B__eq_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_fp32
+GrB_Info GB_Adot2B__eq_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36760,10 +48374,10 @@ GrB_Info GB_Adot2B__lor_ge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_fp32
+GrB_Info GB_Adot3B__eq_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36771,32 +48385,32 @@ GrB_Info GB_Adot3B__lor_ge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_fp32
+GrB_Info GB_Asaxpy3B__eq_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_ge_fp64
+GrB_Info GB_Adot4B__eq_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_ge_fp64
+GrB_Info GB_Adot2B__eq_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36804,10 +48418,10 @@ GrB_Info GB_Adot2B__lor_ge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_ge_fp64
+GrB_Info GB_Adot3B__eq_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36815,32 +48429,32 @@ GrB_Info GB_Adot3B__lor_ge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_ge_fp64
+GrB_Info GB_Asaxpy3B__eq_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_bool
+GrB_Info GB_Adot4B__eq_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_bool
+GrB_Info GB_Adot2B__eq_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36848,10 +48462,10 @@ GrB_Info GB_Adot2B__land_ge_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_bool
+GrB_Info GB_Adot3B__eq_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36859,32 +48473,32 @@ GrB_Info GB_Adot3B__land_ge_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_bool
+GrB_Info GB_Asaxpy3B__eq_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_int8
+GrB_Info GB_Adot4B__eq_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_int8
+GrB_Info GB_Adot2B__lor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36892,10 +48506,10 @@ GrB_Info GB_Adot2B__land_ge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_int8
+GrB_Info GB_Adot3B__lor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36903,32 +48517,32 @@ GrB_Info GB_Adot3B__land_ge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_int8
+GrB_Info GB_Asaxpy3B__lor_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_uint8
+GrB_Info GB_Adot4B__lor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_uint8
+GrB_Info GB_Adot2B__lor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36936,10 +48550,10 @@ GrB_Info GB_Adot2B__land_ge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_uint8
+GrB_Info GB_Adot3B__lor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36947,32 +48561,32 @@ GrB_Info GB_Adot3B__land_ge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_uint8
+GrB_Info GB_Asaxpy3B__lor_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_int16
+GrB_Info GB_Adot4B__lor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_int16
+GrB_Info GB_Adot2B__lor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -36980,10 +48594,10 @@ GrB_Info GB_Adot2B__land_ge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_int16
+GrB_Info GB_Adot3B__lor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -36991,32 +48605,32 @@ GrB_Info GB_Adot3B__land_ge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_int16
+GrB_Info GB_Asaxpy3B__lor_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_uint16
+GrB_Info GB_Adot4B__lor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_uint16
+GrB_Info GB_Adot2B__lor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37024,10 +48638,10 @@ GrB_Info GB_Adot2B__land_ge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_uint16
+GrB_Info GB_Adot3B__lor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37035,32 +48649,32 @@ GrB_Info GB_Adot3B__land_ge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_uint16
+GrB_Info GB_Asaxpy3B__lor_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_int32
+GrB_Info GB_Adot4B__lor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_int32
+GrB_Info GB_Adot2B__lor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37068,10 +48682,10 @@ GrB_Info GB_Adot2B__land_ge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_int32
+GrB_Info GB_Adot3B__lor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37079,32 +48693,32 @@ GrB_Info GB_Adot3B__land_ge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_int32
+GrB_Info GB_Asaxpy3B__lor_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_uint32
+GrB_Info GB_Adot4B__lor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_uint32
+GrB_Info GB_Adot2B__lor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37112,10 +48726,10 @@ GrB_Info GB_Adot2B__land_ge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_uint32
+GrB_Info GB_Adot3B__lor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37123,32 +48737,32 @@ GrB_Info GB_Adot3B__land_ge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_uint32
+GrB_Info GB_Asaxpy3B__lor_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_int64
+GrB_Info GB_Adot4B__lor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_int64
+GrB_Info GB_Adot2B__lor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37156,10 +48770,10 @@ GrB_Info GB_Adot2B__land_ge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_int64
+GrB_Info GB_Adot3B__lor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37167,32 +48781,32 @@ GrB_Info GB_Adot3B__land_ge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_int64
+GrB_Info GB_Asaxpy3B__lor_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_uint64
+GrB_Info GB_Adot4B__lor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_uint64
+GrB_Info GB_Adot2B__lor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37200,10 +48814,10 @@ GrB_Info GB_Adot2B__land_ge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_uint64
+GrB_Info GB_Adot3B__lor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37211,32 +48825,32 @@ GrB_Info GB_Adot3B__land_ge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_uint64
+GrB_Info GB_Asaxpy3B__lor_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_fp32
+GrB_Info GB_Adot4B__lor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_fp32
+GrB_Info GB_Adot2B__lor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37244,10 +48858,10 @@ GrB_Info GB_Adot2B__land_ge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_fp32
+GrB_Info GB_Adot3B__lor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37255,32 +48869,32 @@ GrB_Info GB_Adot3B__land_ge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_fp32
+GrB_Info GB_Asaxpy3B__lor_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_ge_fp64
+GrB_Info GB_Adot4B__lor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_ge_fp64
+GrB_Info GB_Adot2B__lor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37288,10 +48902,10 @@ GrB_Info GB_Adot2B__land_ge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_ge_fp64
+GrB_Info GB_Adot3B__lor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37299,32 +48913,32 @@ GrB_Info GB_Adot3B__land_ge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_ge_fp64
+GrB_Info GB_Asaxpy3B__lor_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_bool
+GrB_Info GB_Adot4B__lor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_bool
+GrB_Info GB_Adot2B__lor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37332,10 +48946,10 @@ GrB_Info GB_Adot2B__lxor_ge_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_bool
+GrB_Info GB_Adot3B__lor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37343,32 +48957,32 @@ GrB_Info GB_Adot3B__lxor_ge_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_bool
+GrB_Info GB_Asaxpy3B__lor_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_int8
+GrB_Info GB_Adot4B__lor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_int8
+GrB_Info GB_Adot2B__any_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37376,10 +48990,10 @@ GrB_Info GB_Adot2B__lxor_ge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_int8
+GrB_Info GB_Adot3B__any_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37387,32 +49001,32 @@ GrB_Info GB_Adot3B__lxor_ge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_int8
+GrB_Info GB_Asaxpy3B__any_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_uint8
+GrB_Info GB_Adot4B__any_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_uint8
+GrB_Info GB_Adot2B__any_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37420,10 +49034,10 @@ GrB_Info GB_Adot2B__lxor_ge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_uint8
+GrB_Info GB_Adot3B__any_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37431,32 +49045,32 @@ GrB_Info GB_Adot3B__lxor_ge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_uint8
+GrB_Info GB_Asaxpy3B__any_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_int16
+GrB_Info GB_Adot4B__any_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_int16
+GrB_Info GB_Adot2B__any_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37464,10 +49078,10 @@ GrB_Info GB_Adot2B__lxor_ge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_int16
+GrB_Info GB_Adot3B__any_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37475,32 +49089,32 @@ GrB_Info GB_Adot3B__lxor_ge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_int16
+GrB_Info GB_Asaxpy3B__any_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_uint16
+GrB_Info GB_Adot4B__any_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_uint16
+GrB_Info GB_Adot2B__any_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37508,10 +49122,10 @@ GrB_Info GB_Adot2B__lxor_ge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_uint16
+GrB_Info GB_Adot3B__any_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37519,32 +49133,32 @@ GrB_Info GB_Adot3B__lxor_ge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_uint16
+GrB_Info GB_Asaxpy3B__any_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_int32
+GrB_Info GB_Adot4B__any_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_int32
+GrB_Info GB_Adot2B__any_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37552,10 +49166,10 @@ GrB_Info GB_Adot2B__lxor_ge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_int32
+GrB_Info GB_Adot3B__any_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37563,32 +49177,32 @@ GrB_Info GB_Adot3B__lxor_ge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_int32
+GrB_Info GB_Asaxpy3B__any_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_uint32
+GrB_Info GB_Adot4B__any_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_uint32
+GrB_Info GB_Adot2B__any_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37596,10 +49210,10 @@ GrB_Info GB_Adot2B__lxor_ge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_uint32
+GrB_Info GB_Adot3B__any_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37607,32 +49221,32 @@ GrB_Info GB_Adot3B__lxor_ge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_uint32
+GrB_Info GB_Asaxpy3B__any_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_int64
+GrB_Info GB_Adot4B__any_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_int64
+GrB_Info GB_Adot2B__any_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37640,10 +49254,10 @@ GrB_Info GB_Adot2B__lxor_ge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_int64
+GrB_Info GB_Adot3B__any_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37651,32 +49265,32 @@ GrB_Info GB_Adot3B__lxor_ge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_int64
+GrB_Info GB_Asaxpy3B__any_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_uint64
+GrB_Info GB_Adot4B__any_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_uint64
+GrB_Info GB_Adot2B__any_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37684,10 +49298,10 @@ GrB_Info GB_Adot2B__lxor_ge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_uint64
+GrB_Info GB_Adot3B__any_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37695,32 +49309,32 @@ GrB_Info GB_Adot3B__lxor_ge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_uint64
+GrB_Info GB_Asaxpy3B__any_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_fp32
+GrB_Info GB_Adot4B__any_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_fp32
+GrB_Info GB_Adot2B__any_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37728,10 +49342,10 @@ GrB_Info GB_Adot2B__lxor_ge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_fp32
+GrB_Info GB_Adot3B__any_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37739,32 +49353,32 @@ GrB_Info GB_Adot3B__lxor_ge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_fp32
+GrB_Info GB_Asaxpy3B__any_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_ge_fp64
+GrB_Info GB_Adot4B__any_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_ge_fp64
+GrB_Info GB_Adot2B__any_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37772,10 +49386,10 @@ GrB_Info GB_Adot2B__lxor_ge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_ge_fp64
+GrB_Info GB_Adot3B__any_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37783,32 +49397,32 @@ GrB_Info GB_Adot3B__lxor_ge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_ge_fp64
+GrB_Info GB_Asaxpy3B__any_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_bool
+GrB_Info GB_Adot4B__any_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_bool
+GrB_Info GB_Adot2B__any_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37816,10 +49430,10 @@ GrB_Info GB_Adot2B__eq_ge_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_bool
+GrB_Info GB_Adot3B__any_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37827,32 +49441,32 @@ GrB_Info GB_Adot3B__eq_ge_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_bool
+GrB_Info GB_Asaxpy3B__any_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_int8
+GrB_Info GB_Adot4B__any_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_int8
+GrB_Info GB_Adot2B__land_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37860,10 +49474,10 @@ GrB_Info GB_Adot2B__eq_ge_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_int8
+GrB_Info GB_Adot3B__land_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37871,32 +49485,32 @@ GrB_Info GB_Adot3B__eq_ge_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_int8
+GrB_Info GB_Asaxpy3B__land_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_uint8
+GrB_Info GB_Adot4B__land_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_uint8
+GrB_Info GB_Adot2B__land_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37904,10 +49518,10 @@ GrB_Info GB_Adot2B__eq_ge_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_uint8
+GrB_Info GB_Adot3B__land_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37915,32 +49529,32 @@ GrB_Info GB_Adot3B__eq_ge_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_uint8
+GrB_Info GB_Asaxpy3B__land_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_int16
+GrB_Info GB_Adot4B__land_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_int16
+GrB_Info GB_Adot2B__land_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37948,10 +49562,10 @@ GrB_Info GB_Adot2B__eq_ge_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_int16
+GrB_Info GB_Adot3B__land_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -37959,32 +49573,32 @@ GrB_Info GB_Adot3B__eq_ge_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_int16
+GrB_Info GB_Asaxpy3B__land_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_uint16
+GrB_Info GB_Adot4B__land_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_uint16
+GrB_Info GB_Adot2B__land_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -37992,10 +49606,10 @@ GrB_Info GB_Adot2B__eq_ge_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_uint16
+GrB_Info GB_Adot3B__land_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38003,32 +49617,32 @@ GrB_Info GB_Adot3B__eq_ge_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_uint16
+GrB_Info GB_Asaxpy3B__land_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_int32
+GrB_Info GB_Adot4B__land_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_int32
+GrB_Info GB_Adot2B__land_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38036,10 +49650,10 @@ GrB_Info GB_Adot2B__eq_ge_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_int32
+GrB_Info GB_Adot3B__land_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38047,32 +49661,32 @@ GrB_Info GB_Adot3B__eq_ge_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_int32
+GrB_Info GB_Asaxpy3B__land_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_uint32
+GrB_Info GB_Adot4B__land_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_uint32
+GrB_Info GB_Adot2B__land_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38080,10 +49694,10 @@ GrB_Info GB_Adot2B__eq_ge_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_uint32
+GrB_Info GB_Adot3B__land_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38091,32 +49705,32 @@ GrB_Info GB_Adot3B__eq_ge_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_uint32
+GrB_Info GB_Asaxpy3B__land_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_int64
+GrB_Info GB_Adot4B__land_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_int64
+GrB_Info GB_Adot2B__land_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38124,10 +49738,10 @@ GrB_Info GB_Adot2B__eq_ge_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_int64
+GrB_Info GB_Adot3B__land_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38135,32 +49749,32 @@ GrB_Info GB_Adot3B__eq_ge_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_int64
+GrB_Info GB_Asaxpy3B__land_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_uint64
+GrB_Info GB_Adot4B__land_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_uint64
+GrB_Info GB_Adot2B__land_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38168,10 +49782,10 @@ GrB_Info GB_Adot2B__eq_ge_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_uint64
+GrB_Info GB_Adot3B__land_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38179,32 +49793,32 @@ GrB_Info GB_Adot3B__eq_ge_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_uint64
+GrB_Info GB_Asaxpy3B__land_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_fp32
+GrB_Info GB_Adot4B__land_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_fp32
+GrB_Info GB_Adot2B__land_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38212,10 +49826,10 @@ GrB_Info GB_Adot2B__eq_ge_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_fp32
+GrB_Info GB_Adot3B__land_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38223,32 +49837,32 @@ GrB_Info GB_Adot3B__eq_ge_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_fp32
+GrB_Info GB_Asaxpy3B__land_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_ge_fp64
+GrB_Info GB_Adot4B__land_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_ge_fp64
+GrB_Info GB_Adot2B__land_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38256,10 +49870,10 @@ GrB_Info GB_Adot2B__eq_ge_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_ge_fp64
+GrB_Info GB_Adot3B__land_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38267,32 +49881,32 @@ GrB_Info GB_Adot3B__eq_ge_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_ge_fp64
+GrB_Info GB_Asaxpy3B__land_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_bool
+GrB_Info GB_Adot4B__land_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_bool
+GrB_Info GB_Adot2B__land_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38300,10 +49914,10 @@ GrB_Info GB_Adot2B__lor_le_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_bool
+GrB_Info GB_Adot3B__land_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38311,32 +49925,32 @@ GrB_Info GB_Adot3B__lor_le_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_bool
+GrB_Info GB_Asaxpy3B__land_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_int8
+GrB_Info GB_Adot4B__land_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_int8
+GrB_Info GB_Adot2B__lxor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38344,10 +49958,10 @@ GrB_Info GB_Adot2B__lor_le_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_int8
+GrB_Info GB_Adot3B__lxor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38355,32 +49969,32 @@ GrB_Info GB_Adot3B__lor_le_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_int8
+GrB_Info GB_Asaxpy3B__lxor_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_uint8
+GrB_Info GB_Adot4B__lxor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_uint8
+GrB_Info GB_Adot2B__lxor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38388,10 +50002,10 @@ GrB_Info GB_Adot2B__lor_le_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_uint8
+GrB_Info GB_Adot3B__lxor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38399,32 +50013,32 @@ GrB_Info GB_Adot3B__lor_le_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_uint8
+GrB_Info GB_Asaxpy3B__lxor_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_int16
+GrB_Info GB_Adot4B__lxor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_int16
+GrB_Info GB_Adot2B__lxor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38432,10 +50046,10 @@ GrB_Info GB_Adot2B__lor_le_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_int16
+GrB_Info GB_Adot3B__lxor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38443,32 +50057,32 @@ GrB_Info GB_Adot3B__lor_le_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_int16
+GrB_Info GB_Asaxpy3B__lxor_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_uint16
+GrB_Info GB_Adot4B__lxor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_uint16
+GrB_Info GB_Adot2B__lxor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38476,10 +50090,10 @@ GrB_Info GB_Adot2B__lor_le_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_uint16
+GrB_Info GB_Adot3B__lxor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38487,32 +50101,32 @@ GrB_Info GB_Adot3B__lor_le_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_uint16
+GrB_Info GB_Asaxpy3B__lxor_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_int32
+GrB_Info GB_Adot4B__lxor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_int32
+GrB_Info GB_Adot2B__lxor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38520,10 +50134,10 @@ GrB_Info GB_Adot2B__lor_le_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_int32
+GrB_Info GB_Adot3B__lxor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38531,32 +50145,32 @@ GrB_Info GB_Adot3B__lor_le_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_int32
+GrB_Info GB_Asaxpy3B__lxor_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_uint32
+GrB_Info GB_Adot4B__lxor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_uint32
+GrB_Info GB_Adot2B__lxor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38564,10 +50178,10 @@ GrB_Info GB_Adot2B__lor_le_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_uint32
+GrB_Info GB_Adot3B__lxor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38575,32 +50189,32 @@ GrB_Info GB_Adot3B__lor_le_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_uint32
+GrB_Info GB_Asaxpy3B__lxor_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_int64
+GrB_Info GB_Adot4B__lxor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_int64
+GrB_Info GB_Adot2B__lxor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38608,10 +50222,10 @@ GrB_Info GB_Adot2B__lor_le_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_int64
+GrB_Info GB_Adot3B__lxor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38619,32 +50233,32 @@ GrB_Info GB_Adot3B__lor_le_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_int64
+GrB_Info GB_Asaxpy3B__lxor_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_uint64
+GrB_Info GB_Adot4B__lxor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_uint64
+GrB_Info GB_Adot2B__lxor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38652,10 +50266,10 @@ GrB_Info GB_Adot2B__lor_le_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_uint64
+GrB_Info GB_Adot3B__lxor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38663,32 +50277,32 @@ GrB_Info GB_Adot3B__lor_le_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_uint64
+GrB_Info GB_Asaxpy3B__lxor_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_fp32
+GrB_Info GB_Adot4B__lxor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_fp32
+GrB_Info GB_Adot2B__lxor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38696,10 +50310,10 @@ GrB_Info GB_Adot2B__lor_le_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_fp32
+GrB_Info GB_Adot3B__lxor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38707,32 +50321,32 @@ GrB_Info GB_Adot3B__lor_le_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_fp32
+GrB_Info GB_Asaxpy3B__lxor_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_le_fp64
+GrB_Info GB_Adot4B__lxor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_le_fp64
+GrB_Info GB_Adot2B__lxor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38740,10 +50354,10 @@ GrB_Info GB_Adot2B__lor_le_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_le_fp64
+GrB_Info GB_Adot3B__lxor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38751,32 +50365,32 @@ GrB_Info GB_Adot3B__lor_le_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_le_fp64
+GrB_Info GB_Asaxpy3B__lxor_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_bool
+GrB_Info GB_Adot4B__lxor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_bool
+GrB_Info GB_Adot2B__lxor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38784,10 +50398,10 @@ GrB_Info GB_Adot2B__land_le_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_bool
+GrB_Info GB_Adot3B__lxor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38795,32 +50409,32 @@ GrB_Info GB_Adot3B__land_le_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_bool
+GrB_Info GB_Asaxpy3B__lxor_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_int8
+GrB_Info GB_Adot4B__lxor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_int8
+GrB_Info GB_Adot2B__eq_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38828,10 +50442,10 @@ GrB_Info GB_Adot2B__land_le_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_int8
+GrB_Info GB_Adot3B__eq_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38839,32 +50453,32 @@ GrB_Info GB_Adot3B__land_le_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_int8
+GrB_Info GB_Asaxpy3B__eq_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_uint8
+GrB_Info GB_Adot4B__eq_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_uint8
+GrB_Info GB_Adot2B__eq_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38872,10 +50486,10 @@ GrB_Info GB_Adot2B__land_le_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_uint8
+GrB_Info GB_Adot3B__eq_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38883,32 +50497,32 @@ GrB_Info GB_Adot3B__land_le_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_uint8
+GrB_Info GB_Asaxpy3B__eq_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_int16
+GrB_Info GB_Adot4B__eq_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_int16
+GrB_Info GB_Adot2B__eq_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38916,10 +50530,10 @@ GrB_Info GB_Adot2B__land_le_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_int16
+GrB_Info GB_Adot3B__eq_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38927,32 +50541,32 @@ GrB_Info GB_Adot3B__land_le_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_int16
+GrB_Info GB_Asaxpy3B__eq_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_uint16
+GrB_Info GB_Adot4B__eq_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_uint16
+GrB_Info GB_Adot2B__eq_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -38960,10 +50574,10 @@ GrB_Info GB_Adot2B__land_le_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_uint16
+GrB_Info GB_Adot3B__eq_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -38971,32 +50585,32 @@ GrB_Info GB_Adot3B__land_le_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_uint16
+GrB_Info GB_Asaxpy3B__eq_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_int32
+GrB_Info GB_Adot4B__eq_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_int32
+GrB_Info GB_Adot2B__eq_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39004,10 +50618,10 @@ GrB_Info GB_Adot2B__land_le_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_int32
+GrB_Info GB_Adot3B__eq_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39015,32 +50629,32 @@ GrB_Info GB_Adot3B__land_le_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_int32
+GrB_Info GB_Asaxpy3B__eq_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_uint32
+GrB_Info GB_Adot4B__eq_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_uint32
+GrB_Info GB_Adot2B__eq_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39048,10 +50662,10 @@ GrB_Info GB_Adot2B__land_le_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_uint32
+GrB_Info GB_Adot3B__eq_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39059,32 +50673,32 @@ GrB_Info GB_Adot3B__land_le_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_uint32
+GrB_Info GB_Asaxpy3B__eq_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_int64
+GrB_Info GB_Adot4B__eq_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_int64
+GrB_Info GB_Adot2B__eq_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39092,10 +50706,10 @@ GrB_Info GB_Adot2B__land_le_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_int64
+GrB_Info GB_Adot3B__eq_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39103,32 +50717,32 @@ GrB_Info GB_Adot3B__land_le_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_int64
+GrB_Info GB_Asaxpy3B__eq_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_uint64
+GrB_Info GB_Adot4B__eq_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_uint64
+GrB_Info GB_Adot2B__eq_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39136,10 +50750,10 @@ GrB_Info GB_Adot2B__land_le_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_uint64
+GrB_Info GB_Adot3B__eq_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39147,32 +50761,32 @@ GrB_Info GB_Adot3B__land_le_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_uint64
+GrB_Info GB_Asaxpy3B__eq_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_fp32
+GrB_Info GB_Adot4B__eq_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_fp32
+GrB_Info GB_Adot2B__eq_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39180,10 +50794,10 @@ GrB_Info GB_Adot2B__land_le_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_fp32
+GrB_Info GB_Adot3B__eq_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39191,32 +50805,32 @@ GrB_Info GB_Adot3B__land_le_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_fp32
+GrB_Info GB_Asaxpy3B__eq_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_le_fp64
+GrB_Info GB_Adot4B__eq_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_le_fp64
+GrB_Info GB_Adot2B__eq_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39224,10 +50838,10 @@ GrB_Info GB_Adot2B__land_le_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_le_fp64
+GrB_Info GB_Adot3B__eq_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39235,32 +50849,32 @@ GrB_Info GB_Adot3B__land_le_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_le_fp64
+GrB_Info GB_Asaxpy3B__eq_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_bool
+GrB_Info GB_Adot4B__eq_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_bool
+GrB_Info GB_Adot2B__eq_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39268,10 +50882,10 @@ GrB_Info GB_Adot2B__lxor_le_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_bool
+GrB_Info GB_Adot3B__eq_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39279,32 +50893,32 @@ GrB_Info GB_Adot3B__lxor_le_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_bool
+GrB_Info GB_Asaxpy3B__eq_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_int8
+GrB_Info GB_Adot4B__eq_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_int8
+GrB_Info GB_Adot2B__min_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39312,10 +50926,10 @@ GrB_Info GB_Adot2B__lxor_le_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_int8
+GrB_Info GB_Adot3B__min_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39323,32 +50937,32 @@ GrB_Info GB_Adot3B__lxor_le_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_int8
+GrB_Info GB_Asaxpy3B__min_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_uint8
+GrB_Info GB_Adot4B__min_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_uint8
+GrB_Info GB_Adot2B__min_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39356,10 +50970,10 @@ GrB_Info GB_Adot2B__lxor_le_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_uint8
+GrB_Info GB_Adot3B__min_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39367,32 +50981,32 @@ GrB_Info GB_Adot3B__lxor_le_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_uint8
+GrB_Info GB_Asaxpy3B__min_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_int16
+GrB_Info GB_Adot4B__min_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_int16
+GrB_Info GB_Adot2B__min_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39400,10 +51014,10 @@ GrB_Info GB_Adot2B__lxor_le_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_int16
+GrB_Info GB_Adot3B__min_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39411,32 +51025,32 @@ GrB_Info GB_Adot3B__lxor_le_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_int16
+GrB_Info GB_Asaxpy3B__min_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_uint16
+GrB_Info GB_Adot4B__min_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_uint16
+GrB_Info GB_Adot2B__min_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39444,10 +51058,10 @@ GrB_Info GB_Adot2B__lxor_le_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_uint16
+GrB_Info GB_Adot3B__min_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39455,32 +51069,32 @@ GrB_Info GB_Adot3B__lxor_le_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_uint16
+GrB_Info GB_Asaxpy3B__min_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_int32
+GrB_Info GB_Adot4B__min_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_int32
+GrB_Info GB_Adot2B__min_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39488,10 +51102,10 @@ GrB_Info GB_Adot2B__lxor_le_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_int32
+GrB_Info GB_Adot3B__min_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39499,32 +51113,32 @@ GrB_Info GB_Adot3B__lxor_le_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_int32
+GrB_Info GB_Asaxpy3B__min_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_uint32
+GrB_Info GB_Adot4B__min_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_uint32
+GrB_Info GB_Adot2B__min_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39532,10 +51146,10 @@ GrB_Info GB_Adot2B__lxor_le_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_uint32
+GrB_Info GB_Adot3B__min_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39543,32 +51157,32 @@ GrB_Info GB_Adot3B__lxor_le_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_uint32
+GrB_Info GB_Asaxpy3B__min_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_int64
+GrB_Info GB_Adot4B__min_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_int64
+GrB_Info GB_Adot2B__min_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39576,10 +51190,10 @@ GrB_Info GB_Adot2B__lxor_le_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_int64
+GrB_Info GB_Adot3B__min_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39587,32 +51201,32 @@ GrB_Info GB_Adot3B__lxor_le_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_int64
+GrB_Info GB_Asaxpy3B__min_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_uint64
+GrB_Info GB_Adot4B__min_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_uint64
+GrB_Info GB_Adot2B__min_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39620,10 +51234,10 @@ GrB_Info GB_Adot2B__lxor_le_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_uint64
+GrB_Info GB_Adot3B__min_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39631,32 +51245,32 @@ GrB_Info GB_Adot3B__lxor_le_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_uint64
+GrB_Info GB_Asaxpy3B__min_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_fp32
+GrB_Info GB_Adot4B__min_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_fp32
+GrB_Info GB_Adot2B__min_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39664,10 +51278,10 @@ GrB_Info GB_Adot2B__lxor_le_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_fp32
+GrB_Info GB_Adot3B__min_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39675,32 +51289,32 @@ GrB_Info GB_Adot3B__lxor_le_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_fp32
+GrB_Info GB_Asaxpy3B__min_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_le_fp64
+GrB_Info GB_Adot4B__min_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_le_fp64
+GrB_Info GB_Adot2B__min_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39708,10 +51322,10 @@ GrB_Info GB_Adot2B__lxor_le_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_le_fp64
+GrB_Info GB_Adot3B__min_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39719,32 +51333,32 @@ GrB_Info GB_Adot3B__lxor_le_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_le_fp64
+GrB_Info GB_Asaxpy3B__min_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_bool
+GrB_Info GB_Adot4B__min_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_bool
+GrB_Info GB_Adot2B__max_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39752,10 +51366,10 @@ GrB_Info GB_Adot2B__eq_le_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_bool
+GrB_Info GB_Adot3B__max_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39763,32 +51377,32 @@ GrB_Info GB_Adot3B__eq_le_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_bool
+GrB_Info GB_Asaxpy3B__max_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_int8
+GrB_Info GB_Adot4B__max_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_int8
+GrB_Info GB_Adot2B__max_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39796,10 +51410,10 @@ GrB_Info GB_Adot2B__eq_le_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_int8
+GrB_Info GB_Adot3B__max_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39807,32 +51421,32 @@ GrB_Info GB_Adot3B__eq_le_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_int8
+GrB_Info GB_Asaxpy3B__max_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_uint8
+GrB_Info GB_Adot4B__max_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_uint8
+GrB_Info GB_Adot2B__max_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39840,10 +51454,10 @@ GrB_Info GB_Adot2B__eq_le_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_uint8
+GrB_Info GB_Adot3B__max_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39851,32 +51465,32 @@ GrB_Info GB_Adot3B__eq_le_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_uint8
+GrB_Info GB_Asaxpy3B__max_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_int16
+GrB_Info GB_Adot4B__max_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_int16
+GrB_Info GB_Adot2B__max_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39884,10 +51498,10 @@ GrB_Info GB_Adot2B__eq_le_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_int16
+GrB_Info GB_Adot3B__max_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39895,32 +51509,32 @@ GrB_Info GB_Adot3B__eq_le_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_int16
+GrB_Info GB_Asaxpy3B__max_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_uint16
+GrB_Info GB_Adot4B__max_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_uint16
+GrB_Info GB_Adot2B__max_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39928,10 +51542,10 @@ GrB_Info GB_Adot2B__eq_le_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_uint16
+GrB_Info GB_Adot3B__max_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39939,32 +51553,32 @@ GrB_Info GB_Adot3B__eq_le_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_uint16
+GrB_Info GB_Asaxpy3B__max_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_int32
+GrB_Info GB_Adot4B__max_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_int32
+GrB_Info GB_Adot2B__max_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -39972,10 +51586,10 @@ GrB_Info GB_Adot2B__eq_le_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_int32
+GrB_Info GB_Adot3B__max_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -39983,32 +51597,32 @@ GrB_Info GB_Adot3B__eq_le_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_int32
+GrB_Info GB_Asaxpy3B__max_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_uint32
+GrB_Info GB_Adot4B__max_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_uint32
+GrB_Info GB_Adot2B__max_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40016,10 +51630,10 @@ GrB_Info GB_Adot2B__eq_le_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_uint32
+GrB_Info GB_Adot3B__max_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40027,32 +51641,32 @@ GrB_Info GB_Adot3B__eq_le_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_uint32
+GrB_Info GB_Asaxpy3B__max_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_int64
+GrB_Info GB_Adot4B__max_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_int64
+GrB_Info GB_Adot2B__max_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40060,10 +51674,10 @@ GrB_Info GB_Adot2B__eq_le_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_int64
+GrB_Info GB_Adot3B__max_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40071,32 +51685,32 @@ GrB_Info GB_Adot3B__eq_le_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_int64
+GrB_Info GB_Asaxpy3B__max_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_uint64
+GrB_Info GB_Adot4B__max_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_uint64
+GrB_Info GB_Adot2B__max_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40104,10 +51718,10 @@ GrB_Info GB_Adot2B__eq_le_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_uint64
+GrB_Info GB_Adot3B__max_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40115,32 +51729,32 @@ GrB_Info GB_Adot3B__eq_le_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_uint64
+GrB_Info GB_Asaxpy3B__max_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_fp32
+GrB_Info GB_Adot4B__max_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_fp32
+GrB_Info GB_Adot2B__max_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40148,10 +51762,10 @@ GrB_Info GB_Adot2B__eq_le_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_fp32
+GrB_Info GB_Adot3B__max_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40159,32 +51773,32 @@ GrB_Info GB_Adot3B__eq_le_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_fp32
+GrB_Info GB_Asaxpy3B__max_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_le_fp64
+GrB_Info GB_Adot4B__max_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_le_fp64
+GrB_Info GB_Adot2B__any_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40192,10 +51806,10 @@ GrB_Info GB_Adot2B__eq_le_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_le_fp64
+GrB_Info GB_Adot3B__any_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40203,33 +51817,32 @@ GrB_Info GB_Adot3B__eq_le_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_le_fp64
+GrB_Info GB_Asaxpy3B__any_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_int8
+GrB_Info GB_Adot4B__any_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_int8
+GrB_Info GB_Adot2B__any_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40237,10 +51850,10 @@ GrB_Info GB_Adot2B__min_lor_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_int8
+GrB_Info GB_Adot3B__any_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40248,33 +51861,32 @@ GrB_Info GB_Adot3B__min_lor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_int8
+GrB_Info GB_Asaxpy3B__any_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_int16
+GrB_Info GB_Adot4B__any_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_int16
+GrB_Info GB_Adot2B__any_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40282,10 +51894,10 @@ GrB_Info GB_Adot2B__min_lor_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_int16
+GrB_Info GB_Adot3B__any_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40293,33 +51905,32 @@ GrB_Info GB_Adot3B__min_lor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_int16
+GrB_Info GB_Asaxpy3B__any_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_int32
+GrB_Info GB_Adot4B__any_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_int32
+GrB_Info GB_Adot2B__any_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40327,10 +51938,10 @@ GrB_Info GB_Adot2B__min_lor_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_int32
+GrB_Info GB_Adot3B__any_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40338,33 +51949,32 @@ GrB_Info GB_Adot3B__min_lor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_int32
+GrB_Info GB_Asaxpy3B__any_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_int64
+GrB_Info GB_Adot4B__any_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_int64
+GrB_Info GB_Adot2B__any_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40372,10 +51982,10 @@ GrB_Info GB_Adot2B__min_lor_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_int64
+GrB_Info GB_Adot3B__any_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40383,33 +51993,32 @@ GrB_Info GB_Adot3B__min_lor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_int64
+GrB_Info GB_Asaxpy3B__any_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_uint8
+GrB_Info GB_Adot4B__any_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_uint8
+GrB_Info GB_Adot2B__any_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40417,10 +52026,10 @@ GrB_Info GB_Adot2B__min_lor_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_uint8
+GrB_Info GB_Adot3B__any_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40428,33 +52037,32 @@ GrB_Info GB_Adot3B__min_lor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_uint8
+GrB_Info GB_Asaxpy3B__any_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_uint16
+GrB_Info GB_Adot4B__any_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_uint16
+GrB_Info GB_Adot2B__any_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40462,10 +52070,10 @@ GrB_Info GB_Adot2B__min_lor_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_uint16
+GrB_Info GB_Adot3B__any_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40473,33 +52081,32 @@ GrB_Info GB_Adot3B__min_lor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_uint16
+GrB_Info GB_Asaxpy3B__any_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_uint32
+GrB_Info GB_Adot4B__any_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_uint32
+GrB_Info GB_Adot2B__any_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40507,10 +52114,10 @@ GrB_Info GB_Adot2B__min_lor_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_uint32
+GrB_Info GB_Adot3B__any_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40518,33 +52125,32 @@ GrB_Info GB_Adot3B__min_lor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_uint32
+GrB_Info GB_Asaxpy3B__any_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lor_uint64
+GrB_Info GB_Adot4B__any_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_uint64
+GrB_Info GB_Adot2B__any_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40552,10 +52158,10 @@ GrB_Info GB_Adot2B__min_lor_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_uint64
+GrB_Info GB_Adot3B__any_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40563,32 +52169,32 @@ GrB_Info GB_Adot3B__min_lor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_uint64
+GrB_Info GB_Asaxpy3B__any_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_lor_fp32
+GrB_Info GB_Adot4B__any_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_fp32
+GrB_Info GB_Adot2B__any_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40596,10 +52202,10 @@ GrB_Info GB_Adot2B__min_lor_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_fp32
+GrB_Info GB_Adot3B__any_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40607,32 +52213,32 @@ GrB_Info GB_Adot3B__min_lor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_fp32
+GrB_Info GB_Asaxpy3B__any_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_lor_fp64
+GrB_Info GB_Adot4B__any_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_lor_fp64
+GrB_Info GB_Adot2B__plus_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40640,10 +52246,10 @@ GrB_Info GB_Adot2B__min_lor_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_lor_fp64
+GrB_Info GB_Adot3B__plus_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40651,33 +52257,32 @@ GrB_Info GB_Adot3B__min_lor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lor_fp64
+GrB_Info GB_Asaxpy3B__plus_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_int8
+GrB_Info GB_Adot4B__plus_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_int8
+GrB_Info GB_Adot2B__plus_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40685,10 +52290,10 @@ GrB_Info GB_Adot2B__max_lor_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_int8
+GrB_Info GB_Adot3B__plus_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40696,33 +52301,32 @@ GrB_Info GB_Adot3B__max_lor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_int8
+GrB_Info GB_Asaxpy3B__plus_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_int16
+GrB_Info GB_Adot4B__plus_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_int16
+GrB_Info GB_Adot2B__plus_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40730,10 +52334,10 @@ GrB_Info GB_Adot2B__max_lor_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_int16
+GrB_Info GB_Adot3B__plus_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40741,33 +52345,32 @@ GrB_Info GB_Adot3B__max_lor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_int16
+GrB_Info GB_Asaxpy3B__plus_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_int32
+GrB_Info GB_Adot4B__plus_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_int32
+GrB_Info GB_Adot2B__plus_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40775,10 +52378,10 @@ GrB_Info GB_Adot2B__max_lor_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_int32
+GrB_Info GB_Adot3B__plus_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40786,33 +52389,32 @@ GrB_Info GB_Adot3B__max_lor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_int32
+GrB_Info GB_Asaxpy3B__plus_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_int64
+GrB_Info GB_Adot4B__plus_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_int64
+GrB_Info GB_Adot2B__plus_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40820,10 +52422,10 @@ GrB_Info GB_Adot2B__max_lor_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_int64
+GrB_Info GB_Adot3B__plus_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40831,33 +52433,32 @@ GrB_Info GB_Adot3B__max_lor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_int64
+GrB_Info GB_Asaxpy3B__plus_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_uint8
+GrB_Info GB_Adot4B__plus_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_uint8
+GrB_Info GB_Adot2B__plus_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40865,10 +52466,10 @@ GrB_Info GB_Adot2B__max_lor_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_uint8
+GrB_Info GB_Adot3B__plus_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40876,33 +52477,32 @@ GrB_Info GB_Adot3B__max_lor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_uint8
+GrB_Info GB_Asaxpy3B__plus_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_uint16
+GrB_Info GB_Adot4B__plus_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_uint16
+GrB_Info GB_Adot2B__plus_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40910,10 +52510,10 @@ GrB_Info GB_Adot2B__max_lor_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_uint16
+GrB_Info GB_Adot3B__plus_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40921,33 +52521,32 @@ GrB_Info GB_Adot3B__max_lor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_uint16
+GrB_Info GB_Asaxpy3B__plus_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_uint32
+GrB_Info GB_Adot4B__plus_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_uint32
+GrB_Info GB_Adot2B__plus_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -40955,10 +52554,10 @@ GrB_Info GB_Adot2B__max_lor_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_uint32
+GrB_Info GB_Adot3B__plus_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -40966,33 +52565,32 @@ GrB_Info GB_Adot3B__max_lor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_uint32
+GrB_Info GB_Asaxpy3B__plus_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lor_uint64
+GrB_Info GB_Adot4B__plus_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_uint64
+GrB_Info GB_Adot2B__plus_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41000,10 +52598,10 @@ GrB_Info GB_Adot2B__max_lor_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_uint64
+GrB_Info GB_Adot3B__plus_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41011,32 +52609,32 @@ GrB_Info GB_Adot3B__max_lor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_uint64
+GrB_Info GB_Asaxpy3B__plus_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_lor_fp32
+GrB_Info GB_Adot4B__plus_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_fp32
+GrB_Info GB_Adot2B__plus_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41044,10 +52642,10 @@ GrB_Info GB_Adot2B__max_lor_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_fp32
+GrB_Info GB_Adot3B__plus_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41055,32 +52653,32 @@ GrB_Info GB_Adot3B__max_lor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_fp32
+GrB_Info GB_Asaxpy3B__plus_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_lor_fp64
+GrB_Info GB_Adot4B__plus_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_lor_fp64
+GrB_Info GB_Adot2B__times_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41088,10 +52686,10 @@ GrB_Info GB_Adot2B__max_lor_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_lor_fp64
+GrB_Info GB_Adot3B__times_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41099,33 +52697,32 @@ GrB_Info GB_Adot3B__max_lor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lor_fp64
+GrB_Info GB_Asaxpy3B__times_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_int8
+GrB_Info GB_Adot4B__times_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_int8
+GrB_Info GB_Adot2B__times_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41133,10 +52730,10 @@ GrB_Info GB_Adot2B__plus_lor_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_int8
+GrB_Info GB_Adot3B__times_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41144,33 +52741,32 @@ GrB_Info GB_Adot3B__plus_lor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_int8
+GrB_Info GB_Asaxpy3B__times_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_uint8
+GrB_Info GB_Adot4B__times_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_uint8
+GrB_Info GB_Adot2B__times_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41178,10 +52774,10 @@ GrB_Info GB_Adot2B__plus_lor_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_uint8
+GrB_Info GB_Adot3B__times_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41189,33 +52785,32 @@ GrB_Info GB_Adot3B__plus_lor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_uint8
+GrB_Info GB_Asaxpy3B__times_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_int16
+GrB_Info GB_Adot4B__times_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_int16
+GrB_Info GB_Adot2B__times_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41223,10 +52818,10 @@ GrB_Info GB_Adot2B__plus_lor_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_int16
+GrB_Info GB_Adot3B__times_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41234,33 +52829,32 @@ GrB_Info GB_Adot3B__plus_lor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_int16
+GrB_Info GB_Asaxpy3B__times_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_uint16
+GrB_Info GB_Adot4B__times_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_uint16
+GrB_Info GB_Adot2B__times_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41268,10 +52862,10 @@ GrB_Info GB_Adot2B__plus_lor_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_uint16
+GrB_Info GB_Adot3B__times_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41279,33 +52873,32 @@ GrB_Info GB_Adot3B__plus_lor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_uint16
+GrB_Info GB_Asaxpy3B__times_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_int32
+GrB_Info GB_Adot4B__times_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_int32
+GrB_Info GB_Adot2B__times_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41313,10 +52906,10 @@ GrB_Info GB_Adot2B__plus_lor_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_int32
+GrB_Info GB_Adot3B__times_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41324,33 +52917,32 @@ GrB_Info GB_Adot3B__plus_lor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_int32
+GrB_Info GB_Asaxpy3B__times_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_uint32
+GrB_Info GB_Adot4B__times_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_uint32
+GrB_Info GB_Adot2B__times_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41358,10 +52950,10 @@ GrB_Info GB_Adot2B__plus_lor_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_uint32
+GrB_Info GB_Adot3B__times_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41369,33 +52961,32 @@ GrB_Info GB_Adot3B__plus_lor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_uint32
+GrB_Info GB_Asaxpy3B__times_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_int64
+GrB_Info GB_Adot4B__times_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_int64
+GrB_Info GB_Adot2B__times_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41403,10 +52994,10 @@ GrB_Info GB_Adot2B__plus_lor_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_int64
+GrB_Info GB_Adot3B__times_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41414,33 +53005,32 @@ GrB_Info GB_Adot3B__plus_lor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_int64
+GrB_Info GB_Asaxpy3B__times_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_lor_uint64
+GrB_Info GB_Adot4B__times_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_uint64
+GrB_Info GB_Adot2B__times_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41448,10 +53038,10 @@ GrB_Info GB_Adot2B__plus_lor_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_uint64
+GrB_Info GB_Adot3B__times_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41459,32 +53049,32 @@ GrB_Info GB_Adot3B__plus_lor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_uint64
+GrB_Info GB_Asaxpy3B__times_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_lor_fp32
+GrB_Info GB_Adot4B__times_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_fp32
+GrB_Info GB_Adot2B__times_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41492,10 +53082,10 @@ GrB_Info GB_Adot2B__plus_lor_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_fp32
+GrB_Info GB_Adot3B__times_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41503,32 +53093,32 @@ GrB_Info GB_Adot3B__plus_lor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_fp32
+GrB_Info GB_Asaxpy3B__times_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_lor_fp64
+GrB_Info GB_Adot4B__times_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__plus_lor_fp64
+GrB_Info GB_Adot2B__lor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41536,10 +53126,10 @@ GrB_Info GB_Adot2B__plus_lor_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lor_fp64
+GrB_Info GB_Adot3B__lor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41547,33 +53137,32 @@ GrB_Info GB_Adot3B__plus_lor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lor_fp64
+GrB_Info GB_Asaxpy3B__lor_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lor_int8
+GrB_Info GB_Adot4B__lor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_int8
+GrB_Info GB_Adot2B__land_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41581,10 +53170,10 @@ GrB_Info GB_Adot2B__times_lor_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_int8
+GrB_Info GB_Adot3B__land_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41592,33 +53181,32 @@ GrB_Info GB_Adot3B__times_lor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_int8
+GrB_Info GB_Asaxpy3B__land_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lor_uint8
+GrB_Info GB_Adot4B__land_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_uint8
+GrB_Info GB_Adot2B__lxor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41626,10 +53214,10 @@ GrB_Info GB_Adot2B__times_lor_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_uint8
+GrB_Info GB_Adot3B__lxor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41637,33 +53225,32 @@ GrB_Info GB_Adot3B__times_lor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_uint8
+GrB_Info GB_Asaxpy3B__lxor_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lor_int16
+GrB_Info GB_Adot4B__lxor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_int16
+GrB_Info GB_Adot2B__any_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41671,10 +53258,10 @@ GrB_Info GB_Adot2B__times_lor_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_int16
+GrB_Info GB_Adot3B__any_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41682,33 +53269,32 @@ GrB_Info GB_Adot3B__times_lor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_int16
+GrB_Info GB_Asaxpy3B__any_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lor_uint16
+GrB_Info GB_Adot4B__any_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_uint16
+GrB_Info GB_Adot2B__eq_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41716,10 +53302,10 @@ GrB_Info GB_Adot2B__times_lor_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_uint16
+GrB_Info GB_Adot3B__eq_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41727,33 +53313,32 @@ GrB_Info GB_Adot3B__times_lor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_uint16
+GrB_Info GB_Asaxpy3B__eq_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
-) ;
-
-
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
 
-GrB_Info GB_AgusB__times_lor_int32
+GrB_Info GB_Adot4B__eq_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_int32
+GrB_Info GB_Adot2B__min_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41761,10 +53346,10 @@ GrB_Info GB_Adot2B__times_lor_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_int32
+GrB_Info GB_Adot3B__min_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41772,33 +53357,32 @@ GrB_Info GB_Adot3B__times_lor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_int32
+GrB_Info GB_Asaxpy3B__min_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lor_uint32
+GrB_Info GB_Adot4B__min_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_uint32
+GrB_Info GB_Adot2B__min_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41806,10 +53390,10 @@ GrB_Info GB_Adot2B__times_lor_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_uint32
+GrB_Info GB_Adot3B__min_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41817,33 +53401,32 @@ GrB_Info GB_Adot3B__times_lor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_uint32
+GrB_Info GB_Asaxpy3B__min_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lor_int64
+GrB_Info GB_Adot4B__min_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_int64
+GrB_Info GB_Adot2B__min_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41851,10 +53434,10 @@ GrB_Info GB_Adot2B__times_lor_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_int64
+GrB_Info GB_Adot3B__min_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41862,33 +53445,32 @@ GrB_Info GB_Adot3B__times_lor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_int64
+GrB_Info GB_Asaxpy3B__min_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lor_uint64
+GrB_Info GB_Adot4B__min_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_uint64
+GrB_Info GB_Adot2B__min_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41896,10 +53478,10 @@ GrB_Info GB_Adot2B__times_lor_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_uint64
+GrB_Info GB_Adot3B__min_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41907,32 +53489,32 @@ GrB_Info GB_Adot3B__times_lor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_uint64
+GrB_Info GB_Asaxpy3B__min_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_lor_fp32
+GrB_Info GB_Adot4B__min_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_fp32
+GrB_Info GB_Adot2B__min_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41940,10 +53522,10 @@ GrB_Info GB_Adot2B__times_lor_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_fp32
+GrB_Info GB_Adot3B__min_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41951,32 +53533,32 @@ GrB_Info GB_Adot3B__times_lor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_fp32
+GrB_Info GB_Asaxpy3B__min_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_lor_fp64
+GrB_Info GB_Adot4B__min_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__times_lor_fp64
+GrB_Info GB_Adot2B__min_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -41984,10 +53566,10 @@ GrB_Info GB_Adot2B__times_lor_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__times_lor_fp64
+GrB_Info GB_Adot3B__min_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -41995,32 +53577,32 @@ GrB_Info GB_Adot3B__times_lor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lor_fp64
+GrB_Info GB_Asaxpy3B__min_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lor_bool
+GrB_Info GB_Adot4B__min_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lor_lor_bool
+GrB_Info GB_Adot2B__min_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42028,10 +53610,10 @@ GrB_Info GB_Adot2B__lor_lor_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lor_lor_bool
+GrB_Info GB_Adot3B__min_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42039,32 +53621,32 @@ GrB_Info GB_Adot3B__lor_lor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lor_bool
+GrB_Info GB_Asaxpy3B__min_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lor_bool
+GrB_Info GB_Adot4B__min_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__land_lor_bool
+GrB_Info GB_Adot2B__min_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42072,10 +53654,10 @@ GrB_Info GB_Adot2B__land_lor_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__land_lor_bool
+GrB_Info GB_Adot3B__min_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42083,32 +53665,32 @@ GrB_Info GB_Adot3B__land_lor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lor_bool
+GrB_Info GB_Asaxpy3B__min_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lor_bool
+GrB_Info GB_Adot4B__min_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__lxor_lor_bool
+GrB_Info GB_Adot2B__min_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42116,10 +53698,10 @@ GrB_Info GB_Adot2B__lxor_lor_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__lxor_lor_bool
+GrB_Info GB_Adot3B__min_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42127,32 +53709,32 @@ GrB_Info GB_Adot3B__lxor_lor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lor_bool
+GrB_Info GB_Asaxpy3B__min_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__eq_lor_bool
+GrB_Info GB_Adot4B__min_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__eq_lor_bool
+GrB_Info GB_Adot2B__min_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42160,10 +53742,10 @@ GrB_Info GB_Adot2B__eq_lor_bool
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__eq_lor_bool
+GrB_Info GB_Adot3B__min_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42171,33 +53753,32 @@ GrB_Info GB_Adot3B__eq_lor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lor_bool
+GrB_Info GB_Asaxpy3B__min_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_int8
+GrB_Info GB_Adot4B__min_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_int8
+GrB_Info GB_Adot2B__max_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42205,10 +53786,10 @@ GrB_Info GB_Adot2B__min_land_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_int8
+GrB_Info GB_Adot3B__max_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42216,33 +53797,32 @@ GrB_Info GB_Adot3B__min_land_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_int8
+GrB_Info GB_Asaxpy3B__max_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_int16
+GrB_Info GB_Adot4B__max_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_int16
+GrB_Info GB_Adot2B__max_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42250,10 +53830,10 @@ GrB_Info GB_Adot2B__min_land_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_int16
+GrB_Info GB_Adot3B__max_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42261,33 +53841,32 @@ GrB_Info GB_Adot3B__min_land_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_int16
+GrB_Info GB_Asaxpy3B__max_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_int32
+GrB_Info GB_Adot4B__max_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_int32
+GrB_Info GB_Adot2B__max_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42295,10 +53874,10 @@ GrB_Info GB_Adot2B__min_land_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_int32
+GrB_Info GB_Adot3B__max_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42306,33 +53885,32 @@ GrB_Info GB_Adot3B__min_land_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_int32
+GrB_Info GB_Asaxpy3B__max_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_int64
+GrB_Info GB_Adot4B__max_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_int64
+GrB_Info GB_Adot2B__max_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42340,10 +53918,10 @@ GrB_Info GB_Adot2B__min_land_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_int64
+GrB_Info GB_Adot3B__max_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42351,33 +53929,32 @@ GrB_Info GB_Adot3B__min_land_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_int64
+GrB_Info GB_Asaxpy3B__max_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_uint8
+GrB_Info GB_Adot4B__max_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_uint8
+GrB_Info GB_Adot2B__max_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42385,10 +53962,10 @@ GrB_Info GB_Adot2B__min_land_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_uint8
+GrB_Info GB_Adot3B__max_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42396,33 +53973,32 @@ GrB_Info GB_Adot3B__min_land_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_uint8
+GrB_Info GB_Asaxpy3B__max_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_uint16
+GrB_Info GB_Adot4B__max_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_uint16
+GrB_Info GB_Adot2B__max_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42430,10 +54006,10 @@ GrB_Info GB_Adot2B__min_land_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_uint16
+GrB_Info GB_Adot3B__max_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42441,33 +54017,32 @@ GrB_Info GB_Adot3B__min_land_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_uint16
+GrB_Info GB_Asaxpy3B__max_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_uint32
+GrB_Info GB_Adot4B__max_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_uint32
+GrB_Info GB_Adot2B__max_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42475,10 +54050,10 @@ GrB_Info GB_Adot2B__min_land_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_uint32
+GrB_Info GB_Adot3B__max_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42486,33 +54061,32 @@ GrB_Info GB_Adot3B__min_land_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_uint32
+GrB_Info GB_Asaxpy3B__max_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_land_uint64
+GrB_Info GB_Adot4B__max_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_uint64
+GrB_Info GB_Adot2B__max_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42520,10 +54094,10 @@ GrB_Info GB_Adot2B__min_land_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_uint64
+GrB_Info GB_Adot3B__max_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42531,32 +54105,32 @@ GrB_Info GB_Adot3B__min_land_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_uint64
+GrB_Info GB_Asaxpy3B__max_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_land_fp32
+GrB_Info GB_Adot4B__max_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_fp32
+GrB_Info GB_Adot2B__max_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42564,10 +54138,10 @@ GrB_Info GB_Adot2B__min_land_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_fp32
+GrB_Info GB_Adot3B__max_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42575,32 +54149,32 @@ GrB_Info GB_Adot3B__min_land_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_fp32
+GrB_Info GB_Asaxpy3B__max_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_land_fp64
+GrB_Info GB_Adot4B__max_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__min_land_fp64
+GrB_Info GB_Adot2B__max_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42608,10 +54182,10 @@ GrB_Info GB_Adot2B__min_land_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__min_land_fp64
+GrB_Info GB_Adot3B__max_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42619,33 +54193,32 @@ GrB_Info GB_Adot3B__min_land_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_land_fp64
+GrB_Info GB_Asaxpy3B__max_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_int8
+GrB_Info GB_Adot4B__max_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_int8
+GrB_Info GB_Adot2B__any_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42653,10 +54226,10 @@ GrB_Info GB_Adot2B__max_land_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_int8
+GrB_Info GB_Adot3B__any_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42664,33 +54237,32 @@ GrB_Info GB_Adot3B__max_land_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_int8
+GrB_Info GB_Asaxpy3B__any_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_int16
+GrB_Info GB_Adot4B__any_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_int16
+GrB_Info GB_Adot2B__any_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42698,10 +54270,10 @@ GrB_Info GB_Adot2B__max_land_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_int16
+GrB_Info GB_Adot3B__any_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42709,33 +54281,32 @@ GrB_Info GB_Adot3B__max_land_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_int16
+GrB_Info GB_Asaxpy3B__any_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_int32
+GrB_Info GB_Adot4B__any_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_int32
+GrB_Info GB_Adot2B__any_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42743,10 +54314,10 @@ GrB_Info GB_Adot2B__max_land_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_int32
+GrB_Info GB_Adot3B__any_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42754,33 +54325,32 @@ GrB_Info GB_Adot3B__max_land_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_int32
+GrB_Info GB_Asaxpy3B__any_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_int64
+GrB_Info GB_Adot4B__any_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_int64
+GrB_Info GB_Adot2B__any_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42788,10 +54358,10 @@ GrB_Info GB_Adot2B__max_land_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_int64
+GrB_Info GB_Adot3B__any_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42799,33 +54369,32 @@ GrB_Info GB_Adot3B__max_land_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_int64
+GrB_Info GB_Asaxpy3B__any_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_uint8
+GrB_Info GB_Adot4B__any_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_uint8
+GrB_Info GB_Adot2B__any_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42833,10 +54402,10 @@ GrB_Info GB_Adot2B__max_land_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_uint8
+GrB_Info GB_Adot3B__any_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42844,33 +54413,32 @@ GrB_Info GB_Adot3B__max_land_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_uint8
+GrB_Info GB_Asaxpy3B__any_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_uint16
+GrB_Info GB_Adot4B__any_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_uint16
+GrB_Info GB_Adot2B__any_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42878,10 +54446,10 @@ GrB_Info GB_Adot2B__max_land_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_uint16
+GrB_Info GB_Adot3B__any_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42889,33 +54457,32 @@ GrB_Info GB_Adot3B__max_land_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_uint16
+GrB_Info GB_Asaxpy3B__any_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_uint32
+GrB_Info GB_Adot4B__any_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_uint32
+GrB_Info GB_Adot2B__any_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42923,10 +54490,10 @@ GrB_Info GB_Adot2B__max_land_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_uint32
+GrB_Info GB_Adot3B__any_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42934,33 +54501,32 @@ GrB_Info GB_Adot3B__max_land_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_uint32
+GrB_Info GB_Asaxpy3B__any_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_land_uint64
+GrB_Info GB_Adot4B__any_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_uint64
+GrB_Info GB_Adot2B__any_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -42968,10 +54534,10 @@ GrB_Info GB_Adot2B__max_land_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_uint64
+GrB_Info GB_Adot3B__any_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -42979,32 +54545,32 @@ GrB_Info GB_Adot3B__max_land_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_uint64
+GrB_Info GB_Asaxpy3B__any_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_land_fp32
+GrB_Info GB_Adot4B__any_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_fp32
+GrB_Info GB_Adot2B__any_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43012,10 +54578,10 @@ GrB_Info GB_Adot2B__max_land_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_fp32
+GrB_Info GB_Adot3B__any_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43023,32 +54589,32 @@ GrB_Info GB_Adot3B__max_land_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_fp32
+GrB_Info GB_Asaxpy3B__any_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_land_fp64
+GrB_Info GB_Adot4B__any_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
-GrB_Info GB_Adot2B__max_land_fp64
+GrB_Info GB_Adot2B__any_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43056,10 +54622,10 @@ GrB_Info GB_Adot2B__max_land_fp64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__max_land_fp64
+GrB_Info GB_Adot3B__any_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43067,33 +54633,32 @@ GrB_Info GB_Adot3B__max_land_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_land_fp64
+GrB_Info GB_Asaxpy3B__any_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_int8
+GrB_Info GB_Adot4B__any_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43104,7 +54669,7 @@ GrB_Info GB_Adot2B__plus_land_int8
 GrB_Info GB_Adot3B__plus_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43112,33 +54677,32 @@ GrB_Info GB_Adot3B__plus_land_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_int8
+GrB_Info GB_Asaxpy3B__plus_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_uint8
+GrB_Info GB_Adot4B__plus_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43149,7 +54713,7 @@ GrB_Info GB_Adot2B__plus_land_uint8
 GrB_Info GB_Adot3B__plus_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43157,33 +54721,32 @@ GrB_Info GB_Adot3B__plus_land_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_uint8
+GrB_Info GB_Asaxpy3B__plus_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_int16
+GrB_Info GB_Adot4B__plus_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43194,7 +54757,7 @@ GrB_Info GB_Adot2B__plus_land_int16
 GrB_Info GB_Adot3B__plus_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43202,33 +54765,32 @@ GrB_Info GB_Adot3B__plus_land_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_int16
+GrB_Info GB_Asaxpy3B__plus_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_uint16
+GrB_Info GB_Adot4B__plus_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43239,7 +54801,7 @@ GrB_Info GB_Adot2B__plus_land_uint16
 GrB_Info GB_Adot3B__plus_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43247,33 +54809,32 @@ GrB_Info GB_Adot3B__plus_land_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_uint16
+GrB_Info GB_Asaxpy3B__plus_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_int32
+GrB_Info GB_Adot4B__plus_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43284,7 +54845,7 @@ GrB_Info GB_Adot2B__plus_land_int32
 GrB_Info GB_Adot3B__plus_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43292,33 +54853,32 @@ GrB_Info GB_Adot3B__plus_land_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_int32
+GrB_Info GB_Asaxpy3B__plus_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_uint32
+GrB_Info GB_Adot4B__plus_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43329,7 +54889,7 @@ GrB_Info GB_Adot2B__plus_land_uint32
 GrB_Info GB_Adot3B__plus_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43337,33 +54897,32 @@ GrB_Info GB_Adot3B__plus_land_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_uint32
+GrB_Info GB_Asaxpy3B__plus_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_int64
+GrB_Info GB_Adot4B__plus_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43374,7 +54933,7 @@ GrB_Info GB_Adot2B__plus_land_int64
 GrB_Info GB_Adot3B__plus_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43382,33 +54941,32 @@ GrB_Info GB_Adot3B__plus_land_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_int64
+GrB_Info GB_Asaxpy3B__plus_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__plus_land_uint64
+GrB_Info GB_Adot4B__plus_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43419,7 +54977,7 @@ GrB_Info GB_Adot2B__plus_land_uint64
 GrB_Info GB_Adot3B__plus_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43427,32 +54985,32 @@ GrB_Info GB_Adot3B__plus_land_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_uint64
+GrB_Info GB_Asaxpy3B__plus_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_land_fp32
+GrB_Info GB_Adot4B__plus_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43463,7 +55021,7 @@ GrB_Info GB_Adot2B__plus_land_fp32
 GrB_Info GB_Adot3B__plus_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43471,32 +55029,32 @@ GrB_Info GB_Adot3B__plus_land_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_fp32
+GrB_Info GB_Asaxpy3B__plus_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__plus_land_fp64
+GrB_Info GB_Adot4B__plus_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__plus_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43507,7 +55065,7 @@ GrB_Info GB_Adot2B__plus_land_fp64
 GrB_Info GB_Adot3B__plus_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43515,33 +55073,32 @@ GrB_Info GB_Adot3B__plus_land_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_land_fp64
+GrB_Info GB_Asaxpy3B__plus_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_int8
+GrB_Info GB_Adot4B__plus_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43552,7 +55109,7 @@ GrB_Info GB_Adot2B__times_land_int8
 GrB_Info GB_Adot3B__times_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43560,33 +55117,32 @@ GrB_Info GB_Adot3B__times_land_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_int8
+GrB_Info GB_Asaxpy3B__times_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_uint8
+GrB_Info GB_Adot4B__times_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43597,7 +55153,7 @@ GrB_Info GB_Adot2B__times_land_uint8
 GrB_Info GB_Adot3B__times_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43605,33 +55161,32 @@ GrB_Info GB_Adot3B__times_land_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_uint8
+GrB_Info GB_Asaxpy3B__times_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_int16
+GrB_Info GB_Adot4B__times_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43642,7 +55197,7 @@ GrB_Info GB_Adot2B__times_land_int16
 GrB_Info GB_Adot3B__times_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43650,33 +55205,32 @@ GrB_Info GB_Adot3B__times_land_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_int16
+GrB_Info GB_Asaxpy3B__times_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_uint16
+GrB_Info GB_Adot4B__times_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43687,7 +55241,7 @@ GrB_Info GB_Adot2B__times_land_uint16
 GrB_Info GB_Adot3B__times_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43695,33 +55249,32 @@ GrB_Info GB_Adot3B__times_land_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_uint16
+GrB_Info GB_Asaxpy3B__times_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_int32
+GrB_Info GB_Adot4B__times_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43732,7 +55285,7 @@ GrB_Info GB_Adot2B__times_land_int32
 GrB_Info GB_Adot3B__times_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43740,33 +55293,32 @@ GrB_Info GB_Adot3B__times_land_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_int32
+GrB_Info GB_Asaxpy3B__times_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_uint32
+GrB_Info GB_Adot4B__times_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43777,7 +55329,7 @@ GrB_Info GB_Adot2B__times_land_uint32
 GrB_Info GB_Adot3B__times_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43785,33 +55337,32 @@ GrB_Info GB_Adot3B__times_land_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_uint32
+GrB_Info GB_Asaxpy3B__times_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_int64
+GrB_Info GB_Adot4B__times_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43822,7 +55373,7 @@ GrB_Info GB_Adot2B__times_land_int64
 GrB_Info GB_Adot3B__times_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43830,33 +55381,32 @@ GrB_Info GB_Adot3B__times_land_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_int64
+GrB_Info GB_Asaxpy3B__times_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_land_uint64
+GrB_Info GB_Adot4B__times_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43867,7 +55417,7 @@ GrB_Info GB_Adot2B__times_land_uint64
 GrB_Info GB_Adot3B__times_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43875,32 +55425,32 @@ GrB_Info GB_Adot3B__times_land_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_uint64
+GrB_Info GB_Asaxpy3B__times_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_land_fp32
+GrB_Info GB_Adot4B__times_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43911,7 +55461,7 @@ GrB_Info GB_Adot2B__times_land_fp32
 GrB_Info GB_Adot3B__times_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43919,32 +55469,32 @@ GrB_Info GB_Adot3B__times_land_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_fp32
+GrB_Info GB_Asaxpy3B__times_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_land_fp64
+GrB_Info GB_Adot4B__times_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43955,7 +55505,7 @@ GrB_Info GB_Adot2B__times_land_fp64
 GrB_Info GB_Adot3B__times_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -43963,32 +55513,32 @@ GrB_Info GB_Adot3B__times_land_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_land_fp64
+GrB_Info GB_Asaxpy3B__times_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_land_bool
+GrB_Info GB_Adot4B__times_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__lor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -43999,7 +55549,7 @@ GrB_Info GB_Adot2B__lor_land_bool
 GrB_Info GB_Adot3B__lor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44007,32 +55557,32 @@ GrB_Info GB_Adot3B__lor_land_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_land_bool
+GrB_Info GB_Asaxpy3B__lor_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_land_bool
+GrB_Info GB_Adot4B__lor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__land_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44043,7 +55593,7 @@ GrB_Info GB_Adot2B__land_land_bool
 GrB_Info GB_Adot3B__land_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44051,32 +55601,32 @@ GrB_Info GB_Adot3B__land_land_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_land_bool
+GrB_Info GB_Asaxpy3B__land_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_land_bool
+GrB_Info GB_Adot4B__land_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__lxor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44087,7 +55637,7 @@ GrB_Info GB_Adot2B__lxor_land_bool
 GrB_Info GB_Adot3B__lxor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44095,32 +55645,76 @@ GrB_Info GB_Adot3B__lxor_land_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_land_bool
+GrB_Info GB_Asaxpy3B__lxor_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__lxor_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
 ) ;
 
+GrB_Info GB_Adot3B__any_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__eq_land_bool
+GrB_Info GB_Asaxpy3B__any_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
 GrB_Info GB_Adot2B__eq_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44131,7 +55725,7 @@ GrB_Info GB_Adot2B__eq_land_bool
 GrB_Info GB_Adot3B__eq_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44139,33 +55733,32 @@ GrB_Info GB_Adot3B__eq_land_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_land_bool
+GrB_Info GB_Asaxpy3B__eq_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_int8
+GrB_Info GB_Adot4B__eq_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44176,7 +55769,7 @@ GrB_Info GB_Adot2B__min_lxor_int8
 GrB_Info GB_Adot3B__min_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44184,33 +55777,32 @@ GrB_Info GB_Adot3B__min_lxor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_int8
+GrB_Info GB_Asaxpy3B__min_lxor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_int16
+GrB_Info GB_Adot4B__min_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44221,7 +55813,7 @@ GrB_Info GB_Adot2B__min_lxor_int16
 GrB_Info GB_Adot3B__min_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44229,33 +55821,32 @@ GrB_Info GB_Adot3B__min_lxor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_int16
+GrB_Info GB_Asaxpy3B__min_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_int32
+GrB_Info GB_Adot4B__min_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44266,7 +55857,7 @@ GrB_Info GB_Adot2B__min_lxor_int32
 GrB_Info GB_Adot3B__min_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44274,33 +55865,32 @@ GrB_Info GB_Adot3B__min_lxor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_int32
+GrB_Info GB_Asaxpy3B__min_lxor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_int64
+GrB_Info GB_Adot4B__min_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44311,7 +55901,7 @@ GrB_Info GB_Adot2B__min_lxor_int64
 GrB_Info GB_Adot3B__min_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44319,33 +55909,32 @@ GrB_Info GB_Adot3B__min_lxor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_int64
+GrB_Info GB_Asaxpy3B__min_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_uint8
+GrB_Info GB_Adot4B__min_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44356,7 +55945,7 @@ GrB_Info GB_Adot2B__min_lxor_uint8
 GrB_Info GB_Adot3B__min_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44364,33 +55953,32 @@ GrB_Info GB_Adot3B__min_lxor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_uint8
+GrB_Info GB_Asaxpy3B__min_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_uint16
+GrB_Info GB_Adot4B__min_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44401,7 +55989,7 @@ GrB_Info GB_Adot2B__min_lxor_uint16
 GrB_Info GB_Adot3B__min_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44409,33 +55997,32 @@ GrB_Info GB_Adot3B__min_lxor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_uint16
+GrB_Info GB_Asaxpy3B__min_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_uint32
+GrB_Info GB_Adot4B__min_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44446,7 +56033,7 @@ GrB_Info GB_Adot2B__min_lxor_uint32
 GrB_Info GB_Adot3B__min_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44454,33 +56041,32 @@ GrB_Info GB_Adot3B__min_lxor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_uint32
+GrB_Info GB_Asaxpy3B__min_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__min_lxor_uint64
+GrB_Info GB_Adot4B__min_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44491,7 +56077,7 @@ GrB_Info GB_Adot2B__min_lxor_uint64
 GrB_Info GB_Adot3B__min_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44499,32 +56085,32 @@ GrB_Info GB_Adot3B__min_lxor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_uint64
+GrB_Info GB_Asaxpy3B__min_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_lxor_fp32
+GrB_Info GB_Adot4B__min_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44535,7 +56121,7 @@ GrB_Info GB_Adot2B__min_lxor_fp32
 GrB_Info GB_Adot3B__min_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44543,32 +56129,32 @@ GrB_Info GB_Adot3B__min_lxor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_fp32
+GrB_Info GB_Asaxpy3B__min_lxor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__min_lxor_fp64
+GrB_Info GB_Adot4B__min_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__min_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44579,7 +56165,7 @@ GrB_Info GB_Adot2B__min_lxor_fp64
 GrB_Info GB_Adot3B__min_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44587,33 +56173,32 @@ GrB_Info GB_Adot3B__min_lxor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__min_lxor_fp64
+GrB_Info GB_Asaxpy3B__min_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_int8
+GrB_Info GB_Adot4B__min_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44624,7 +56209,7 @@ GrB_Info GB_Adot2B__max_lxor_int8
 GrB_Info GB_Adot3B__max_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44632,33 +56217,32 @@ GrB_Info GB_Adot3B__max_lxor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_int8
+GrB_Info GB_Asaxpy3B__max_lxor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_int16
+GrB_Info GB_Adot4B__max_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44669,7 +56253,7 @@ GrB_Info GB_Adot2B__max_lxor_int16
 GrB_Info GB_Adot3B__max_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44677,33 +56261,32 @@ GrB_Info GB_Adot3B__max_lxor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_int16
+GrB_Info GB_Asaxpy3B__max_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_int32
+GrB_Info GB_Adot4B__max_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44714,7 +56297,7 @@ GrB_Info GB_Adot2B__max_lxor_int32
 GrB_Info GB_Adot3B__max_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44722,33 +56305,32 @@ GrB_Info GB_Adot3B__max_lxor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_int32
+GrB_Info GB_Asaxpy3B__max_lxor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_int64
+GrB_Info GB_Adot4B__max_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44759,7 +56341,7 @@ GrB_Info GB_Adot2B__max_lxor_int64
 GrB_Info GB_Adot3B__max_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44767,33 +56349,32 @@ GrB_Info GB_Adot3B__max_lxor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_int64
+GrB_Info GB_Asaxpy3B__max_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_uint8
+GrB_Info GB_Adot4B__max_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44804,7 +56385,7 @@ GrB_Info GB_Adot2B__max_lxor_uint8
 GrB_Info GB_Adot3B__max_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44812,33 +56393,32 @@ GrB_Info GB_Adot3B__max_lxor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_uint8
+GrB_Info GB_Asaxpy3B__max_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_uint16
+GrB_Info GB_Adot4B__max_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44849,7 +56429,7 @@ GrB_Info GB_Adot2B__max_lxor_uint16
 GrB_Info GB_Adot3B__max_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44857,33 +56437,32 @@ GrB_Info GB_Adot3B__max_lxor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_uint16
+GrB_Info GB_Asaxpy3B__max_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_uint32
+GrB_Info GB_Adot4B__max_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44894,7 +56473,7 @@ GrB_Info GB_Adot2B__max_lxor_uint32
 GrB_Info GB_Adot3B__max_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44902,33 +56481,32 @@ GrB_Info GB_Adot3B__max_lxor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_uint32
+GrB_Info GB_Asaxpy3B__max_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__max_lxor_uint64
+GrB_Info GB_Adot4B__max_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44939,7 +56517,7 @@ GrB_Info GB_Adot2B__max_lxor_uint64
 GrB_Info GB_Adot3B__max_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44947,32 +56525,32 @@ GrB_Info GB_Adot3B__max_lxor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_uint64
+GrB_Info GB_Asaxpy3B__max_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_lxor_fp32
+GrB_Info GB_Adot4B__max_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -44983,7 +56561,7 @@ GrB_Info GB_Adot2B__max_lxor_fp32
 GrB_Info GB_Adot3B__max_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -44991,32 +56569,32 @@ GrB_Info GB_Adot3B__max_lxor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_fp32
+GrB_Info GB_Asaxpy3B__max_lxor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__max_lxor_fp64
+GrB_Info GB_Adot4B__max_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__max_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45027,7 +56605,7 @@ GrB_Info GB_Adot2B__max_lxor_fp64
 GrB_Info GB_Adot3B__max_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45035,33 +56613,76 @@ GrB_Info GB_Adot3B__max_lxor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__max_lxor_fp64
+GrB_Info GB_Asaxpy3B__max_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__max_lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__any_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_int8
+GrB_Info GB_Asaxpy3B__any_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_int8
+GrB_Info GB_Adot4B__any_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45069,10 +56690,10 @@ GrB_Info GB_Adot2B__plus_lxor_int8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_int8
+GrB_Info GB_Adot3B__any_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45080,33 +56701,76 @@ GrB_Info GB_Adot3B__plus_lxor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_int8
+GrB_Info GB_Asaxpy3B__any_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__any_lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_uint8
+GrB_Info GB_Asaxpy3B__any_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_uint8
+GrB_Info GB_Adot4B__any_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45114,10 +56778,10 @@ GrB_Info GB_Adot2B__plus_lxor_uint8
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_uint8
+GrB_Info GB_Adot3B__any_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45125,33 +56789,76 @@ GrB_Info GB_Adot3B__plus_lxor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_uint8
+GrB_Info GB_Asaxpy3B__any_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__any_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_int16
+GrB_Info GB_Asaxpy3B__any_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_int16
+GrB_Info GB_Adot4B__any_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45159,10 +56866,10 @@ GrB_Info GB_Adot2B__plus_lxor_int16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_int16
+GrB_Info GB_Adot3B__any_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45170,33 +56877,76 @@ GrB_Info GB_Adot3B__plus_lxor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_int16
+GrB_Info GB_Asaxpy3B__any_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__any_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_uint16
+GrB_Info GB_Asaxpy3B__any_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_uint16
+GrB_Info GB_Adot4B__any_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45204,10 +56954,10 @@ GrB_Info GB_Adot2B__plus_lxor_uint16
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_uint16
+GrB_Info GB_Adot3B__any_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45215,33 +56965,76 @@ GrB_Info GB_Adot3B__plus_lxor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_uint16
+GrB_Info GB_Asaxpy3B__any_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__any_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_int32
+GrB_Info GB_Asaxpy3B__any_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_int32
+GrB_Info GB_Adot4B__any_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45249,10 +57042,10 @@ GrB_Info GB_Adot2B__plus_lxor_int32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_int32
+GrB_Info GB_Adot3B__any_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45260,33 +57053,76 @@ GrB_Info GB_Adot3B__plus_lxor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_int32
+GrB_Info GB_Asaxpy3B__any_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_uint32
+GrB_Info GB_Asaxpy3B__plus_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_uint32
+GrB_Info GB_Adot4B__plus_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45294,10 +57130,10 @@ GrB_Info GB_Adot2B__plus_lxor_uint32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_uint32
+GrB_Info GB_Adot3B__plus_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45305,33 +57141,76 @@ GrB_Info GB_Adot3B__plus_lxor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_uint32
+GrB_Info GB_Asaxpy3B__plus_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_int64
+GrB_Info GB_Asaxpy3B__plus_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_int64
+GrB_Info GB_Adot4B__plus_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45339,10 +57218,10 @@ GrB_Info GB_Adot2B__plus_lxor_int64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_int64
+GrB_Info GB_Adot3B__plus_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45350,33 +57229,76 @@ GrB_Info GB_Adot3B__plus_lxor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_int64
+GrB_Info GB_Asaxpy3B__plus_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
+GrB_Info GB_Adot3B__plus_lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_uint64
+GrB_Info GB_Asaxpy3B__plus_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_uint64
+GrB_Info GB_Adot4B__plus_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45384,10 +57306,10 @@ GrB_Info GB_Adot2B__plus_lxor_uint64
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_uint64
+GrB_Info GB_Adot3B__plus_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45395,32 +57317,76 @@ GrB_Info GB_Adot3B__plus_lxor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_uint64
+GrB_Info GB_Asaxpy3B__plus_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_fp32
+GrB_Info GB_Adot3B__plus_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
-GrB_Info GB_Adot2B__plus_lxor_fp32
+GrB_Info GB_Asaxpy3B__plus_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45428,10 +57394,10 @@ GrB_Info GB_Adot2B__plus_lxor_fp32
     int nthreads, int naslice, int nbslice
 ) ;
 
-GrB_Info GB_Adot3B__plus_lxor_fp32
+GrB_Info GB_Adot3B__plus_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45439,32 +57405,76 @@ GrB_Info GB_Adot3B__plus_lxor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_fp32
+GrB_Info GB_Asaxpy3B__plus_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__plus_lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__plus_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
 
-GrB_Info GB_AgusB__plus_lxor_fp64
+GrB_Info GB_Adot3B__plus_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
 ) ;
 
+GrB_Info GB_Asaxpy3B__plus_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+) ;
+
+GrB_Info GB_Adot4B__plus_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
 GrB_Info GB_Adot2B__plus_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45475,7 +57485,7 @@ GrB_Info GB_Adot2B__plus_lxor_fp64
 GrB_Info GB_Adot3B__plus_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45483,33 +57493,32 @@ GrB_Info GB_Adot3B__plus_lxor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__plus_lxor_fp64
+GrB_Info GB_Asaxpy3B__plus_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_int8
+GrB_Info GB_Adot4B__plus_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45520,7 +57529,7 @@ GrB_Info GB_Adot2B__times_lxor_int8
 GrB_Info GB_Adot3B__times_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45528,33 +57537,32 @@ GrB_Info GB_Adot3B__times_lxor_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_int8
+GrB_Info GB_Asaxpy3B__times_lxor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_uint8
+GrB_Info GB_Adot4B__times_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45565,7 +57573,7 @@ GrB_Info GB_Adot2B__times_lxor_uint8
 GrB_Info GB_Adot3B__times_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45573,33 +57581,32 @@ GrB_Info GB_Adot3B__times_lxor_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_uint8
+GrB_Info GB_Asaxpy3B__times_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_int16
+GrB_Info GB_Adot4B__times_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45610,7 +57617,7 @@ GrB_Info GB_Adot2B__times_lxor_int16
 GrB_Info GB_Adot3B__times_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45618,33 +57625,32 @@ GrB_Info GB_Adot3B__times_lxor_int16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_int16
+GrB_Info GB_Asaxpy3B__times_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_uint16
+GrB_Info GB_Adot4B__times_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45655,7 +57661,7 @@ GrB_Info GB_Adot2B__times_lxor_uint16
 GrB_Info GB_Adot3B__times_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45663,33 +57669,32 @@ GrB_Info GB_Adot3B__times_lxor_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_uint16
+GrB_Info GB_Asaxpy3B__times_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_int32
+GrB_Info GB_Adot4B__times_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45700,7 +57705,7 @@ GrB_Info GB_Adot2B__times_lxor_int32
 GrB_Info GB_Adot3B__times_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45708,33 +57713,32 @@ GrB_Info GB_Adot3B__times_lxor_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_int32
+GrB_Info GB_Asaxpy3B__times_lxor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_uint32
+GrB_Info GB_Adot4B__times_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45745,7 +57749,7 @@ GrB_Info GB_Adot2B__times_lxor_uint32
 GrB_Info GB_Adot3B__times_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45753,33 +57757,32 @@ GrB_Info GB_Adot3B__times_lxor_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_uint32
+GrB_Info GB_Asaxpy3B__times_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_int64
+GrB_Info GB_Adot4B__times_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45790,7 +57793,7 @@ GrB_Info GB_Adot2B__times_lxor_int64
 GrB_Info GB_Adot3B__times_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45798,33 +57801,32 @@ GrB_Info GB_Adot3B__times_lxor_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_int64
+GrB_Info GB_Asaxpy3B__times_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-
-GrB_Info GB_AgusB__times_lxor_uint64
+GrB_Info GB_Adot4B__times_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45835,7 +57837,7 @@ GrB_Info GB_Adot2B__times_lxor_uint64
 GrB_Info GB_Adot3B__times_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45843,32 +57845,32 @@ GrB_Info GB_Adot3B__times_lxor_uint64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_uint64
+GrB_Info GB_Asaxpy3B__times_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_lxor_fp32
+GrB_Info GB_Adot4B__times_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45879,7 +57881,7 @@ GrB_Info GB_Adot2B__times_lxor_fp32
 GrB_Info GB_Adot3B__times_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45887,32 +57889,32 @@ GrB_Info GB_Adot3B__times_lxor_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_fp32
+GrB_Info GB_Asaxpy3B__times_lxor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__times_lxor_fp64
+GrB_Info GB_Adot4B__times_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__times_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45923,7 +57925,7 @@ GrB_Info GB_Adot2B__times_lxor_fp64
 GrB_Info GB_Adot3B__times_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45931,32 +57933,32 @@ GrB_Info GB_Adot3B__times_lxor_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__times_lxor_fp64
+GrB_Info GB_Asaxpy3B__times_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lor_lxor_bool
+GrB_Info GB_Adot4B__times_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__lor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -45967,7 +57969,7 @@ GrB_Info GB_Adot2B__lor_lxor_bool
 GrB_Info GB_Adot3B__lor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -45975,32 +57977,32 @@ GrB_Info GB_Adot3B__lor_lxor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lor_lxor_bool
+GrB_Info GB_Asaxpy3B__lor_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__land_lxor_bool
+GrB_Info GB_Adot4B__lor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__land_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -46011,7 +58013,7 @@ GrB_Info GB_Adot2B__land_lxor_bool
 GrB_Info GB_Adot3B__land_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -46019,32 +58021,32 @@ GrB_Info GB_Adot3B__land_lxor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__land_lxor_bool
+GrB_Info GB_Asaxpy3B__land_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
-
-GrB_Info GB_AgusB__lxor_lxor_bool
+GrB_Info GB_Adot4B__land_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
 ) ;
-
 GrB_Info GB_Adot2B__lxor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -46055,7 +58057,7 @@ GrB_Info GB_Adot2B__lxor_lxor_bool
 GrB_Info GB_Adot3B__lxor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -46063,32 +58065,76 @@ GrB_Info GB_Adot3B__lxor_lxor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__lxor_lxor_bool
+GrB_Info GB_Asaxpy3B__lxor_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__lxor_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
+GrB_Info GB_Adot2B__any_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+) ;
+
+GrB_Info GB_Adot3B__any_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
 
-GrB_Info GB_AgusB__eq_lxor_bool
+GrB_Info GB_Asaxpy3B__any_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__any_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
 GrB_Info GB_Adot2B__eq_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -46099,7 +58145,7 @@ GrB_Info GB_Adot2B__eq_lxor_bool
 GrB_Info GB_Adot3B__eq_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -46107,15 +58153,25 @@ GrB_Info GB_Adot3B__eq_lxor_bool
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB__eq_lxor_bool
+GrB_Info GB_Asaxpy3B__eq_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B__eq_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
diff --git a/Source/Generated/GB_AxB__land_eq_bool.c b/Source/Generated/GB_AxB__land_eq_bool.c
index 1a2f9f6e95..a889c993bd 100644
--- a/Source/Generated/GB_AxB__land_eq_bool.c
+++ b/Source/Generated/GB_AxB__land_eq_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_bool
 // A'*B function (dot2):     GB_Adot2B__land_eq_bool
 // A'*B function (dot3):     GB_Adot3B__land_eq_bool
-// A*B function (heap):      GB_AheapB__land_eq_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LAND_EQ_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_bool
 GrB_Info GB_Adot2B__land_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_bool
 GrB_Info GB_Adot3B__land_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_fp32.c b/Source/Generated/GB_AxB__land_eq_fp32.c
index fa4bc7925e..1fda0eca25 100644
--- a/Source/Generated/GB_AxB__land_eq_fp32.c
+++ b/Source/Generated/GB_AxB__land_eq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_fp32
 // A'*B function (dot2):     GB_Adot2B__land_eq_fp32
 // A'*B function (dot3):     GB_Adot3B__land_eq_fp32
-// A*B function (heap):      GB_AheapB__land_eq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_FP32 || GxB_NO_LAND_BOOL || GxB_NO_EQ_FP32 || GxB_NO_LAND_EQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_fp32
 GrB_Info GB_Adot2B__land_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_fp32
 GrB_Info GB_Adot3B__land_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_fp64.c b/Source/Generated/GB_AxB__land_eq_fp64.c
index 96e0834396..071bf23dd0 100644
--- a/Source/Generated/GB_AxB__land_eq_fp64.c
+++ b/Source/Generated/GB_AxB__land_eq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_fp64
 // A'*B function (dot2):     GB_Adot2B__land_eq_fp64
 // A'*B function (dot3):     GB_Adot3B__land_eq_fp64
-// A*B function (heap):      GB_AheapB__land_eq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_FP64 || GxB_NO_LAND_BOOL || GxB_NO_EQ_FP64 || GxB_NO_LAND_EQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_fp64
 GrB_Info GB_Adot2B__land_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_fp64
 GrB_Info GB_Adot3B__land_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_int16.c b/Source/Generated/GB_AxB__land_eq_int16.c
index 332283030f..b15e081dc6 100644
--- a/Source/Generated/GB_AxB__land_eq_int16.c
+++ b/Source/Generated/GB_AxB__land_eq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_int16
 // A'*B function (dot2):     GB_Adot2B__land_eq_int16
 // A'*B function (dot3):     GB_Adot3B__land_eq_int16
-// A*B function (heap):      GB_AheapB__land_eq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_INT16 || GxB_NO_LAND_BOOL || GxB_NO_EQ_INT16 || GxB_NO_LAND_EQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_int16
 GrB_Info GB_Adot2B__land_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_int16
 GrB_Info GB_Adot3B__land_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_int32.c b/Source/Generated/GB_AxB__land_eq_int32.c
index b6a38a9be7..9479f13fca 100644
--- a/Source/Generated/GB_AxB__land_eq_int32.c
+++ b/Source/Generated/GB_AxB__land_eq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_int32
 // A'*B function (dot2):     GB_Adot2B__land_eq_int32
 // A'*B function (dot3):     GB_Adot3B__land_eq_int32
-// A*B function (heap):      GB_AheapB__land_eq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_INT32 || GxB_NO_LAND_BOOL || GxB_NO_EQ_INT32 || GxB_NO_LAND_EQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_int32
 GrB_Info GB_Adot2B__land_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_int32
 GrB_Info GB_Adot3B__land_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_int64.c b/Source/Generated/GB_AxB__land_eq_int64.c
index 132218a4e3..5e7dfb0371 100644
--- a/Source/Generated/GB_AxB__land_eq_int64.c
+++ b/Source/Generated/GB_AxB__land_eq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_int64
 // A'*B function (dot2):     GB_Adot2B__land_eq_int64
 // A'*B function (dot3):     GB_Adot3B__land_eq_int64
-// A*B function (heap):      GB_AheapB__land_eq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_INT64 || GxB_NO_LAND_BOOL || GxB_NO_EQ_INT64 || GxB_NO_LAND_EQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_int64
 GrB_Info GB_Adot2B__land_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_int64
 GrB_Info GB_Adot3B__land_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_int8.c b/Source/Generated/GB_AxB__land_eq_int8.c
index cb629df86f..b83a3a54d5 100644
--- a/Source/Generated/GB_AxB__land_eq_int8.c
+++ b/Source/Generated/GB_AxB__land_eq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_int8
 // A'*B function (dot2):     GB_Adot2B__land_eq_int8
 // A'*B function (dot3):     GB_Adot3B__land_eq_int8
-// A*B function (heap):      GB_AheapB__land_eq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_INT8 || GxB_NO_LAND_BOOL || GxB_NO_EQ_INT8 || GxB_NO_LAND_EQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_int8
 GrB_Info GB_Adot2B__land_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_int8
 GrB_Info GB_Adot3B__land_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_uint16.c b/Source/Generated/GB_AxB__land_eq_uint16.c
index 49ad5ab07a..09ead4ffec 100644
--- a/Source/Generated/GB_AxB__land_eq_uint16.c
+++ b/Source/Generated/GB_AxB__land_eq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_uint16
 // A'*B function (dot2):     GB_Adot2B__land_eq_uint16
 // A'*B function (dot3):     GB_Adot3B__land_eq_uint16
-// A*B function (heap):      GB_AheapB__land_eq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_UINT16 || GxB_NO_LAND_BOOL || GxB_NO_EQ_UINT16 || GxB_NO_LAND_EQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_uint16
 GrB_Info GB_Adot2B__land_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_uint16
 GrB_Info GB_Adot3B__land_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_uint32.c b/Source/Generated/GB_AxB__land_eq_uint32.c
index c0fc391d73..c52f74b6bd 100644
--- a/Source/Generated/GB_AxB__land_eq_uint32.c
+++ b/Source/Generated/GB_AxB__land_eq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_uint32
 // A'*B function (dot2):     GB_Adot2B__land_eq_uint32
 // A'*B function (dot3):     GB_Adot3B__land_eq_uint32
-// A*B function (heap):      GB_AheapB__land_eq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_UINT32 || GxB_NO_LAND_BOOL || GxB_NO_EQ_UINT32 || GxB_NO_LAND_EQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_uint32
 GrB_Info GB_Adot2B__land_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_uint32
 GrB_Info GB_Adot3B__land_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_uint64.c b/Source/Generated/GB_AxB__land_eq_uint64.c
index ae73f89199..31d4d26114 100644
--- a/Source/Generated/GB_AxB__land_eq_uint64.c
+++ b/Source/Generated/GB_AxB__land_eq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_uint64
 // A'*B function (dot2):     GB_Adot2B__land_eq_uint64
 // A'*B function (dot3):     GB_Adot3B__land_eq_uint64
-// A*B function (heap):      GB_AheapB__land_eq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_UINT64 || GxB_NO_LAND_BOOL || GxB_NO_EQ_UINT64 || GxB_NO_LAND_EQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_uint64
 GrB_Info GB_Adot2B__land_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_uint64
 GrB_Info GB_Adot3B__land_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_eq_uint8.c b/Source/Generated/GB_AxB__land_eq_uint8.c
index 3726e08d8f..d228038347 100644
--- a/Source/Generated/GB_AxB__land_eq_uint8.c
+++ b/Source/Generated/GB_AxB__land_eq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_eq_uint8
 // A'*B function (dot2):     GB_Adot2B__land_eq_uint8
 // A'*B function (dot3):     GB_Adot3B__land_eq_uint8
-// A*B function (heap):      GB_AheapB__land_eq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__land_eq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_eq_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik == bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik == bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_EQ || GxB_NO_UINT8 || GxB_NO_LAND_BOOL || GxB_NO_EQ_UINT8 || GxB_NO_LAND_EQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_eq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_eq_uint8
 GrB_Info GB_Adot2B__land_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_eq_uint8
 GrB_Info GB_Adot3B__land_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_eq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_eq_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_first_bool.c b/Source/Generated/GB_AxB__land_first_bool.c
index f310fd1e94..2c8b919bfb 100644
--- a/Source/Generated/GB_AxB__land_first_bool.c
+++ b/Source/Generated/GB_AxB__land_first_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_first_bool
 // A'*B function (dot2):     GB_Adot2B__land_first_bool
 // A'*B function (dot3):     GB_Adot3B__land_first_bool
-// A*B function (heap):      GB_AheapB__land_first_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_first_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_first_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = aik
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && aik)
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= aik
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_FIRST || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_FIRST_BOOL || GxB_NO_LAND_FIRST_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_first_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_first_bool
 GrB_Info GB_Adot2B__land_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_first_bool
 GrB_Info GB_Adot3B__land_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_first_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_first_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_bool.c b/Source/Generated/GB_AxB__land_ge_bool.c
index d47135f609..fd15faf33d 100644
--- a/Source/Generated/GB_AxB__land_ge_bool.c
+++ b/Source/Generated/GB_AxB__land_ge_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_bool
 // A'*B function (dot2):     GB_Adot2B__land_ge_bool
 // A'*B function (dot3):     GB_Adot3B__land_ge_bool
-// A*B function (heap):      GB_AheapB__land_ge_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_GE_BOOL || GxB_NO_LAND_GE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_bool
 GrB_Info GB_Adot2B__land_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_bool
 GrB_Info GB_Adot3B__land_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_bool    // dense dot-product kernel: C += A'*B
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are ignored -- confirm in the template
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the partition of A into naslice slices -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably the same conventions as A -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the partition of B into nbslice slices -- confirm
+    const int nthreads                          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE note, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_bool  // saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>; Mask_struct presumably means only M's pattern is used -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                // presumably the number of fine tasks among ntasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_fp32.c b/Source/Generated/GB_AxB__land_ge_fp32.c
index b06377429e..923903e7eb 100644
--- a/Source/Generated/GB_AxB__land_ge_fp32.c
+++ b/Source/Generated/GB_AxB__land_ge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_fp32
 // A'*B function (dot2):     GB_Adot2B__land_ge_fp32
 // A'*B function (dot3):     GB_Adot3B__land_ge_fp32
-// A*B function (heap):      GB_AheapB__land_ge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len counts bool entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_FP32 || GxB_NO_LAND_BOOL || GxB_NO_GE_FP32 || GxB_NO_LAND_GE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_fp32
 GrB_Info GB_Adot2B__land_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_fp32
 GrB_Info GB_Adot3B__land_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_fp32    // dense dot-product kernel: C += A'*B
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are ignored -- confirm in the template
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the partition of A into naslice slices -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably the same conventions as A -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the partition of B into nbslice slices -- confirm
+    const int nthreads                          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE note, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_fp32  // saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>; Mask_struct presumably means only M's pattern is used -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                // presumably the number of fine tasks among ntasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_fp64.c b/Source/Generated/GB_AxB__land_ge_fp64.c
index ba5f7660d7..6a24c88cb2 100644
--- a/Source/Generated/GB_AxB__land_ge_fp64.c
+++ b/Source/Generated/GB_AxB__land_ge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_fp64
 // A'*B function (dot2):     GB_Adot2B__land_ge_fp64
 // A'*B function (dot3):     GB_Adot3B__land_ge_fp64
-// A*B function (heap):      GB_AheapB__land_ge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len counts bool entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_FP64 || GxB_NO_LAND_BOOL || GxB_NO_GE_FP64 || GxB_NO_LAND_GE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_fp64
 GrB_Info GB_Adot2B__land_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_fp64
 GrB_Info GB_Adot3B__land_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_fp64    // dense dot-product kernel: C += A'*B
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are ignored -- confirm in the template
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the partition of A into naslice slices -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably the same conventions as A -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the partition of B into nbslice slices -- confirm
+    const int nthreads                          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE note, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_fp64  // saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>; Mask_struct presumably means only M's pattern is used -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                // presumably the number of fine tasks among ntasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_int16.c b/Source/Generated/GB_AxB__land_ge_int16.c
index 207f32ea03..6b6bd43a4a 100644
--- a/Source/Generated/GB_AxB__land_ge_int16.c
+++ b/Source/Generated/GB_AxB__land_ge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_int16
 // A'*B function (dot2):     GB_Adot2B__land_ge_int16
 // A'*B function (dot3):     GB_Adot3B__land_ge_int16
-// A*B function (heap):      GB_AheapB__land_ge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len counts bool entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_INT16 || GxB_NO_LAND_BOOL || GxB_NO_GE_INT16 || GxB_NO_LAND_GE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_int16
 GrB_Info GB_Adot2B__land_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_int16
 GrB_Info GB_Adot3B__land_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_int16   // dense dot-product kernel: C += A'*B
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are ignored -- confirm in the template
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the partition of A into naslice slices -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably the same conventions as A -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the partition of B into nbslice slices -- confirm
+    const int nthreads                          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                      // true when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE note, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_int16 // saxpy3 kernel (Gustavson + Hash): C=A*B, C<M>=A*B, C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>; Mask_struct presumably means only M's pattern is used -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                // presumably the number of fine tasks among ntasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // all of the work is in this textually included template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_int32.c b/Source/Generated/GB_AxB__land_ge_int32.c
index 91c2e36e5e..2390db250f 100644
--- a/Source/Generated/GB_AxB__land_ge_int32.c
+++ b/Source/Generated/GB_AxB__land_ge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_int32
 // A'*B function (dot2):     GB_Adot2B__land_ge_int32
 // A'*B function (dot3):     GB_Adot3B__land_ge_int32
-// A*B function (heap):      GB_AheapB__land_ge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len): len counts bool entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_INT32 || GxB_NO_LAND_BOOL || GxB_NO_GE_INT32 || GxB_NO_LAND_GE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_int32
 GrB_Info GB_Adot2B__land_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_int32
 GrB_Info GB_Adot3B__land_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_int64.c b/Source/Generated/GB_AxB__land_ge_int64.c
index 4786c5211a..23e8b56e30 100644
--- a/Source/Generated/GB_AxB__land_ge_int64.c
+++ b/Source/Generated/GB_AxB__land_ge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_int64
 // A'*B function (dot2):     GB_Adot2B__land_ge_int64
 // A'*B function (dot3):     GB_Adot3B__land_ge_int64
-// A*B function (heap):      GB_AheapB__land_ge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_INT64 || GxB_NO_LAND_BOOL || GxB_NO_GE_INT64 || GxB_NO_LAND_GE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_int64
 GrB_Info GB_Adot2B__land_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_int64
 GrB_Info GB_Adot3B__land_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_int8.c b/Source/Generated/GB_AxB__land_ge_int8.c
index b66bd427c0..acaefd6ba7 100644
--- a/Source/Generated/GB_AxB__land_ge_int8.c
+++ b/Source/Generated/GB_AxB__land_ge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_int8
 // A'*B function (dot2):     GB_Adot2B__land_ge_int8
 // A'*B function (dot3):     GB_Adot3B__land_ge_int8
-// A*B function (heap):      GB_AheapB__land_ge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_INT8 || GxB_NO_LAND_BOOL || GxB_NO_GE_INT8 || GxB_NO_LAND_GE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_int8
 GrB_Info GB_Adot2B__land_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_int8
 GrB_Info GB_Adot3B__land_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_uint16.c b/Source/Generated/GB_AxB__land_ge_uint16.c
index 2cf8c8f1ee..cc66283779 100644
--- a/Source/Generated/GB_AxB__land_ge_uint16.c
+++ b/Source/Generated/GB_AxB__land_ge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_uint16
 // A'*B function (dot2):     GB_Adot2B__land_ge_uint16
 // A'*B function (dot3):     GB_Adot3B__land_ge_uint16
-// A*B function (heap):      GB_AheapB__land_ge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_UINT16 || GxB_NO_LAND_BOOL || GxB_NO_GE_UINT16 || GxB_NO_LAND_GE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_uint16
 GrB_Info GB_Adot2B__land_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_uint16
 GrB_Info GB_Adot3B__land_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_uint32.c b/Source/Generated/GB_AxB__land_ge_uint32.c
index 1dda1a1922..32a48286b7 100644
--- a/Source/Generated/GB_AxB__land_ge_uint32.c
+++ b/Source/Generated/GB_AxB__land_ge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_uint32
 // A'*B function (dot2):     GB_Adot2B__land_ge_uint32
 // A'*B function (dot3):     GB_Adot3B__land_ge_uint32
-// A*B function (heap):      GB_AheapB__land_ge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_UINT32 || GxB_NO_LAND_BOOL || GxB_NO_GE_UINT32 || GxB_NO_LAND_GE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_uint32
 GrB_Info GB_Adot2B__land_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_uint32
 GrB_Info GB_Adot3B__land_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_uint32  // dot4 kernel (C += A'*B) for the LAND_GE_UINT32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads  // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // GB_DISABLE is nonzero: this kernel is compiled out and the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is the shared dot4 template; presumably specialized by this file's GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_uint32  // saxpy3 kernel (C=A*B, optionally masked) for the LAND_GE_UINT32 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // C is now passed directly instead of through a Chandle pointer
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and flags; presumably Mask_comp selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries, consumed by the template -- confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): looks like the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // body is the shared saxpy3 template; the locals the heap method declared here are gone
+    return (GrB_SUCCESS) ;  // unlike the heap method, no info status variable is returned
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_uint64.c b/Source/Generated/GB_AxB__land_ge_uint64.c
index 0f141b8186..a353a22dc4 100644
--- a/Source/Generated/GB_AxB__land_ge_uint64.c
+++ b/Source/Generated/GB_AxB__land_ge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_uint64
 // A'*B function (dot2):     GB_Adot2B__land_ge_uint64
 // A'*B function (dot3):     GB_Adot3B__land_ge_uint64
-// A*B function (heap):      GB_AheapB__land_ge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here LAND, done as & on bool)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_UINT64 || GxB_NO_LAND_BOOL || GxB_NO_GE_UINT64 || GxB_NO_LAND_GE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_uint64
 GrB_Info GB_Adot2B__land_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_uint64
 GrB_Info GB_Adot3B__land_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_uint64  // dot4 kernel (C += A'*B) for the LAND_GE_UINT64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads  // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // GB_DISABLE is nonzero: this kernel is compiled out and the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is the shared dot4 template; presumably specialized by this file's GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_uint64  // saxpy3 kernel (C=A*B, optionally masked) for the LAND_GE_UINT64 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // C is now passed directly instead of through a Chandle pointer
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and flags; presumably Mask_comp selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries, consumed by the template -- confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): looks like the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // body is the shared saxpy3 template; the locals the heap method declared here are gone
+    return (GrB_SUCCESS) ;  // unlike the heap method, no info status variable is returned
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ge_uint8.c b/Source/Generated/GB_AxB__land_ge_uint8.c
index 7aaf89c625..933fdee588 100644
--- a/Source/Generated/GB_AxB__land_ge_uint8.c
+++ b/Source/Generated/GB_AxB__land_ge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ge_uint8
 // A'*B function (dot2):     GB_Adot2B__land_ge_uint8
 // A'*B function (dot3):     GB_Adot3B__land_ge_uint8
-// A*B function (heap):      GB_AheapB__land_ge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__land_ge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ge_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik >= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik >= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here LAND, done as & on bool)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GE || GxB_NO_UINT8 || GxB_NO_LAND_BOOL || GxB_NO_GE_UINT8 || GxB_NO_LAND_GE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ge_uint8
 GrB_Info GB_Adot2B__land_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ge_uint8
 GrB_Info GB_Adot3B__land_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ge_uint8  // dot4 kernel (C += A'*B) for the LAND_GE_UINT8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads  // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // GB_DISABLE is nonzero: this kernel is compiled out and the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is the shared dot4 template; presumably specialized by this file's GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ge_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ge_uint8  // saxpy3 kernel (C=A*B, optionally masked) for the LAND_GE_UINT8 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // C is now passed directly instead of through a Chandle pointer
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and flags; presumably Mask_comp selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries, consumed by the template -- confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): looks like the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // body is the shared saxpy3 template; the locals the heap method declared here are gone
+    return (GrB_SUCCESS) ;  // unlike the heap method, no info status variable is returned
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_bool.c b/Source/Generated/GB_AxB__land_gt_bool.c
index 52d64a6589..8fa5f2df02 100644
--- a/Source/Generated/GB_AxB__land_gt_bool.c
+++ b/Source/Generated/GB_AxB__land_gt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_bool
 // A'*B function (dot2):     GB_Adot2B__land_gt_bool
 // A'*B function (dot3):     GB_Adot3B__land_gt_bool
-// A*B function (heap):      GB_AheapB__land_gt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here LAND, done as & on bool)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_GT_BOOL || GxB_NO_LAND_GT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_bool
 GrB_Info GB_Adot2B__land_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_bool
 GrB_Info GB_Adot3B__land_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_bool  // dot4 kernel (C += A'*B) for the LAND_GT_BOOL semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads  // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // GB_DISABLE is nonzero: this kernel is compiled out and the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is the shared dot4 template; presumably specialized by this file's GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_bool  // saxpy3 kernel (C=A*B, optionally masked) for the LAND_GT_BOOL semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // C is now passed directly instead of through a Chandle pointer
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and flags; presumably Mask_comp selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries, consumed by the template -- confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): looks like the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // body is the shared saxpy3 template; the locals the heap method declared here are gone
+    return (GrB_SUCCESS) ;  // unlike the heap method, no info status variable is returned
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_fp32.c b/Source/Generated/GB_AxB__land_gt_fp32.c
index dacbb6d695..edab3146f0 100644
--- a/Source/Generated/GB_AxB__land_gt_fp32.c
+++ b/Source/Generated/GB_AxB__land_gt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_fp32
 // A'*B function (dot2):     GB_Adot2B__land_gt_fp32
 // A'*B function (dot3):     GB_Adot3B__land_gt_fp32
-// A*B function (heap):      GB_AheapB__land_gt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here LAND, done as & on bool)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_FP32 || GxB_NO_LAND_BOOL || GxB_NO_GT_FP32 || GxB_NO_LAND_GT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_fp32
 GrB_Info GB_Adot2B__land_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_fp32
 GrB_Info GB_Adot3B__land_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_fp64.c b/Source/Generated/GB_AxB__land_gt_fp64.c
index 085715859c..8de1f53002 100644
--- a/Source/Generated/GB_AxB__land_gt_fp64.c
+++ b/Source/Generated/GB_AxB__land_gt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_fp64
 // A'*B function (dot2):     GB_Adot2B__land_gt_fp64
 // A'*B function (dot3):     GB_Adot3B__land_gt_fp64
-// A*B function (heap):      GB_AheapB__land_gt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_FP64 || GxB_NO_LAND_BOOL || GxB_NO_GT_FP64 || GxB_NO_LAND_GT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_fp64
 GrB_Info GB_Adot2B__land_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_fp64
 GrB_Info GB_Adot3B__land_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_int16.c b/Source/Generated/GB_AxB__land_gt_int16.c
index 13842251f1..edb90de806 100644
--- a/Source/Generated/GB_AxB__land_gt_int16.c
+++ b/Source/Generated/GB_AxB__land_gt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_int16
 // A'*B function (dot2):     GB_Adot2B__land_gt_int16
 // A'*B function (dot3):     GB_Adot3B__land_gt_int16
-// A*B function (heap):      GB_AheapB__land_gt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_INT16 || GxB_NO_LAND_BOOL || GxB_NO_GT_INT16 || GxB_NO_LAND_GT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_int16
 GrB_Info GB_Adot2B__land_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_int16
 GrB_Info GB_Adot3B__land_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_int32.c b/Source/Generated/GB_AxB__land_gt_int32.c
index 373f5bcb1b..6959a49e81 100644
--- a/Source/Generated/GB_AxB__land_gt_int32.c
+++ b/Source/Generated/GB_AxB__land_gt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_int32
 // A'*B function (dot2):     GB_Adot2B__land_gt_int32
 // A'*B function (dot3):     GB_Adot3B__land_gt_int32
-// A*B function (heap):      GB_AheapB__land_gt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_INT32 || GxB_NO_LAND_BOOL || GxB_NO_GT_INT32 || GxB_NO_LAND_GT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_int32
 GrB_Info GB_Adot2B__land_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_int32
 GrB_Info GB_Adot3B__land_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_int64.c b/Source/Generated/GB_AxB__land_gt_int64.c
index 3e673952bf..ced1228298 100644
--- a/Source/Generated/GB_AxB__land_gt_int64.c
+++ b/Source/Generated/GB_AxB__land_gt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_int64
 // A'*B function (dot2):     GB_Adot2B__land_gt_int64
 // A'*B function (dot3):     GB_Adot3B__land_gt_int64
-// A*B function (heap):      GB_AheapB__land_gt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_INT64 || GxB_NO_LAND_BOOL || GxB_NO_GT_INT64 || GxB_NO_LAND_GT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_int64
 GrB_Info GB_Adot2B__land_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_int64
 GrB_Info GB_Adot3B__land_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_int8.c b/Source/Generated/GB_AxB__land_gt_int8.c
index 87352872e9..17dc2fc800 100644
--- a/Source/Generated/GB_AxB__land_gt_int8.c
+++ b/Source/Generated/GB_AxB__land_gt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_int8
 // A'*B function (dot2):     GB_Adot2B__land_gt_int8
 // A'*B function (dot3):     GB_Adot3B__land_gt_int8
-// A*B function (heap):      GB_AheapB__land_gt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_INT8 || GxB_NO_LAND_BOOL || GxB_NO_GT_INT8 || GxB_NO_LAND_GT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_int8
 GrB_Info GB_Adot2B__land_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_int8
 GrB_Info GB_Adot3B__land_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_uint16.c b/Source/Generated/GB_AxB__land_gt_uint16.c
index 980262a034..fffd2a9981 100644
--- a/Source/Generated/GB_AxB__land_gt_uint16.c
+++ b/Source/Generated/GB_AxB__land_gt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_uint16
 // A'*B function (dot2):     GB_Adot2B__land_gt_uint16
 // A'*B function (dot3):     GB_Adot3B__land_gt_uint16
-// A*B function (heap):      GB_AheapB__land_gt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_UINT16 || GxB_NO_LAND_BOOL || GxB_NO_GT_UINT16 || GxB_NO_LAND_GT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_uint16
 GrB_Info GB_Adot2B__land_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_uint16
 GrB_Info GB_Adot3B__land_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_uint32.c b/Source/Generated/GB_AxB__land_gt_uint32.c
index 9d77bd20dd..cb22f2c09f 100644
--- a/Source/Generated/GB_AxB__land_gt_uint32.c
+++ b/Source/Generated/GB_AxB__land_gt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_uint32
 // A'*B function (dot2):     GB_Adot2B__land_gt_uint32
 // A'*B function (dot3):     GB_Adot3B__land_gt_uint32
-// A*B function (heap):      GB_AheapB__land_gt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_UINT32 || GxB_NO_LAND_BOOL || GxB_NO_GT_UINT32 || GxB_NO_LAND_GT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_uint32
 GrB_Info GB_Adot2B__land_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_uint32
 GrB_Info GB_Adot3B__land_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_uint64.c b/Source/Generated/GB_AxB__land_gt_uint64.c
index 2d91d0a329..952946e75f 100644
--- a/Source/Generated/GB_AxB__land_gt_uint64.c
+++ b/Source/Generated/GB_AxB__land_gt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_uint64
 // A'*B function (dot2):     GB_Adot2B__land_gt_uint64
 // A'*B function (dot3):     GB_Adot3B__land_gt_uint64
-// A*B function (heap):      GB_AheapB__land_gt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_UINT64 || GxB_NO_LAND_BOOL || GxB_NO_GT_UINT64 || GxB_NO_LAND_GT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_uint64
 GrB_Info GB_Adot2B__land_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_uint64
 GrB_Info GB_Adot3B__land_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_gt_uint8.c b/Source/Generated/GB_AxB__land_gt_uint8.c
index 953c9ce122..f208af60c8 100644
--- a/Source/Generated/GB_AxB__land_gt_uint8.c
+++ b/Source/Generated/GB_AxB__land_gt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_gt_uint8
 // A'*B function (dot2):     GB_Adot2B__land_gt_uint8
 // A'*B function (dot3):     GB_Adot3B__land_gt_uint8
-// A*B function (heap):      GB_AheapB__land_gt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__land_gt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_gt_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik > bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik > bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_GT || GxB_NO_UINT8 || GxB_NO_LAND_BOOL || GxB_NO_GT_UINT8 || GxB_NO_LAND_GT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_gt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_gt_uint8
 GrB_Info GB_Adot2B__land_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_gt_uint8
 GrB_Info GB_Adot3B__land_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_gt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_gt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_gt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_land_bool.c b/Source/Generated/GB_AxB__land_land_bool.c
index 8b45f345d1..0397d4b9f5 100644
--- a/Source/Generated/GB_AxB__land_land_bool.c
+++ b/Source/Generated/GB_AxB__land_land_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_land_bool
 // A'*B function (dot2):     GB_Adot2B__land_land_bool
 // A'*B function (dot3):     GB_Adot3B__land_land_bool
-// A*B function (heap):      GB_AheapB__land_land_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_land_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_land_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik && bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik && bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik && bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x && y) ;
+#define GB_MULT(z, x, y) \
+    z = (x && y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x && y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x && y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_LAND_LAND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_land_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_land_bool
 GrB_Info GB_Adot2B__land_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_land_bool
 GrB_Info GB_Adot3B__land_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_land_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_land_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_bool.c b/Source/Generated/GB_AxB__land_le_bool.c
index 92485e98eb..f7ceb54195 100644
--- a/Source/Generated/GB_AxB__land_le_bool.c
+++ b/Source/Generated/GB_AxB__land_le_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_bool
 // A'*B function (dot2):     GB_Adot2B__land_le_bool
 // A'*B function (dot3):     GB_Adot3B__land_le_bool
-// A*B function (heap):      GB_AheapB__land_le_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_LE_BOOL || GxB_NO_LAND_LE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_bool
 GrB_Info GB_Adot2B__land_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_bool
 GrB_Info GB_Adot3B__land_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_fp32.c b/Source/Generated/GB_AxB__land_le_fp32.c
index fa9aa51aff..7fc9b7bee5 100644
--- a/Source/Generated/GB_AxB__land_le_fp32.c
+++ b/Source/Generated/GB_AxB__land_le_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_fp32
 // A'*B function (dot2):     GB_Adot2B__land_le_fp32
 // A'*B function (dot3):     GB_Adot3B__land_le_fp32
-// A*B function (heap):      GB_AheapB__land_le_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_FP32 || GxB_NO_LAND_BOOL || GxB_NO_LE_FP32 || GxB_NO_LAND_LE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_fp32
 GrB_Info GB_Adot2B__land_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_fp32
 GrB_Info GB_Adot3B__land_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_fp32  // C+=A'*B (dense dot product), LAND_LE_FP32
+(
+    GrB_Matrix C,  // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // semiring disabled (see GB_DISABLE): generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_fp32  // saxpy3 method (Gustavson + Hash), LAND_LE_FP32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // result matrix of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; Mask_comp presumably selects C<!M>=A*B (confirm in template)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_fp64.c b/Source/Generated/GB_AxB__land_le_fp64.c
index 24fdb47933..51544cc4f5 100644
--- a/Source/Generated/GB_AxB__land_le_fp64.c
+++ b/Source/Generated/GB_AxB__land_le_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_fp64
 // A'*B function (dot2):     GB_Adot2B__land_le_fp64
 // A'*B function (dot3):     GB_Adot3B__land_le_fp64
-// A*B function (heap):      GB_AheapB__land_le_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here: &=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid add (here: x & y; expansion is unparenthesized)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0 in this file
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here: &=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts bool entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif  // GB_IS_ANY_PAIR_SEMIRING
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_FP64 || GxB_NO_LAND_BOOL || GxB_NO_LE_FP64 || GxB_NO_LAND_LE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_fp64
 GrB_Info GB_Adot2B__land_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_fp64
 GrB_Info GB_Adot3B__land_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_fp64  // C+=A'*B (dense dot product), LAND_LE_FP64
+(
+    GrB_Matrix C,  // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // semiring disabled (see GB_DISABLE): generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_fp64  // saxpy3 method (Gustavson + Hash), LAND_LE_FP64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // result matrix of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; Mask_comp presumably selects C<!M>=A*B (confirm in template)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_int16.c b/Source/Generated/GB_AxB__land_le_int16.c
index 305e23cabd..a977745343 100644
--- a/Source/Generated/GB_AxB__land_le_int16.c
+++ b/Source/Generated/GB_AxB__land_le_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_int16
 // A'*B function (dot2):     GB_Adot2B__land_le_int16
 // A'*B function (dot3):     GB_Adot3B__land_le_int16
-// A*B function (heap):      GB_AheapB__land_le_int16
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here: &=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid add (here: x & y; expansion is unparenthesized)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0 in this file
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here: &=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts bool entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif  // GB_IS_ANY_PAIR_SEMIRING
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_INT16 || GxB_NO_LAND_BOOL || GxB_NO_LE_INT16 || GxB_NO_LAND_LE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_int16
 GrB_Info GB_Adot2B__land_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_int16
 GrB_Info GB_Adot3B__land_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_int16  // C+=A'*B (dense dot product), LAND_LE_INT16
+(
+    GrB_Matrix C,  // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // semiring disabled (see GB_DISABLE): generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_int16  // saxpy3 method (Gustavson + Hash), LAND_LE_INT16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // result matrix of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; Mask_comp presumably selects C<!M>=A*B (confirm in template)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_int32.c b/Source/Generated/GB_AxB__land_le_int32.c
index 4e232185f1..35e55261b3 100644
--- a/Source/Generated/GB_AxB__land_le_int32.c
+++ b/Source/Generated/GB_AxB__land_le_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_int32
 // A'*B function (dot2):     GB_Adot2B__land_le_int32
 // A'*B function (dot3):     GB_Adot3B__land_le_int32
-// A*B function (heap):      GB_AheapB__land_le_int32
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here: &=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid add (here: x & y; expansion is unparenthesized)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0 in this file
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here: &=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts bool entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif  // GB_IS_ANY_PAIR_SEMIRING
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_INT32 || GxB_NO_LAND_BOOL || GxB_NO_LE_INT32 || GxB_NO_LAND_LE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_int32
 GrB_Info GB_Adot2B__land_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_int32
 GrB_Info GB_Adot3B__land_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_int32  // C+=A'*B (dense dot product), LAND_LE_INT32
+(
+    GrB_Matrix C,  // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // semiring disabled (see GB_DISABLE): generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_int32  // saxpy3 method (Gustavson + Hash), LAND_LE_INT32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // result matrix of C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; Mask_comp presumably selects C<!M>=A*B (confirm in template)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // the function body comes from this template
+    return (GrB_SUCCESS) ;  // compiled only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_int64.c b/Source/Generated/GB_AxB__land_le_int64.c
index 208ce276c5..f3e30f6452 100644
--- a/Source/Generated/GB_AxB__land_le_int64.c
+++ b/Source/Generated/GB_AxB__land_le_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_int64
 // A'*B function (dot2):     GB_Adot2B__land_le_int64
 // A'*B function (dot3):     GB_Adot3B__land_le_int64
-// A*B function (heap):      GB_AheapB__land_le_int64
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here: &=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid add (here: x & y; expansion is unparenthesized)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else   // branch taken here: GB_IS_ANY_PAIR_SEMIRING is 0 in this file
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here: &=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len), where len counts bool entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif  // GB_IS_ANY_PAIR_SEMIRING
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_INT64 || GxB_NO_LAND_BOOL || GxB_NO_LE_INT64 || GxB_NO_LAND_LE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_int64
 GrB_Info GB_Adot2B__land_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_int64
 GrB_Info GB_Adot3B__land_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_int8.c b/Source/Generated/GB_AxB__land_le_int8.c
index 23b176a6c0..03d82acf74 100644
--- a/Source/Generated/GB_AxB__land_le_int8.c
+++ b/Source/Generated/GB_AxB__land_le_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_int8
 // A'*B function (dot2):     GB_Adot2B__land_le_int8
 // A'*B function (dot3):     GB_Adot3B__land_le_int8
-// A*B function (heap):      GB_AheapB__land_le_int8
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where += is the LAND monoid update (&=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where + is the LAND monoid (x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where += is the LAND monoid update (&=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_INT8 || GxB_NO_LAND_BOOL || GxB_NO_LE_INT8 || GxB_NO_LAND_LE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_int8
 GrB_Info GB_Adot2B__land_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_int8
 GrB_Info GB_Adot3B__land_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_uint16.c b/Source/Generated/GB_AxB__land_le_uint16.c
index 2a5ea37cda..669ce7cbb7 100644
--- a/Source/Generated/GB_AxB__land_le_uint16.c
+++ b/Source/Generated/GB_AxB__land_le_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_uint16
 // A'*B function (dot2):     GB_Adot2B__land_le_uint16
 // A'*B function (dot3):     GB_Adot3B__land_le_uint16
-// A*B function (heap):      GB_AheapB__land_le_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where += is the LAND monoid update (&=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where + is the LAND monoid (x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where += is the LAND monoid update (&=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_UINT16 || GxB_NO_LAND_BOOL || GxB_NO_LE_UINT16 || GxB_NO_LAND_LE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_uint16
 GrB_Info GB_Adot2B__land_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_uint16
 GrB_Info GB_Adot3B__land_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_uint32.c b/Source/Generated/GB_AxB__land_le_uint32.c
index ffd7de6095..41371faf63 100644
--- a/Source/Generated/GB_AxB__land_le_uint32.c
+++ b/Source/Generated/GB_AxB__land_le_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_uint32
 // A'*B function (dot2):     GB_Adot2B__land_le_uint32
 // A'*B function (dot3):     GB_Adot3B__land_le_uint32
-// A*B function (heap):      GB_AheapB__land_le_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where += is the LAND monoid update (&=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where + is the LAND monoid (x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where += is the LAND monoid update (&=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_UINT32 || GxB_NO_LAND_BOOL || GxB_NO_LE_UINT32 || GxB_NO_LAND_LE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_uint32
 GrB_Info GB_Adot2B__land_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_uint32
 GrB_Info GB_Adot3B__land_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_uint64.c b/Source/Generated/GB_AxB__land_le_uint64.c
index 514a482eaa..a5bf6b704a 100644
--- a/Source/Generated/GB_AxB__land_le_uint64.c
+++ b/Source/Generated/GB_AxB__land_le_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_uint64
 // A'*B function (dot2):     GB_Adot2B__land_le_uint64
 // A'*B function (dot3):     GB_Adot3B__land_le_uint64
-// A*B function (heap):      GB_AheapB__land_le_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where += is the LAND monoid update (&=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where + is the LAND monoid (x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where += is the LAND monoid update (&=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_UINT64 || GxB_NO_LAND_BOOL || GxB_NO_LE_UINT64 || GxB_NO_LAND_LE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_uint64
 GrB_Info GB_Adot2B__land_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_uint64
 GrB_Info GB_Adot3B__land_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_le_uint8.c b/Source/Generated/GB_AxB__land_le_uint8.c
index 65f0a6397b..c13178aa9f 100644
--- a/Source/Generated/GB_AxB__land_le_uint8.c
+++ b/Source/Generated/GB_AxB__land_le_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_le_uint8
 // A'*B function (dot2):     GB_Adot2B__land_le_uint8
 // A'*B function (dot3):     GB_Adot3B__land_le_uint8
-// A*B function (heap):      GB_AheapB__land_le_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__land_le_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_le_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik <= bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik <= bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LE || GxB_NO_UINT8 || GxB_NO_LAND_BOOL || GxB_NO_LE_UINT8 || GxB_NO_LAND_LE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_le_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_le_uint8
 GrB_Info GB_Adot2B__land_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_le_uint8
 GrB_Info GB_Adot3B__land_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_le_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_le_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lor_bool.c b/Source/Generated/GB_AxB__land_lor_bool.c
index 847b9e6a65..1c9748a269 100644
--- a/Source/Generated/GB_AxB__land_lor_bool.c
+++ b/Source/Generated/GB_AxB__land_lor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lor_bool
 // A'*B function (dot2):     GB_Adot2B__land_lor_bool
 // A'*B function (dot3):     GB_Adot3B__land_lor_bool
-// A*B function (heap):      GB_AheapB__land_lor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_lor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lor_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik || bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik || bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik || bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x || y) ;
+#define GB_MULT(z, x, y) \
+    z = (x || y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x || y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x || y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LOR || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_LOR_BOOL || GxB_NO_LAND_LOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lor_bool
 GrB_Info GB_Adot2B__land_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lor_bool
 GrB_Info GB_Adot3B__land_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_bool.c b/Source/Generated/GB_AxB__land_lt_bool.c
index cdaaeb20bf..e00f7407ba 100644
--- a/Source/Generated/GB_AxB__land_lt_bool.c
+++ b/Source/Generated/GB_AxB__land_lt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_bool
 // A'*B function (dot2):     GB_Adot2B__land_lt_bool
 // A'*B function (dot3):     GB_Adot3B__land_lt_bool
-// A*B function (heap):      GB_AheapB__land_lt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_LT_BOOL || GxB_NO_LAND_LT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_bool
 GrB_Info GB_Adot2B__land_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_bool
 GrB_Info GB_Adot3B__land_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_fp32.c b/Source/Generated/GB_AxB__land_lt_fp32.c
index 14c287a073..847d1e0fdb 100644
--- a/Source/Generated/GB_AxB__land_lt_fp32.c
+++ b/Source/Generated/GB_AxB__land_lt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_fp32
 // A'*B function (dot2):     GB_Adot2B__land_lt_fp32
 // A'*B function (dot3):     GB_Adot3B__land_lt_fp32
-// A*B function (heap):      GB_AheapB__land_lt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_FP32 || GxB_NO_LAND_BOOL || GxB_NO_LT_FP32 || GxB_NO_LAND_LT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_fp32
 GrB_Info GB_Adot2B__land_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_fp32
 GrB_Info GB_Adot3B__land_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_fp64.c b/Source/Generated/GB_AxB__land_lt_fp64.c
index f73c19802b..c5e8c0c29a 100644
--- a/Source/Generated/GB_AxB__land_lt_fp64.c
+++ b/Source/Generated/GB_AxB__land_lt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_fp64
 // A'*B function (dot2):     GB_Adot2B__land_lt_fp64
 // A'*B function (dot3):     GB_Adot3B__land_lt_fp64
-// A*B function (heap):      GB_AheapB__land_lt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid's add operator (here: Cx [p] &= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here: x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid's add operator (here: Hx [i] &= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_FP64 || GxB_NO_LAND_BOOL || GxB_NO_LT_FP64 || GxB_NO_LAND_LT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_fp64
 GrB_Info GB_Adot2B__land_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_fp64
 GrB_Info GB_Adot3B__land_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_int16.c b/Source/Generated/GB_AxB__land_lt_int16.c
index 66fb5ad02c..94454f24f8 100644
--- a/Source/Generated/GB_AxB__land_lt_int16.c
+++ b/Source/Generated/GB_AxB__land_lt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_int16
 // A'*B function (dot2):     GB_Adot2B__land_lt_int16
 // A'*B function (dot3):     GB_Adot3B__land_lt_int16
-// A*B function (heap):      GB_AheapB__land_lt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid's add operator (here: Cx [p] &= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here: x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid's add operator (here: Hx [i] &= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_INT16 || GxB_NO_LAND_BOOL || GxB_NO_LT_INT16 || GxB_NO_LAND_LT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_int16
 GrB_Info GB_Adot2B__land_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_int16
 GrB_Info GB_Adot3B__land_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_int32.c b/Source/Generated/GB_AxB__land_lt_int32.c
index f6a6c59646..a354098c4c 100644
--- a/Source/Generated/GB_AxB__land_lt_int32.c
+++ b/Source/Generated/GB_AxB__land_lt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_int32
 // A'*B function (dot2):     GB_Adot2B__land_lt_int32
 // A'*B function (dot3):     GB_Adot3B__land_lt_int32
-// A*B function (heap):      GB_AheapB__land_lt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid's add operator (here: Cx [p] &= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here: x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid's add operator (here: Hx [i] &= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_INT32 || GxB_NO_LAND_BOOL || GxB_NO_LT_INT32 || GxB_NO_LAND_LT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_int32
 GrB_Info GB_Adot2B__land_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_int32
 GrB_Info GB_Adot3B__land_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_int64.c b/Source/Generated/GB_AxB__land_lt_int64.c
index 4264d8a1a2..e23cd232f0 100644
--- a/Source/Generated/GB_AxB__land_lt_int64.c
+++ b/Source/Generated/GB_AxB__land_lt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_int64
 // A'*B function (dot2):     GB_Adot2B__land_lt_int64
 // A'*B function (dot3):     GB_Adot3B__land_lt_int64
-// A*B function (heap):      GB_AheapB__land_lt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid's add operator (here: Cx [p] &= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y, where "+" is the monoid's add operator (here: x & y)
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid's add operator (here: Hx [i] &= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_INT64 || GxB_NO_LAND_BOOL || GxB_NO_LT_INT64 || GxB_NO_LAND_LT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_int64
 GrB_Info GB_Adot2B__land_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_int64
 GrB_Info GB_Adot3B__land_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_int8.c b/Source/Generated/GB_AxB__land_lt_int8.c
index 9830f17360..843ddda172 100644
--- a/Source/Generated/GB_AxB__land_lt_int8.c
+++ b/Source/Generated/GB_AxB__land_lt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_int8
 // A'*B function (dot2):     GB_Adot2B__land_lt_int8
 // A'*B function (dot3):     GB_Adot3B__land_lt_int8
-// A*B function (heap):      GB_AheapB__land_lt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_INT8 || GxB_NO_LAND_BOOL || GxB_NO_LT_INT8 || GxB_NO_LAND_LT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_int8
 GrB_Info GB_Adot2B__land_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_int8
 GrB_Info GB_Adot3B__land_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_uint16.c b/Source/Generated/GB_AxB__land_lt_uint16.c
index 853e1a232f..414a40a660 100644
--- a/Source/Generated/GB_AxB__land_lt_uint16.c
+++ b/Source/Generated/GB_AxB__land_lt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_uint16
 // A'*B function (dot2):     GB_Adot2B__land_lt_uint16
 // A'*B function (dot3):     GB_Adot3B__land_lt_uint16
-// A*B function (heap):      GB_AheapB__land_lt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_UINT16 || GxB_NO_LAND_BOOL || GxB_NO_LT_UINT16 || GxB_NO_LAND_LT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_uint16
 GrB_Info GB_Adot2B__land_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_uint16
 GrB_Info GB_Adot3B__land_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_uint32.c b/Source/Generated/GB_AxB__land_lt_uint32.c
index a6737969af..19bef4db55 100644
--- a/Source/Generated/GB_AxB__land_lt_uint32.c
+++ b/Source/Generated/GB_AxB__land_lt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_uint32
 // A'*B function (dot2):     GB_Adot2B__land_lt_uint32
 // A'*B function (dot3):     GB_Adot3B__land_lt_uint32
-// A*B function (heap):      GB_AheapB__land_lt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_UINT32 || GxB_NO_LAND_BOOL || GxB_NO_LT_UINT32 || GxB_NO_LAND_LT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_uint32
 GrB_Info GB_Adot2B__land_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_uint32
 GrB_Info GB_Adot3B__land_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_uint64.c b/Source/Generated/GB_AxB__land_lt_uint64.c
index 81bac6a662..056f3ee9f2 100644
--- a/Source/Generated/GB_AxB__land_lt_uint64.c
+++ b/Source/Generated/GB_AxB__land_lt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_uint64
 // A'*B function (dot2):     GB_Adot2B__land_lt_uint64
 // A'*B function (dot3):     GB_Adot3B__land_lt_uint64
-// A*B function (heap):      GB_AheapB__land_lt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_UINT64 || GxB_NO_LAND_BOOL || GxB_NO_LT_UINT64 || GxB_NO_LAND_LT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_uint64
 GrB_Info GB_Adot2B__land_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_uint64
 GrB_Info GB_Adot3B__land_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lt_uint8.c b/Source/Generated/GB_AxB__land_lt_uint8.c
index 5b90ac3fcf..74fdd94675 100644
--- a/Source/Generated/GB_AxB__land_lt_uint8.c
+++ b/Source/Generated/GB_AxB__land_lt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lt_uint8
 // A'*B function (dot2):     GB_Adot2B__land_lt_uint8
 // A'*B function (dot3):     GB_Adot3B__land_lt_uint8
-// A*B function (heap):      GB_AheapB__land_lt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__land_lt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lt_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik < bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik < bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LT || GxB_NO_UINT8 || GxB_NO_LAND_BOOL || GxB_NO_LT_UINT8 || GxB_NO_LAND_LT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lt_uint8
 GrB_Info GB_Adot2B__land_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lt_uint8
 GrB_Info GB_Adot3B__land_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_lxor_bool.c b/Source/Generated/GB_AxB__land_lxor_bool.c
index 59dd1bc095..e956dda277 100644
--- a/Source/Generated/GB_AxB__land_lxor_bool.c
+++ b/Source/Generated/GB_AxB__land_lxor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_lxor_bool
 // A'*B function (dot2):     GB_Adot2B__land_lxor_bool
 // A'*B function (dot3):     GB_Adot3B__land_lxor_bool
-// A*B function (heap):      GB_AheapB__land_lxor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_lxor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_lxor_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_LXOR || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_LAND_LXOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_lxor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_lxor_bool
 GrB_Info GB_Adot2B__land_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_lxor_bool
 GrB_Info GB_Adot3B__land_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_lxor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_lxor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_fp32.c b/Source/Generated/GB_AxB__land_ne_fp32.c
index b943311c3e..9a81818433 100644
--- a/Source/Generated/GB_AxB__land_ne_fp32.c
+++ b/Source/Generated/GB_AxB__land_ne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_fp32
 // A'*B function (dot2):     GB_Adot2B__land_ne_fp32
 // A'*B function (dot3):     GB_Adot3B__land_ne_fp32
-// A*B function (heap):      GB_AheapB__land_ne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_FP32 || GxB_NO_LAND_BOOL || GxB_NO_NE_FP32 || GxB_NO_LAND_NE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_fp32
 GrB_Info GB_Adot2B__land_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_fp32
 GrB_Info GB_Adot3B__land_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_fp64.c b/Source/Generated/GB_AxB__land_ne_fp64.c
index 8bd745ebeb..f6308c4f93 100644
--- a/Source/Generated/GB_AxB__land_ne_fp64.c
+++ b/Source/Generated/GB_AxB__land_ne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_fp64
 // A'*B function (dot2):     GB_Adot2B__land_ne_fp64
 // A'*B function (dot3):     GB_Adot3B__land_ne_fp64
-// A*B function (heap):      GB_AheapB__land_ne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_FP64 || GxB_NO_LAND_BOOL || GxB_NO_NE_FP64 || GxB_NO_LAND_NE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_fp64
 GrB_Info GB_Adot2B__land_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_fp64
 GrB_Info GB_Adot3B__land_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_int16.c b/Source/Generated/GB_AxB__land_ne_int16.c
index 745b82d2c9..54e35d9631 100644
--- a/Source/Generated/GB_AxB__land_ne_int16.c
+++ b/Source/Generated/GB_AxB__land_ne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_int16
 // A'*B function (dot2):     GB_Adot2B__land_ne_int16
 // A'*B function (dot3):     GB_Adot3B__land_ne_int16
-// A*B function (heap):      GB_AheapB__land_ne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_INT16 || GxB_NO_LAND_BOOL || GxB_NO_NE_INT16 || GxB_NO_LAND_NE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_int16
 GrB_Info GB_Adot2B__land_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_int16
 GrB_Info GB_Adot3B__land_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_int32.c b/Source/Generated/GB_AxB__land_ne_int32.c
index afb93379db..1b7b7bdab3 100644
--- a/Source/Generated/GB_AxB__land_ne_int32.c
+++ b/Source/Generated/GB_AxB__land_ne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_int32
 // A'*B function (dot2):     GB_Adot2B__land_ne_int32
 // A'*B function (dot3):     GB_Adot3B__land_ne_int32
-// A*B function (heap):      GB_AheapB__land_ne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_INT32 || GxB_NO_LAND_BOOL || GxB_NO_NE_INT32 || GxB_NO_LAND_NE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_int32
 GrB_Info GB_Adot2B__land_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_int32
 GrB_Info GB_Adot3B__land_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_int64.c b/Source/Generated/GB_AxB__land_ne_int64.c
index 5a8ed95937..09c9805c59 100644
--- a/Source/Generated/GB_AxB__land_ne_int64.c
+++ b/Source/Generated/GB_AxB__land_ne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_int64
 // A'*B function (dot2):     GB_Adot2B__land_ne_int64
 // A'*B function (dot3):     GB_Adot3B__land_ne_int64
-// A*B function (heap):      GB_AheapB__land_ne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_INT64 || GxB_NO_LAND_BOOL || GxB_NO_NE_INT64 || GxB_NO_LAND_NE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_int64
 GrB_Info GB_Adot2B__land_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_int64
 GrB_Info GB_Adot3B__land_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_int8.c b/Source/Generated/GB_AxB__land_ne_int8.c
index af07730dc1..1f6844def5 100644
--- a/Source/Generated/GB_AxB__land_ne_int8.c
+++ b/Source/Generated/GB_AxB__land_ne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_int8
 // A'*B function (dot2):     GB_Adot2B__land_ne_int8
 // A'*B function (dot3):     GB_Adot3B__land_ne_int8
-// A*B function (heap):      GB_AheapB__land_ne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_INT8 || GxB_NO_LAND_BOOL || GxB_NO_NE_INT8 || GxB_NO_LAND_NE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_int8
 GrB_Info GB_Adot2B__land_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_int8
 GrB_Info GB_Adot3B__land_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_int8
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                     // semiring disabled via GB_DISABLE: the template is not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"           // the function body is pulled in from the dot4 template file
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // NOTE(review): Mask_comp presumably selects the C<!M> case -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"         // the function body is pulled in from the saxpy3 template file
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_uint16.c b/Source/Generated/GB_AxB__land_ne_uint16.c
index b05b60bd9d..10f1db4415 100644
--- a/Source/Generated/GB_AxB__land_ne_uint16.c
+++ b/Source/Generated/GB_AxB__land_ne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_uint16
 // A'*B function (dot2):     GB_Adot2B__land_ne_uint16
 // A'*B function (dot3):     GB_Adot3B__land_ne_uint16
-// A*B function (heap):      GB_AheapB__land_ne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_UINT16 || GxB_NO_LAND_BOOL || GxB_NO_NE_UINT16 || GxB_NO_LAND_NE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_uint16
 GrB_Info GB_Adot2B__land_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_uint16
 GrB_Info GB_Adot3B__land_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_uint16
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                     // semiring disabled via GB_DISABLE: the template is not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"           // the function body is pulled in from the dot4 template file
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // NOTE(review): Mask_comp presumably selects the C<!M> case -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"         // the function body is pulled in from the saxpy3 template file
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_uint32.c b/Source/Generated/GB_AxB__land_ne_uint32.c
index 1704ed1f20..1f00603fbb 100644
--- a/Source/Generated/GB_AxB__land_ne_uint32.c
+++ b/Source/Generated/GB_AxB__land_ne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_uint32
 // A'*B function (dot2):     GB_Adot2B__land_ne_uint32
 // A'*B function (dot3):     GB_Adot3B__land_ne_uint32
-// A*B function (heap):      GB_AheapB__land_ne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_UINT32 || GxB_NO_LAND_BOOL || GxB_NO_NE_UINT32 || GxB_NO_LAND_NE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_uint32
 GrB_Info GB_Adot2B__land_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_uint32
 GrB_Info GB_Adot3B__land_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_uint32
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                     // semiring disabled via GB_DISABLE: the template is not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"           // the function body is pulled in from the dot4 template file
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // NOTE(review): Mask_comp presumably selects the C<!M> case -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"         // the function body is pulled in from the saxpy3 template file
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_uint64.c b/Source/Generated/GB_AxB__land_ne_uint64.c
index 3da25b7301..0f2c513a34 100644
--- a/Source/Generated/GB_AxB__land_ne_uint64.c
+++ b/Source/Generated/GB_AxB__land_ne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_uint64
 // A'*B function (dot2):     GB_Adot2B__land_ne_uint64
 // A'*B function (dot3):     GB_Adot3B__land_ne_uint64
-// A*B function (heap):      GB_AheapB__land_ne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_UINT64 || GxB_NO_LAND_BOOL || GxB_NO_NE_UINT64 || GxB_NO_LAND_NE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_uint64
 GrB_Info GB_Adot2B__land_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_uint64
 GrB_Info GB_Adot3B__land_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_uint64
+(
+    GrB_Matrix C,                               // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably a partition of A into naslice slices -- confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably a partition of B into nbslice slices -- confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                     // semiring disabled via GB_DISABLE: the template is not compiled
+    #else
+    #include "GB_AxB_dot4_template.c"           // the function body is pulled in from the dot4 template file
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,     // NOTE(review): Mask_comp presumably selects the C<!M> case -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"         // the function body is pulled in from the saxpy3 template file
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_ne_uint8.c b/Source/Generated/GB_AxB__land_ne_uint8.c
index 269722b157..61d4d21ce1 100644
--- a/Source/Generated/GB_AxB__land_ne_uint8.c
+++ b/Source/Generated/GB_AxB__land_ne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_ne_uint8
 // A'*B function (dot2):     GB_Adot2B__land_ne_uint8
 // A'*B function (dot3):     GB_Adot3B__land_ne_uint8
-// A*B function (heap):      GB_AheapB__land_ne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__land_ne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__land_ne_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && (aik != bkj))
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= (aik != bkj)
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_NE || GxB_NO_UINT8 || GxB_NO_LAND_BOOL || GxB_NO_NE_UINT8 || GxB_NO_LAND_NE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_ne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_ne_uint8
 GrB_Info GB_Adot2B__land_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_ne_uint8
 GrB_Info GB_Adot3B__land_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_ne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_ne_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__land_second_bool.c b/Source/Generated/GB_AxB__land_second_bool.c
index 50d4d92176..7b4bae97e3 100644
--- a/Source/Generated/GB_AxB__land_second_bool.c
+++ b/Source/Generated/GB_AxB__land_second_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__land_second_bool
 // A'*B function (dot2):     GB_Adot2B__land_second_bool
 // A'*B function (dot3):     GB_Adot3B__land_second_bool
-// A*B function (heap):      GB_AheapB__land_second_bool
+// C+=A'*B function (dot4):  GB_Adot4B__land_second_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__land_second_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = bkj
-// Add:      cij = (cij && z)
-// MultAdd:  cij = (cij && bkj)
+// Add:      cij &= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij &= bkj
 // Identity: true
 // Terminal: if (cij == false) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z && y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z &= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     true
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == false) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] &= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x & y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] &= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_SECOND || GxB_NO_BOOL || GxB_NO_LAND_BOOL || GxB_NO_SECOND_BOOL || GxB_NO_LAND_SECOND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__land_second_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__land_second_bool
 GrB_Info GB_Adot2B__land_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__land_second_bool
 GrB_Info GB_Adot3B__land_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__land_second_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__land_second_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__land_second_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__land_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_bool.c b/Source/Generated/GB_AxB__lor_eq_bool.c
index b432dad2f0..c5ec6a6eca 100644
--- a/Source/Generated/GB_AxB__lor_eq_bool.c
+++ b/Source/Generated/GB_AxB__lor_eq_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_bool
 // A'*B function (dot2):     GB_Adot2B__lor_eq_bool
 // A'*B function (dot3):     GB_Adot3B__lor_eq_bool
-// A*B function (heap):      GB_AheapB__lor_eq_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LOR_EQ_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_bool
 GrB_Info GB_Adot2B__lor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_bool
 GrB_Info GB_Adot3B__lor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_fp32.c b/Source/Generated/GB_AxB__lor_eq_fp32.c
index 979f1a9a19..37898c7dcb 100644
--- a/Source/Generated/GB_AxB__lor_eq_fp32.c
+++ b/Source/Generated/GB_AxB__lor_eq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_fp32
 // A'*B function (dot2):     GB_Adot2B__lor_eq_fp32
 // A'*B function (dot3):     GB_Adot3B__lor_eq_fp32
-// A*B function (heap):      GB_AheapB__lor_eq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_FP32 || GxB_NO_LOR_BOOL || GxB_NO_EQ_FP32 || GxB_NO_LOR_EQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_fp32
 GrB_Info GB_Adot2B__lor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_fp32
 GrB_Info GB_Adot3B__lor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_fp64.c b/Source/Generated/GB_AxB__lor_eq_fp64.c
index 3530b99ac4..b5296a9316 100644
--- a/Source/Generated/GB_AxB__lor_eq_fp64.c
+++ b/Source/Generated/GB_AxB__lor_eq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_fp64
 // A'*B function (dot2):     GB_Adot2B__lor_eq_fp64
 // A'*B function (dot3):     GB_Adot3B__lor_eq_fp64
-// A*B function (heap):      GB_AheapB__lor_eq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_FP64 || GxB_NO_LOR_BOOL || GxB_NO_EQ_FP64 || GxB_NO_LOR_EQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_fp64
 GrB_Info GB_Adot2B__lor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_fp64
 GrB_Info GB_Adot3B__lor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_int16.c b/Source/Generated/GB_AxB__lor_eq_int16.c
index 517a86c1fd..0bd7e17759 100644
--- a/Source/Generated/GB_AxB__lor_eq_int16.c
+++ b/Source/Generated/GB_AxB__lor_eq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_int16
 // A'*B function (dot2):     GB_Adot2B__lor_eq_int16
 // A'*B function (dot3):     GB_Adot3B__lor_eq_int16
-// A*B function (heap):      GB_AheapB__lor_eq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_INT16 || GxB_NO_LOR_BOOL || GxB_NO_EQ_INT16 || GxB_NO_LOR_EQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_int16
 GrB_Info GB_Adot2B__lor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_int16
 GrB_Info GB_Adot3B__lor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_int32.c b/Source/Generated/GB_AxB__lor_eq_int32.c
index 47f2152b06..59955ccd5e 100644
--- a/Source/Generated/GB_AxB__lor_eq_int32.c
+++ b/Source/Generated/GB_AxB__lor_eq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_int32
 // A'*B function (dot2):     GB_Adot2B__lor_eq_int32
 // A'*B function (dot3):     GB_Adot3B__lor_eq_int32
-// A*B function (heap):      GB_AheapB__lor_eq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_INT32 || GxB_NO_LOR_BOOL || GxB_NO_EQ_INT32 || GxB_NO_LOR_EQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_int32
 GrB_Info GB_Adot2B__lor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_int32
 GrB_Info GB_Adot3B__lor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_int64.c b/Source/Generated/GB_AxB__lor_eq_int64.c
index 8ae59dbac5..2676fbaa88 100644
--- a/Source/Generated/GB_AxB__lor_eq_int64.c
+++ b/Source/Generated/GB_AxB__lor_eq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_int64
 // A'*B function (dot2):     GB_Adot2B__lor_eq_int64
 // A'*B function (dot3):     GB_Adot3B__lor_eq_int64
-// A*B function (heap):      GB_AheapB__lor_eq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_INT64 || GxB_NO_LOR_BOOL || GxB_NO_EQ_INT64 || GxB_NO_LOR_EQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_int64
 GrB_Info GB_Adot2B__lor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_int64
 GrB_Info GB_Adot3B__lor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_int8.c b/Source/Generated/GB_AxB__lor_eq_int8.c
index c054dbbad5..0cf2be724c 100644
--- a/Source/Generated/GB_AxB__lor_eq_int8.c
+++ b/Source/Generated/GB_AxB__lor_eq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_int8
 // A'*B function (dot2):     GB_Adot2B__lor_eq_int8
 // A'*B function (dot3):     GB_Adot3B__lor_eq_int8
-// A*B function (heap):      GB_AheapB__lor_eq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_INT8 || GxB_NO_LOR_BOOL || GxB_NO_EQ_INT8 || GxB_NO_LOR_EQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_int8
 GrB_Info GB_Adot2B__lor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_int8
 GrB_Info GB_Adot3B__lor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_int8         // C+=A'*B dense dot product, lor_eq_int8 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // GB_DISABLE: OR of the GxB_NO_* flags defined above
+    return (GrB_NO_VALUE) ;             // disabled; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_int8       // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash)
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_uint16.c b/Source/Generated/GB_AxB__lor_eq_uint16.c
index 4bca17ed6f..7042b7c328 100644
--- a/Source/Generated/GB_AxB__lor_eq_uint16.c
+++ b/Source/Generated/GB_AxB__lor_eq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_uint16
 // A'*B function (dot2):     GB_Adot2B__lor_eq_uint16
 // A'*B function (dot3):     GB_Adot3B__lor_eq_uint16
-// A*B function (heap):      GB_AheapB__lor_eq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_UINT16 || GxB_NO_LOR_BOOL || GxB_NO_EQ_UINT16 || GxB_NO_LOR_EQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_uint16
 GrB_Info GB_Adot2B__lor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_uint16
 GrB_Info GB_Adot3B__lor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_uint16       // C+=A'*B dense dot product, lor_eq_uint16 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // GB_DISABLE: OR of the GxB_NO_* flags defined above
+    return (GrB_NO_VALUE) ;             // disabled; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_uint16     // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash)
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_uint32.c b/Source/Generated/GB_AxB__lor_eq_uint32.c
index 0f7b924129..6c77565f59 100644
--- a/Source/Generated/GB_AxB__lor_eq_uint32.c
+++ b/Source/Generated/GB_AxB__lor_eq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_uint32
 // A'*B function (dot2):     GB_Adot2B__lor_eq_uint32
 // A'*B function (dot3):     GB_Adot3B__lor_eq_uint32
-// A*B function (heap):      GB_AheapB__lor_eq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_UINT32 || GxB_NO_LOR_BOOL || GxB_NO_EQ_UINT32 || GxB_NO_LOR_EQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_uint32
 GrB_Info GB_Adot2B__lor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_uint32
 GrB_Info GB_Adot3B__lor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_uint32       // C+=A'*B dense dot product, lor_eq_uint32 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // GB_DISABLE: OR of the GxB_NO_* flags defined above
+    return (GrB_NO_VALUE) ;             // disabled; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_uint32     // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash)
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_uint64.c b/Source/Generated/GB_AxB__lor_eq_uint64.c
index da0c1ff7d2..5bba000052 100644
--- a/Source/Generated/GB_AxB__lor_eq_uint64.c
+++ b/Source/Generated/GB_AxB__lor_eq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_uint64
 // A'*B function (dot2):     GB_Adot2B__lor_eq_uint64
 // A'*B function (dot3):     GB_Adot3B__lor_eq_uint64
-// A*B function (heap):      GB_AheapB__lor_eq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_UINT64 || GxB_NO_LOR_BOOL || GxB_NO_EQ_UINT64 || GxB_NO_LOR_EQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_uint64
 GrB_Info GB_Adot2B__lor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_uint64
 GrB_Info GB_Adot3B__lor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_uint64       // C+=A'*B dense dot product, lor_eq_uint64 semiring
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // GB_DISABLE: OR of the GxB_NO_* flags defined above
+    return (GrB_NO_VALUE) ;             // disabled; per the GB_DISABLE comment the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_uint64     // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 (Gustavson + Hash)
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // enabled-branch body is this included template (not shown here)
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_eq_uint8.c b/Source/Generated/GB_AxB__lor_eq_uint8.c
index 9870b1990b..72d3ccc2e3 100644
--- a/Source/Generated/GB_AxB__lor_eq_uint8.c
+++ b/Source/Generated/GB_AxB__lor_eq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_eq_uint8
 // A'*B function (dot2):     GB_Adot2B__lor_eq_uint8
 // A'*B function (dot3):     GB_Adot3B__lor_eq_uint8
-// A*B function (heap):      GB_AheapB__lor_eq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_eq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_eq_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik == bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik == bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_EQ || GxB_NO_UINT8 || GxB_NO_LOR_BOOL || GxB_NO_EQ_UINT8 || GxB_NO_LOR_EQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_eq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_eq_uint8
 GrB_Info GB_Adot2B__lor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_eq_uint8
 GrB_Info GB_Adot3B__lor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_eq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_eq_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_first_bool.c b/Source/Generated/GB_AxB__lor_first_bool.c
index c63cf952ec..2934cd4aee 100644
--- a/Source/Generated/GB_AxB__lor_first_bool.c
+++ b/Source/Generated/GB_AxB__lor_first_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_first_bool
 // A'*B function (dot2):     GB_Adot2B__lor_first_bool
 // A'*B function (dot3):     GB_Adot3B__lor_first_bool
-// A*B function (heap):      GB_AheapB__lor_first_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_first_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_first_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = aik
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || aik)
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= aik
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_FIRST || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_FIRST_BOOL || GxB_NO_LOR_FIRST_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_first_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_first_bool
 GrB_Info GB_Adot2B__lor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_first_bool
 GrB_Info GB_Adot3B__lor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_first_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_first_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_bool.c b/Source/Generated/GB_AxB__lor_ge_bool.c
index a51136da36..de0f16dc77 100644
--- a/Source/Generated/GB_AxB__lor_ge_bool.c
+++ b/Source/Generated/GB_AxB__lor_ge_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_bool
 // A'*B function (dot2):     GB_Adot2B__lor_ge_bool
 // A'*B function (dot3):     GB_Adot3B__lor_ge_bool
-// A*B function (heap):      GB_AheapB__lor_ge_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_GE_BOOL || GxB_NO_LOR_GE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_bool
 GrB_Info GB_Adot2B__lor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_bool
 GrB_Info GB_Adot3B__lor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_fp32.c b/Source/Generated/GB_AxB__lor_ge_fp32.c
index af2819e6aa..3827cb036d 100644
--- a/Source/Generated/GB_AxB__lor_ge_fp32.c
+++ b/Source/Generated/GB_AxB__lor_ge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_fp32
 // A'*B function (dot2):     GB_Adot2B__lor_ge_fp32
 // A'*B function (dot3):     GB_Adot3B__lor_ge_fp32
-// A*B function (heap):      GB_AheapB__lor_ge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_FP32 || GxB_NO_LOR_BOOL || GxB_NO_GE_FP32 || GxB_NO_LOR_GE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_fp32
 GrB_Info GB_Adot2B__lor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_fp32
 GrB_Info GB_Adot3B__lor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_fp64.c b/Source/Generated/GB_AxB__lor_ge_fp64.c
index bdb20edf12..c4e95e23a3 100644
--- a/Source/Generated/GB_AxB__lor_ge_fp64.c
+++ b/Source/Generated/GB_AxB__lor_ge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_fp64
 // A'*B function (dot2):     GB_Adot2B__lor_ge_fp64
 // A'*B function (dot3):     GB_Adot3B__lor_ge_fp64
-// A*B function (heap):      GB_AheapB__lor_ge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_FP64 || GxB_NO_LOR_BOOL || GxB_NO_GE_FP64 || GxB_NO_LOR_GE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_fp64
 GrB_Info GB_Adot2B__lor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_fp64
 GrB_Info GB_Adot3B__lor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_int16.c b/Source/Generated/GB_AxB__lor_ge_int16.c
index e2fcd153bc..2df11da760 100644
--- a/Source/Generated/GB_AxB__lor_ge_int16.c
+++ b/Source/Generated/GB_AxB__lor_ge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_int16
 // A'*B function (dot2):     GB_Adot2B__lor_ge_int16
 // A'*B function (dot3):     GB_Adot3B__lor_ge_int16
-// A*B function (heap):      GB_AheapB__lor_ge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_INT16 || GxB_NO_LOR_BOOL || GxB_NO_GE_INT16 || GxB_NO_LOR_GE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_int16
 GrB_Info GB_Adot2B__lor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_int16
 GrB_Info GB_Adot3B__lor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_int32.c b/Source/Generated/GB_AxB__lor_ge_int32.c
index 08bce7b983..20805b7653 100644
--- a/Source/Generated/GB_AxB__lor_ge_int32.c
+++ b/Source/Generated/GB_AxB__lor_ge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_int32
 // A'*B function (dot2):     GB_Adot2B__lor_ge_int32
 // A'*B function (dot3):     GB_Adot3B__lor_ge_int32
-// A*B function (heap):      GB_AheapB__lor_ge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_INT32 || GxB_NO_LOR_BOOL || GxB_NO_GE_INT32 || GxB_NO_LOR_GE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_int32
 GrB_Info GB_Adot2B__lor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_int32
 GrB_Info GB_Adot3B__lor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_int64.c b/Source/Generated/GB_AxB__lor_ge_int64.c
index bbc848c0e0..775a1c94bc 100644
--- a/Source/Generated/GB_AxB__lor_ge_int64.c
+++ b/Source/Generated/GB_AxB__lor_ge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_int64
 // A'*B function (dot2):     GB_Adot2B__lor_ge_int64
 // A'*B function (dot3):     GB_Adot3B__lor_ge_int64
-// A*B function (heap):      GB_AheapB__lor_ge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_INT64 || GxB_NO_LOR_BOOL || GxB_NO_GE_INT64 || GxB_NO_LOR_GE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_int64
 GrB_Info GB_Adot2B__lor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_int64
 GrB_Info GB_Adot3B__lor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_int8.c b/Source/Generated/GB_AxB__lor_ge_int8.c
index 3299b6e48f..e9bdaebfef 100644
--- a/Source/Generated/GB_AxB__lor_ge_int8.c
+++ b/Source/Generated/GB_AxB__lor_ge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_int8
 // A'*B function (dot2):     GB_Adot2B__lor_ge_int8
 // A'*B function (dot3):     GB_Adot3B__lor_ge_int8
-// A*B function (heap):      GB_AheapB__lor_ge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_INT8 || GxB_NO_LOR_BOOL || GxB_NO_GE_INT8 || GxB_NO_LOR_GE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_int8
 GrB_Info GB_Adot2B__lor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_int8
 GrB_Info GB_Adot3B__lor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_uint16.c b/Source/Generated/GB_AxB__lor_ge_uint16.c
index 266633f98c..0d67d4010f 100644
--- a/Source/Generated/GB_AxB__lor_ge_uint16.c
+++ b/Source/Generated/GB_AxB__lor_ge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_uint16
 // A'*B function (dot2):     GB_Adot2B__lor_ge_uint16
 // A'*B function (dot3):     GB_Adot3B__lor_ge_uint16
-// A*B function (heap):      GB_AheapB__lor_ge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)) ; len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_UINT16 || GxB_NO_LOR_BOOL || GxB_NO_GE_UINT16 || GxB_NO_LOR_GE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_uint16
 GrB_Info GB_Adot2B__lor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_uint16
 GrB_Info GB_Adot3B__lor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_uint32.c b/Source/Generated/GB_AxB__lor_ge_uint32.c
index 7b6f82ae8f..6b4ca1228b 100644
--- a/Source/Generated/GB_AxB__lor_ge_uint32.c
+++ b/Source/Generated/GB_AxB__lor_ge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_uint32
 // A'*B function (dot2):     GB_Adot2B__lor_ge_uint32
 // A'*B function (dot3):     GB_Adot3B__lor_ge_uint32
-// A*B function (heap):      GB_AheapB__lor_ge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)) ; len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_UINT32 || GxB_NO_LOR_BOOL || GxB_NO_GE_UINT32 || GxB_NO_LOR_GE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_uint32
 GrB_Info GB_Adot2B__lor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_uint32
 GrB_Info GB_Adot3B__lor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_uint64.c b/Source/Generated/GB_AxB__lor_ge_uint64.c
index 8e2e9854d5..bdcae5ad33 100644
--- a/Source/Generated/GB_AxB__lor_ge_uint64.c
+++ b/Source/Generated/GB_AxB__lor_ge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_uint64
 // A'*B function (dot2):     GB_Adot2B__lor_ge_uint64
 // A'*B function (dot3):     GB_Adot3B__lor_ge_uint64
-// A*B function (heap):      GB_AheapB__lor_ge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)) ; len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_UINT64 || GxB_NO_LOR_BOOL || GxB_NO_GE_UINT64 || GxB_NO_LOR_GE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_uint64
 GrB_Info GB_Adot2B__lor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_uint64
 GrB_Info GB_Adot3B__lor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ge_uint8.c b/Source/Generated/GB_AxB__lor_ge_uint8.c
index 4e4469972c..00994a540b 100644
--- a/Source/Generated/GB_AxB__lor_ge_uint8.c
+++ b/Source/Generated/GB_AxB__lor_ge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ge_uint8
 // A'*B function (dot2):     GB_Adot2B__lor_ge_uint8
 // A'*B function (dot3):     GB_Adot3B__lor_ge_uint8
-// A*B function (heap):      GB_AheapB__lor_ge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ge_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik >= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik >= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)) ; len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GE || GxB_NO_UINT8 || GxB_NO_LOR_BOOL || GxB_NO_GE_UINT8 || GxB_NO_LOR_GE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ge_uint8
 GrB_Info GB_Adot2B__lor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ge_uint8
 GrB_Info GB_Adot3B__lor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ge_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_bool.c b/Source/Generated/GB_AxB__lor_gt_bool.c
index f97af4af39..78b04c23f2 100644
--- a/Source/Generated/GB_AxB__lor_gt_bool.c
+++ b/Source/Generated/GB_AxB__lor_gt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_bool
 // A'*B function (dot2):     GB_Adot2B__lor_gt_bool
 // A'*B function (dot3):     GB_Adot3B__lor_gt_bool
-// A*B function (heap):      GB_AheapB__lor_gt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_GT_BOOL || GxB_NO_LOR_GT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_bool
 GrB_Info GB_Adot2B__lor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_bool
 GrB_Info GB_Adot3B__lor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_fp32.c b/Source/Generated/GB_AxB__lor_gt_fp32.c
index c92ec9b9c3..a3e19c1d86 100644
--- a/Source/Generated/GB_AxB__lor_gt_fp32.c
+++ b/Source/Generated/GB_AxB__lor_gt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_fp32
 // A'*B function (dot2):     GB_Adot2B__lor_gt_fp32
 // A'*B function (dot3):     GB_Adot3B__lor_gt_fp32
-// A*B function (heap):      GB_AheapB__lor_gt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_FP32 || GxB_NO_LOR_BOOL || GxB_NO_GT_FP32 || GxB_NO_LOR_GT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_fp32
 GrB_Info GB_Adot2B__lor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_fp32
 GrB_Info GB_Adot3B__lor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_fp64.c b/Source/Generated/GB_AxB__lor_gt_fp64.c
index 74a524bc5f..1a92af12ee 100644
--- a/Source/Generated/GB_AxB__lor_gt_fp64.c
+++ b/Source/Generated/GB_AxB__lor_gt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_fp64
 // A'*B function (dot2):     GB_Adot2B__lor_gt_fp64
 // A'*B function (dot3):     GB_Adot3B__lor_gt_fp64
-// A*B function (heap):      GB_AheapB__lor_gt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_FP64 || GxB_NO_LOR_BOOL || GxB_NO_GT_FP64 || GxB_NO_LOR_GT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_fp64
 GrB_Info GB_Adot2B__lor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_fp64
 GrB_Info GB_Adot3B__lor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_int16.c b/Source/Generated/GB_AxB__lor_gt_int16.c
index 58a95dbfcc..0ecdf70e9e 100644
--- a/Source/Generated/GB_AxB__lor_gt_int16.c
+++ b/Source/Generated/GB_AxB__lor_gt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_int16
 // A'*B function (dot2):     GB_Adot2B__lor_gt_int16
 // A'*B function (dot3):     GB_Adot3B__lor_gt_int16
-// A*B function (heap):      GB_AheapB__lor_gt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_INT16 || GxB_NO_LOR_BOOL || GxB_NO_GT_INT16 || GxB_NO_LOR_GT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_int16
 GrB_Info GB_Adot2B__lor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_int16
 GrB_Info GB_Adot3B__lor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_int16    // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                               // matrix accumulated into (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if only the pattern of A is used -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if only the pattern of B is used -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced into nbslice parts -- TODO confirm
+    const int nthreads                          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_int32.c b/Source/Generated/GB_AxB__lor_gt_int32.c
index 9476377d09..10d01b7728 100644
--- a/Source/Generated/GB_AxB__lor_gt_int32.c
+++ b/Source/Generated/GB_AxB__lor_gt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_int32
 // A'*B function (dot2):     GB_Adot2B__lor_gt_int32
 // A'*B function (dot3):     GB_Adot3B__lor_gt_int32
-// A*B function (heap):      GB_AheapB__lor_gt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_INT32 || GxB_NO_LOR_BOOL || GxB_NO_GT_INT32 || GxB_NO_LOR_GT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_int32
 GrB_Info GB_Adot2B__lor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_int32
 GrB_Info GB_Adot3B__lor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_int32    // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                               // matrix accumulated into (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if only the pattern of A is used -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if only the pattern of B is used -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced into nbslice parts -- TODO confirm
+    const int nthreads                          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_int64.c b/Source/Generated/GB_AxB__lor_gt_int64.c
index 93756c8fc1..f70f1b9a9d 100644
--- a/Source/Generated/GB_AxB__lor_gt_int64.c
+++ b/Source/Generated/GB_AxB__lor_gt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_int64
 // A'*B function (dot2):     GB_Adot2B__lor_gt_int64
 // A'*B function (dot3):     GB_Adot3B__lor_gt_int64
-// A*B function (heap):      GB_AheapB__lor_gt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_INT64 || GxB_NO_LOR_BOOL || GxB_NO_GT_INT64 || GxB_NO_LOR_GT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_int64
 GrB_Info GB_Adot2B__lor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_int64
 GrB_Info GB_Adot3B__lor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_int64    // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                               // matrix accumulated into (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if only the pattern of A is used -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if only the pattern of B is used -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced into nbslice parts -- TODO confirm
+    const int nthreads                          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_int8.c b/Source/Generated/GB_AxB__lor_gt_int8.c
index 0663f95f38..6aef5c23d7 100644
--- a/Source/Generated/GB_AxB__lor_gt_int8.c
+++ b/Source/Generated/GB_AxB__lor_gt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_int8
 // A'*B function (dot2):     GB_Adot2B__lor_gt_int8
 // A'*B function (dot3):     GB_Adot3B__lor_gt_int8
-// A*B function (heap):      GB_AheapB__lor_gt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_INT8 || GxB_NO_LOR_BOOL || GxB_NO_GT_INT8 || GxB_NO_LOR_GT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_int8
 GrB_Info GB_Adot2B__lor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_int8
 GrB_Info GB_Adot3B__lor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_int8     // dot4 kernel: C+=A'*B, dense dot product
+(
+    GrB_Matrix C,                               // matrix accumulated into (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if only the pattern of A is used -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is sliced into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if only the pattern of B is used -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is sliced into nbslice parts -- TODO confirm
+    const int nthreads                          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_uint16.c b/Source/Generated/GB_AxB__lor_gt_uint16.c
index 9e08a3fc61..e6ea018439 100644
--- a/Source/Generated/GB_AxB__lor_gt_uint16.c
+++ b/Source/Generated/GB_AxB__lor_gt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_uint16
 // A'*B function (dot2):     GB_Adot2B__lor_gt_uint16
 // A'*B function (dot3):     GB_Adot3B__lor_gt_uint16
-// A*B function (heap):      GB_AheapB__lor_gt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_UINT16 || GxB_NO_LOR_BOOL || GxB_NO_GT_UINT16 || GxB_NO_LOR_GT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_uint16
 GrB_Info GB_Adot2B__lor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_uint16
 GrB_Info GB_Adot3B__lor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_uint32.c b/Source/Generated/GB_AxB__lor_gt_uint32.c
index 3b75b3a0d0..b550036e97 100644
--- a/Source/Generated/GB_AxB__lor_gt_uint32.c
+++ b/Source/Generated/GB_AxB__lor_gt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_uint32
 // A'*B function (dot2):     GB_Adot2B__lor_gt_uint32
 // A'*B function (dot3):     GB_Adot3B__lor_gt_uint32
-// A*B function (heap):      GB_AheapB__lor_gt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_UINT32 || GxB_NO_LOR_BOOL || GxB_NO_GT_UINT32 || GxB_NO_LOR_GT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_uint32
 GrB_Info GB_Adot2B__lor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_uint32
 GrB_Info GB_Adot3B__lor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_uint64.c b/Source/Generated/GB_AxB__lor_gt_uint64.c
index ae9b8f6bd3..2534472d9f 100644
--- a/Source/Generated/GB_AxB__lor_gt_uint64.c
+++ b/Source/Generated/GB_AxB__lor_gt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_uint64
 // A'*B function (dot2):     GB_Adot2B__lor_gt_uint64
 // A'*B function (dot3):     GB_Adot3B__lor_gt_uint64
-// A*B function (heap):      GB_AheapB__lor_gt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_UINT64 || GxB_NO_LOR_BOOL || GxB_NO_GT_UINT64 || GxB_NO_LOR_GT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_uint64
 GrB_Info GB_Adot2B__lor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_uint64
 GrB_Info GB_Adot3B__lor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_gt_uint8.c b/Source/Generated/GB_AxB__lor_gt_uint8.c
index e01b9cb8c5..51ecd1720b 100644
--- a/Source/Generated/GB_AxB__lor_gt_uint8.c
+++ b/Source/Generated/GB_AxB__lor_gt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_gt_uint8
 // A'*B function (dot2):     GB_Adot2B__lor_gt_uint8
 // A'*B function (dot3):     GB_Adot3B__lor_gt_uint8
-// A*B function (heap):      GB_AheapB__lor_gt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_gt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_gt_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik > bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik > bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_GT || GxB_NO_UINT8 || GxB_NO_LOR_BOOL || GxB_NO_GT_UINT8 || GxB_NO_LOR_GT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_gt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_gt_uint8
 GrB_Info GB_Adot2B__lor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_gt_uint8
 GrB_Info GB_Adot3B__lor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_gt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_gt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_gt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_land_bool.c b/Source/Generated/GB_AxB__lor_land_bool.c
index f1cf7b0ee1..085174fe24 100644
--- a/Source/Generated/GB_AxB__lor_land_bool.c
+++ b/Source/Generated/GB_AxB__lor_land_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_land_bool
 // A'*B function (dot2):     GB_Adot2B__lor_land_bool
 // A'*B function (dot3):     GB_Adot3B__lor_land_bool
-// A*B function (heap):      GB_AheapB__lor_land_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_land_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_land_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik && bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik && bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik && bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x && y) ;
+#define GB_MULT(z, x, y) \
+    z = (x && y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x && y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x && y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LAND || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_LAND_BOOL || GxB_NO_LOR_LAND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_land_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_land_bool
 GrB_Info GB_Adot2B__lor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_land_bool
 GrB_Info GB_Adot3B__lor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_land_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_land_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_bool.c b/Source/Generated/GB_AxB__lor_le_bool.c
index 027e5a5d40..10f5952fe6 100644
--- a/Source/Generated/GB_AxB__lor_le_bool.c
+++ b/Source/Generated/GB_AxB__lor_le_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_bool
 // A'*B function (dot2):     GB_Adot2B__lor_le_bool
 // A'*B function (dot3):     GB_Adot3B__lor_le_bool
-// A*B function (heap):      GB_AheapB__lor_le_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_LE_BOOL || GxB_NO_LOR_LE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_bool
 GrB_Info GB_Adot2B__lor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_bool
 GrB_Info GB_Adot3B__lor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_fp32.c b/Source/Generated/GB_AxB__lor_le_fp32.c
index 33f56a7371..3702033668 100644
--- a/Source/Generated/GB_AxB__lor_le_fp32.c
+++ b/Source/Generated/GB_AxB__lor_le_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_fp32
 // A'*B function (dot2):     GB_Adot2B__lor_le_fp32
 // A'*B function (dot3):     GB_Adot3B__lor_le_fp32
-// A*B function (heap):      GB_AheapB__lor_le_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_FP32 || GxB_NO_LOR_BOOL || GxB_NO_LE_FP32 || GxB_NO_LOR_LE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_fp32
 GrB_Info GB_Adot2B__lor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_fp32
 GrB_Info GB_Adot3B__lor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_fp64.c b/Source/Generated/GB_AxB__lor_le_fp64.c
index a5a274df90..36be0efea4 100644
--- a/Source/Generated/GB_AxB__lor_le_fp64.c
+++ b/Source/Generated/GB_AxB__lor_le_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_fp64
 // A'*B function (dot2):     GB_Adot2B__lor_le_fp64
 // A'*B function (dot3):     GB_Adot3B__lor_le_fp64
-// A*B function (heap):      GB_AheapB__lor_le_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_FP64 || GxB_NO_LOR_BOOL || GxB_NO_LE_FP64 || GxB_NO_LOR_LE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_fp64
 GrB_Info GB_Adot2B__lor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_fp64
 GrB_Info GB_Adot3B__lor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_int16.c b/Source/Generated/GB_AxB__lor_le_int16.c
index 701eaa1dc9..db659cb6df 100644
--- a/Source/Generated/GB_AxB__lor_le_int16.c
+++ b/Source/Generated/GB_AxB__lor_le_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_int16
 // A'*B function (dot2):     GB_Adot2B__lor_le_int16
 // A'*B function (dot3):     GB_Adot3B__lor_le_int16
-// A*B function (heap):      GB_AheapB__lor_le_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_INT16 || GxB_NO_LOR_BOOL || GxB_NO_LE_INT16 || GxB_NO_LOR_LE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_int16
 GrB_Info GB_Adot2B__lor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_int16
 GrB_Info GB_Adot3B__lor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_int32.c b/Source/Generated/GB_AxB__lor_le_int32.c
index 083e13406c..9ad30d4b5f 100644
--- a/Source/Generated/GB_AxB__lor_le_int32.c
+++ b/Source/Generated/GB_AxB__lor_le_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_int32
 // A'*B function (dot2):     GB_Adot2B__lor_le_int32
 // A'*B function (dot3):     GB_Adot3B__lor_le_int32
-// A*B function (heap):      GB_AheapB__lor_le_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_INT32 || GxB_NO_LOR_BOOL || GxB_NO_LE_INT32 || GxB_NO_LOR_LE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_int32
 GrB_Info GB_Adot2B__lor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_int32
 GrB_Info GB_Adot3B__lor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_int64.c b/Source/Generated/GB_AxB__lor_le_int64.c
index eeb7cb68a9..1f218a0fe9 100644
--- a/Source/Generated/GB_AxB__lor_le_int64.c
+++ b/Source/Generated/GB_AxB__lor_le_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_int64
 // A'*B function (dot2):     GB_Adot2B__lor_le_int64
 // A'*B function (dot3):     GB_Adot3B__lor_le_int64
-// A*B function (heap):      GB_AheapB__lor_le_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_INT64 || GxB_NO_LOR_BOOL || GxB_NO_LE_INT64 || GxB_NO_LOR_LE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_int64
 GrB_Info GB_Adot2B__lor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_int64
 GrB_Info GB_Adot3B__lor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_int8.c b/Source/Generated/GB_AxB__lor_le_int8.c
index 0d9e6b12b7..4b366743e1 100644
--- a/Source/Generated/GB_AxB__lor_le_int8.c
+++ b/Source/Generated/GB_AxB__lor_le_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_int8
 // A'*B function (dot2):     GB_Adot2B__lor_le_int8
 // A'*B function (dot3):     GB_Adot3B__lor_le_int8
-// A*B function (heap):      GB_AheapB__lor_le_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_INT8 || GxB_NO_LOR_BOOL || GxB_NO_LE_INT8 || GxB_NO_LOR_LE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_int8
 GrB_Info GB_Adot2B__lor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_int8
 GrB_Info GB_Adot3B__lor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_uint16.c b/Source/Generated/GB_AxB__lor_le_uint16.c
index 150139b129..0bc240df90 100644
--- a/Source/Generated/GB_AxB__lor_le_uint16.c
+++ b/Source/Generated/GB_AxB__lor_le_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_uint16
 // A'*B function (dot2):     GB_Adot2B__lor_le_uint16
 // A'*B function (dot3):     GB_Adot3B__lor_le_uint16
-// A*B function (heap):      GB_AheapB__lor_le_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_UINT16 || GxB_NO_LOR_BOOL || GxB_NO_LE_UINT16 || GxB_NO_LOR_LE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_uint16
 GrB_Info GB_Adot2B__lor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_uint16
 GrB_Info GB_Adot3B__lor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_uint32.c b/Source/Generated/GB_AxB__lor_le_uint32.c
index 0c965cb677..51c552114f 100644
--- a/Source/Generated/GB_AxB__lor_le_uint32.c
+++ b/Source/Generated/GB_AxB__lor_le_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_uint32
 // A'*B function (dot2):     GB_Adot2B__lor_le_uint32
 // A'*B function (dot3):     GB_Adot3B__lor_le_uint32
-// A*B function (heap):      GB_AheapB__lor_le_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_UINT32 || GxB_NO_LOR_BOOL || GxB_NO_LE_UINT32 || GxB_NO_LOR_LE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_uint32
 GrB_Info GB_Adot2B__lor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_uint32
 GrB_Info GB_Adot3B__lor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_uint64.c b/Source/Generated/GB_AxB__lor_le_uint64.c
index aa6448426e..e3796f49d4 100644
--- a/Source/Generated/GB_AxB__lor_le_uint64.c
+++ b/Source/Generated/GB_AxB__lor_le_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_uint64
 // A'*B function (dot2):     GB_Adot2B__lor_le_uint64
 // A'*B function (dot3):     GB_Adot3B__lor_le_uint64
-// A*B function (heap):      GB_AheapB__lor_le_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_UINT64 || GxB_NO_LOR_BOOL || GxB_NO_LE_UINT64 || GxB_NO_LOR_LE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_uint64
 GrB_Info GB_Adot2B__lor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_uint64
 GrB_Info GB_Adot3B__lor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_le_uint8.c b/Source/Generated/GB_AxB__lor_le_uint8.c
index 94fc5d3552..9f76aeff0f 100644
--- a/Source/Generated/GB_AxB__lor_le_uint8.c
+++ b/Source/Generated/GB_AxB__lor_le_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_le_uint8
 // A'*B function (dot2):     GB_Adot2B__lor_le_uint8
 // A'*B function (dot3):     GB_Adot3B__lor_le_uint8
-// A*B function (heap):      GB_AheapB__lor_le_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_le_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_le_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik <= bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik <= bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LE || GxB_NO_UINT8 || GxB_NO_LOR_BOOL || GxB_NO_LE_UINT8 || GxB_NO_LOR_LE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_le_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_le_uint8
 GrB_Info GB_Adot2B__lor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_le_uint8
 GrB_Info GB_Adot3B__lor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_le_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_le_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lor_bool.c b/Source/Generated/GB_AxB__lor_lor_bool.c
index 21e3336019..f61c083fe0 100644
--- a/Source/Generated/GB_AxB__lor_lor_bool.c
+++ b/Source/Generated/GB_AxB__lor_lor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lor_bool
 // A'*B function (dot2):     GB_Adot2B__lor_lor_bool
 // A'*B function (dot3):     GB_Adot3B__lor_lor_bool
-// A*B function (heap):      GB_AheapB__lor_lor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lor_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik || bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik || bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik || bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x || y) ;
+#define GB_MULT(z, x, y) \
+    z = (x || y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x || y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x || y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_LOR_LOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lor_bool
 GrB_Info GB_Adot2B__lor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lor_bool
 GrB_Info GB_Adot3B__lor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_bool.c b/Source/Generated/GB_AxB__lor_lt_bool.c
index e18e312297..263a6bbc79 100644
--- a/Source/Generated/GB_AxB__lor_lt_bool.c
+++ b/Source/Generated/GB_AxB__lor_lt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_bool
 // A'*B function (dot2):     GB_Adot2B__lor_lt_bool
 // A'*B function (dot3):     GB_Adot3B__lor_lt_bool
-// A*B function (heap):      GB_AheapB__lor_lt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_LT_BOOL || GxB_NO_LOR_LT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_bool
 GrB_Info GB_Adot2B__lor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_bool
 GrB_Info GB_Adot3B__lor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_fp32.c b/Source/Generated/GB_AxB__lor_lt_fp32.c
index 4fe0fa7f47..8f72a08aec 100644
--- a/Source/Generated/GB_AxB__lor_lt_fp32.c
+++ b/Source/Generated/GB_AxB__lor_lt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_fp32
 // A'*B function (dot2):     GB_Adot2B__lor_lt_fp32
 // A'*B function (dot3):     GB_Adot3B__lor_lt_fp32
-// A*B function (heap):      GB_AheapB__lor_lt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_FP32 || GxB_NO_LOR_BOOL || GxB_NO_LT_FP32 || GxB_NO_LOR_LT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_fp32
 GrB_Info GB_Adot2B__lor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_fp32
 GrB_Info GB_Adot3B__lor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_fp64.c b/Source/Generated/GB_AxB__lor_lt_fp64.c
index d885255292..60d5c29ddb 100644
--- a/Source/Generated/GB_AxB__lor_lt_fp64.c
+++ b/Source/Generated/GB_AxB__lor_lt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_fp64
 // A'*B function (dot2):     GB_Adot2B__lor_lt_fp64
 // A'*B function (dot3):     GB_Adot3B__lor_lt_fp64
-// A*B function (heap):      GB_AheapB__lor_lt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_FP64 || GxB_NO_LOR_BOOL || GxB_NO_LT_FP64 || GxB_NO_LOR_LT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_fp64
 GrB_Info GB_Adot2B__lor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_fp64
 GrB_Info GB_Adot3B__lor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_int16.c b/Source/Generated/GB_AxB__lor_lt_int16.c
index 4936233a88..e3cd18c96f 100644
--- a/Source/Generated/GB_AxB__lor_lt_int16.c
+++ b/Source/Generated/GB_AxB__lor_lt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_int16
 // A'*B function (dot2):     GB_Adot2B__lor_lt_int16
 // A'*B function (dot3):     GB_Adot3B__lor_lt_int16
-// A*B function (heap):      GB_AheapB__lor_lt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_INT16 || GxB_NO_LOR_BOOL || GxB_NO_LT_INT16 || GxB_NO_LOR_LT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_int16
 GrB_Info GB_Adot2B__lor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_int16
 GrB_Info GB_Adot3B__lor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_int32.c b/Source/Generated/GB_AxB__lor_lt_int32.c
index 41066c8437..b9f9875bb0 100644
--- a/Source/Generated/GB_AxB__lor_lt_int32.c
+++ b/Source/Generated/GB_AxB__lor_lt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_int32
 // A'*B function (dot2):     GB_Adot2B__lor_lt_int32
 // A'*B function (dot3):     GB_Adot3B__lor_lt_int32
-// A*B function (heap):      GB_AheapB__lor_lt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_INT32 || GxB_NO_LOR_BOOL || GxB_NO_LT_INT32 || GxB_NO_LOR_LT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_int32
 GrB_Info GB_Adot2B__lor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_int32
 GrB_Info GB_Adot3B__lor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_int64.c b/Source/Generated/GB_AxB__lor_lt_int64.c
index 57b1557035..d634c33872 100644
--- a/Source/Generated/GB_AxB__lor_lt_int64.c
+++ b/Source/Generated/GB_AxB__lor_lt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_int64
 // A'*B function (dot2):     GB_Adot2B__lor_lt_int64
 // A'*B function (dot3):     GB_Adot3B__lor_lt_int64
-// A*B function (heap):      GB_AheapB__lor_lt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_INT64 || GxB_NO_LOR_BOOL || GxB_NO_LT_INT64 || GxB_NO_LOR_LT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_int64
 GrB_Info GB_Adot2B__lor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_int64
 GrB_Info GB_Adot3B__lor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_int8.c b/Source/Generated/GB_AxB__lor_lt_int8.c
index 5c6f9663f1..5c48293821 100644
--- a/Source/Generated/GB_AxB__lor_lt_int8.c
+++ b/Source/Generated/GB_AxB__lor_lt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_int8
 // A'*B function (dot2):     GB_Adot2B__lor_lt_int8
 // A'*B function (dot3):     GB_Adot3B__lor_lt_int8
-// A*B function (heap):      GB_AheapB__lor_lt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_INT8 || GxB_NO_LOR_BOOL || GxB_NO_LT_INT8 || GxB_NO_LOR_LT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_int8
 GrB_Info GB_Adot2B__lor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_int8
 GrB_Info GB_Adot3B__lor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_uint16.c b/Source/Generated/GB_AxB__lor_lt_uint16.c
index 71622a2601..9c27f3d0de 100644
--- a/Source/Generated/GB_AxB__lor_lt_uint16.c
+++ b/Source/Generated/GB_AxB__lor_lt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_uint16
 // A'*B function (dot2):     GB_Adot2B__lor_lt_uint16
 // A'*B function (dot3):     GB_Adot3B__lor_lt_uint16
-// A*B function (heap):      GB_AheapB__lor_lt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_UINT16 || GxB_NO_LOR_BOOL || GxB_NO_LT_UINT16 || GxB_NO_LOR_LT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_uint16
 GrB_Info GB_Adot2B__lor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_uint16
 GrB_Info GB_Adot3B__lor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_uint16   // dense dot-product kernel, C += A'*B, for the LOR_LT_UINT16 semiring
+(
+    GrB_Matrix C,                           // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A, consumed by the template -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                 // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body is supplied by this shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_uint32.c b/Source/Generated/GB_AxB__lor_lt_uint32.c
index 693a9a016b..d95aa0941d 100644
--- a/Source/Generated/GB_AxB__lor_lt_uint32.c
+++ b/Source/Generated/GB_AxB__lor_lt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_uint32
 // A'*B function (dot2):     GB_Adot2B__lor_lt_uint32
 // A'*B function (dot3):     GB_Adot3B__lor_lt_uint32
-// A*B function (heap):      GB_AheapB__lor_lt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_UINT32 || GxB_NO_LOR_BOOL || GxB_NO_LT_UINT32 || GxB_NO_LOR_LT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_uint32
 GrB_Info GB_Adot2B__lor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_uint32
 GrB_Info GB_Adot3B__lor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_uint32   // dense dot-product kernel, C += A'*B, for the LOR_LT_UINT32 semiring
+(
+    GrB_Matrix C,                           // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A, consumed by the template -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                 // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body is supplied by this shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_uint64.c b/Source/Generated/GB_AxB__lor_lt_uint64.c
index 85a95ae081..a86124d39e 100644
--- a/Source/Generated/GB_AxB__lor_lt_uint64.c
+++ b/Source/Generated/GB_AxB__lor_lt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_uint64
 // A'*B function (dot2):     GB_Adot2B__lor_lt_uint64
 // A'*B function (dot3):     GB_Adot3B__lor_lt_uint64
-// A*B function (heap):      GB_AheapB__lor_lt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_UINT64 || GxB_NO_LOR_BOOL || GxB_NO_LT_UINT64 || GxB_NO_LOR_LT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_uint64
 GrB_Info GB_Adot2B__lor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_uint64
 GrB_Info GB_Adot3B__lor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_uint64   // dense dot-product kernel, C += A'*B, for the LOR_LT_UINT64 semiring
+(
+    GrB_Matrix C,                           // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A, consumed by the template -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                 // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body is supplied by this shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lt_uint8.c b/Source/Generated/GB_AxB__lor_lt_uint8.c
index 26c993826e..af43074ad7 100644
--- a/Source/Generated/GB_AxB__lor_lt_uint8.c
+++ b/Source/Generated/GB_AxB__lor_lt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lt_uint8
 // A'*B function (dot2):     GB_Adot2B__lor_lt_uint8
 // A'*B function (dot3):     GB_Adot3B__lor_lt_uint8
-// A*B function (heap):      GB_AheapB__lor_lt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lt_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik < bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik < bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LT || GxB_NO_UINT8 || GxB_NO_LOR_BOOL || GxB_NO_LT_UINT8 || GxB_NO_LOR_LT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lt_uint8
 GrB_Info GB_Adot2B__lor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lt_uint8
 GrB_Info GB_Adot3B__lor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lt_uint8    // dense dot-product kernel, C += A'*B, for the LOR_LT_UINT8 semiring
+(
+    GrB_Matrix C,                           // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A, consumed by the template -- confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                 // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body is supplied by this shared template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_lxor_bool.c b/Source/Generated/GB_AxB__lor_lxor_bool.c
index eb4c593cb5..46bb9b0056 100644
--- a/Source/Generated/GB_AxB__lor_lxor_bool.c
+++ b/Source/Generated/GB_AxB__lor_lxor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_lxor_bool
 // A'*B function (dot2):     GB_Adot2B__lor_lxor_bool
 // A'*B function (dot3):     GB_Adot3B__lor_lxor_bool
-// A*B function (heap):      GB_AheapB__lor_lxor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_lxor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_lxor_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_LXOR || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_LOR_LXOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_lxor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_lxor_bool
 GrB_Info GB_Adot2B__lor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_lxor_bool
 GrB_Info GB_Adot3B__lor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_lxor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_lxor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_fp32.c b/Source/Generated/GB_AxB__lor_ne_fp32.c
index b3c43d439e..f7c8f06255 100644
--- a/Source/Generated/GB_AxB__lor_ne_fp32.c
+++ b/Source/Generated/GB_AxB__lor_ne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_fp32
 // A'*B function (dot2):     GB_Adot2B__lor_ne_fp32
 // A'*B function (dot3):     GB_Adot3B__lor_ne_fp32
-// A*B function (heap):      GB_AheapB__lor_ne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_FP32 || GxB_NO_LOR_BOOL || GxB_NO_NE_FP32 || GxB_NO_LOR_NE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_fp32
 GrB_Info GB_Adot2B__lor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_fp32
 GrB_Info GB_Adot3B__lor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_fp64.c b/Source/Generated/GB_AxB__lor_ne_fp64.c
index 9391d9207b..a51dceab72 100644
--- a/Source/Generated/GB_AxB__lor_ne_fp64.c
+++ b/Source/Generated/GB_AxB__lor_ne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_fp64
 // A'*B function (dot2):     GB_Adot2B__lor_ne_fp64
 // A'*B function (dot3):     GB_Adot3B__lor_ne_fp64
-// A*B function (heap):      GB_AheapB__lor_ne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_FP64 || GxB_NO_LOR_BOOL || GxB_NO_NE_FP64 || GxB_NO_LOR_NE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_fp64
 GrB_Info GB_Adot2B__lor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_fp64
 GrB_Info GB_Adot3B__lor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_int16.c b/Source/Generated/GB_AxB__lor_ne_int16.c
index eed853d4d5..779573e317 100644
--- a/Source/Generated/GB_AxB__lor_ne_int16.c
+++ b/Source/Generated/GB_AxB__lor_ne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_int16
 // A'*B function (dot2):     GB_Adot2B__lor_ne_int16
 // A'*B function (dot3):     GB_Adot3B__lor_ne_int16
-// A*B function (heap):      GB_AheapB__lor_ne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_INT16 || GxB_NO_LOR_BOOL || GxB_NO_NE_INT16 || GxB_NO_LOR_NE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_int16
 GrB_Info GB_Adot2B__lor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_int16
 GrB_Info GB_Adot3B__lor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_int32.c b/Source/Generated/GB_AxB__lor_ne_int32.c
index d138bdbbee..c00a4667ce 100644
--- a/Source/Generated/GB_AxB__lor_ne_int32.c
+++ b/Source/Generated/GB_AxB__lor_ne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_int32
 // A'*B function (dot2):     GB_Adot2B__lor_ne_int32
 // A'*B function (dot3):     GB_Adot3B__lor_ne_int32
-// A*B function (heap):      GB_AheapB__lor_ne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_INT32 || GxB_NO_LOR_BOOL || GxB_NO_NE_INT32 || GxB_NO_LOR_NE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_int32
 GrB_Info GB_Adot2B__lor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_int32
 GrB_Info GB_Adot3B__lor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_int64.c b/Source/Generated/GB_AxB__lor_ne_int64.c
index d7d9d2cbc1..5463f23301 100644
--- a/Source/Generated/GB_AxB__lor_ne_int64.c
+++ b/Source/Generated/GB_AxB__lor_ne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_int64
 // A'*B function (dot2):     GB_Adot2B__lor_ne_int64
 // A'*B function (dot3):     GB_Adot3B__lor_ne_int64
-// A*B function (heap):      GB_AheapB__lor_ne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y: the monoid "add" (bitwise OR on bool), parenthesized for safe reuse
+#define GB_ADD_FUNCTION(x,y) \
+    ((x) | (y))
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_INT64 || GxB_NO_LOR_BOOL || GxB_NO_NE_INT64 || GxB_NO_LOR_NE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_int64
 GrB_Info GB_Adot2B__lor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_int64
 GrB_Info GB_Adot3B__lor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_int8.c b/Source/Generated/GB_AxB__lor_ne_int8.c
index 7f8052152a..52257acd05 100644
--- a/Source/Generated/GB_AxB__lor_ne_int8.c
+++ b/Source/Generated/GB_AxB__lor_ne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_int8
 // A'*B function (dot2):     GB_Adot2B__lor_ne_int8
 // A'*B function (dot3):     GB_Adot3B__lor_ne_int8
-// A*B function (heap):      GB_AheapB__lor_ne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y: the monoid "add" (bitwise OR on bool), parenthesized for safe reuse
+#define GB_ADD_FUNCTION(x,y) \
+    ((x) | (y))
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_INT8 || GxB_NO_LOR_BOOL || GxB_NO_NE_INT8 || GxB_NO_LOR_NE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_int8
 GrB_Info GB_Adot2B__lor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_int8
 GrB_Info GB_Adot3B__lor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_uint16.c b/Source/Generated/GB_AxB__lor_ne_uint16.c
index ca54c2ebee..1aaa4c236c 100644
--- a/Source/Generated/GB_AxB__lor_ne_uint16.c
+++ b/Source/Generated/GB_AxB__lor_ne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_uint16
 // A'*B function (dot2):     GB_Adot2B__lor_ne_uint16
 // A'*B function (dot3):     GB_Adot3B__lor_ne_uint16
-// A*B function (heap):      GB_AheapB__lor_ne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y: the monoid "add" (bitwise OR on bool), parenthesized for safe reuse
+#define GB_ADD_FUNCTION(x,y) \
+    ((x) | (y))
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_UINT16 || GxB_NO_LOR_BOOL || GxB_NO_NE_UINT16 || GxB_NO_LOR_NE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_uint16
 GrB_Info GB_Adot2B__lor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_uint16
 GrB_Info GB_Adot3B__lor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_uint32.c b/Source/Generated/GB_AxB__lor_ne_uint32.c
index 6febe44f8e..9093ae5309 100644
--- a/Source/Generated/GB_AxB__lor_ne_uint32.c
+++ b/Source/Generated/GB_AxB__lor_ne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_uint32
 // A'*B function (dot2):     GB_Adot2B__lor_ne_uint32
 // A'*B function (dot3):     GB_Adot3B__lor_ne_uint32
-// A*B function (heap):      GB_AheapB__lor_ne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y: the monoid "add" (bitwise OR on bool), parenthesized for safe reuse
+#define GB_ADD_FUNCTION(x,y) \
+    ((x) | (y))
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_UINT32 || GxB_NO_LOR_BOOL || GxB_NO_NE_UINT32 || GxB_NO_LOR_NE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_uint32
 GrB_Info GB_Adot2B__lor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_uint32
 GrB_Info GB_Adot3B__lor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_uint64.c b/Source/Generated/GB_AxB__lor_ne_uint64.c
index f81c14c12f..9e7388551d 100644
--- a/Source/Generated/GB_AxB__lor_ne_uint64.c
+++ b/Source/Generated/GB_AxB__lor_ne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_uint64
 // A'*B function (dot2):     GB_Adot2B__lor_ne_uint64
 // A'*B function (dot3):     GB_Adot3B__lor_ne_uint64
-// A*B function (heap):      GB_AheapB__lor_ne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_UINT64 || GxB_NO_LOR_BOOL || GxB_NO_NE_UINT64 || GxB_NO_LOR_NE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_uint64
 GrB_Info GB_Adot2B__lor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_uint64
 GrB_Info GB_Adot3B__lor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_ne_uint8.c b/Source/Generated/GB_AxB__lor_ne_uint8.c
index 139cac87c0..e298a4c9df 100644
--- a/Source/Generated/GB_AxB__lor_ne_uint8.c
+++ b/Source/Generated/GB_AxB__lor_ne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_ne_uint8
 // A'*B function (dot2):     GB_Adot2B__lor_ne_uint8
 // A'*B function (dot3):     GB_Adot3B__lor_ne_uint8
-// A*B function (heap):      GB_AheapB__lor_ne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lor_ne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_ne_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || (aik != bkj))
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= (aik != bkj)
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_NE || GxB_NO_UINT8 || GxB_NO_LOR_BOOL || GxB_NO_NE_UINT8 || GxB_NO_LOR_NE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_ne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_ne_uint8
 GrB_Info GB_Adot2B__lor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_ne_uint8
 GrB_Info GB_Adot3B__lor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_ne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_ne_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lor_second_bool.c b/Source/Generated/GB_AxB__lor_second_bool.c
index a4c713d02e..b041f9ed5b 100644
--- a/Source/Generated/GB_AxB__lor_second_bool.c
+++ b/Source/Generated/GB_AxB__lor_second_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lor_second_bool
 // A'*B function (dot2):     GB_Adot2B__lor_second_bool
 // A'*B function (dot3):     GB_Adot3B__lor_second_bool
-// A*B function (heap):      GB_AheapB__lor_second_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lor_second_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lor_second_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = bkj
-// Add:      cij = (cij || z)
-// MultAdd:  cij = (cij || bkj)
+// Add:      cij |= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij |= bkj
 // Identity: false
 // Terminal: if (cij == true) break ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z || y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z |= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == true) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] |= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x | y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] |= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_SECOND || GxB_NO_BOOL || GxB_NO_LOR_BOOL || GxB_NO_SECOND_BOOL || GxB_NO_LOR_SECOND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lor_second_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lor_second_bool
 GrB_Info GB_Adot2B__lor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lor_second_bool
 GrB_Info GB_Adot3B__lor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lor_second_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lor_second_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lor_second_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lor_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_bool.c b/Source/Generated/GB_AxB__lxor_eq_bool.c
index 774bcf9220..7a4bdb14dc 100644
--- a/Source/Generated/GB_AxB__lxor_eq_bool.c
+++ b/Source/Generated/GB_AxB__lxor_eq_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_bool
-// A*B function (heap):      GB_AheapB__lxor_eq_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_EQ_BOOL || GxB_NO_LXOR_EQ_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_bool
 GrB_Info GB_Adot2B__lxor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_bool
 GrB_Info GB_Adot3B__lxor_eq_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_bool        // hard-coded C+=A'*B (dense dot product) kernel for lxor_eq_bool
+(
+    GrB_Matrix C,                       // the matrix being updated (C+=A'*B, per the section header)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slice boundaries of A and their count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slice boundaries of B and their count; confirm in the template
+    const int nthreads                  // presumably the thread count used by the template; confirm
+)
+{   // body is either a disabled stub or the textually included template
+    #if GB_DISABLE                      // kernel compiled out via GB_DISABLE (defined earlier in this file)
+    return (GrB_NO_VALUE) ;             // per the GB_DISABLE comment, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the kernel body is textually included here
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_fp32.c b/Source/Generated/GB_AxB__lxor_eq_fp32.c
index a83acb54b0..873f3a6499 100644
--- a/Source/Generated/GB_AxB__lxor_eq_fp32.c
+++ b/Source/Generated/GB_AxB__lxor_eq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_fp32
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_fp32
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_fp32
-// A*B function (heap):      GB_AheapB__lxor_eq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_FP32 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_FP32 || GxB_NO_LXOR_EQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_fp32
 GrB_Info GB_Adot2B__lxor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_fp32
 GrB_Info GB_Adot3B__lxor_eq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_fp32        // hard-coded C+=A'*B (dense dot product) kernel for lxor_eq_fp32
+(
+    GrB_Matrix C,                       // the matrix being updated (C+=A'*B, per the section header)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slice boundaries of A and their count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slice boundaries of B and their count; confirm in the template
+    const int nthreads                  // presumably the thread count used by the template; confirm
+)
+{   // body is either a disabled stub or the textually included template
+    #if GB_DISABLE                      // kernel compiled out via GB_DISABLE (defined earlier in this file)
+    return (GrB_NO_VALUE) ;             // per the GB_DISABLE comment, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the kernel body is textually included here
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_fp64.c b/Source/Generated/GB_AxB__lxor_eq_fp64.c
index 932b05d9c7..a0a4e2249f 100644
--- a/Source/Generated/GB_AxB__lxor_eq_fp64.c
+++ b/Source/Generated/GB_AxB__lxor_eq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_fp64
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_fp64
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_fp64
-// A*B function (heap):      GB_AheapB__lxor_eq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_FP64 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_FP64 || GxB_NO_LXOR_EQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_fp64
 GrB_Info GB_Adot2B__lxor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_fp64
 GrB_Info GB_Adot3B__lxor_eq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_fp64        // hard-coded C+=A'*B (dense dot product) kernel for lxor_eq_fp64
+(
+    GrB_Matrix C,                       // the matrix being updated (C+=A'*B, per the section header)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slice boundaries of A and their count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slice boundaries of B and their count; confirm in the template
+    const int nthreads                  // presumably the thread count used by the template; confirm
+)
+{   // body is either a disabled stub or the textually included template
+    #if GB_DISABLE                      // kernel compiled out via GB_DISABLE (defined earlier in this file)
+    return (GrB_NO_VALUE) ;             // per the GB_DISABLE comment, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the kernel body is textually included here
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_int16.c b/Source/Generated/GB_AxB__lxor_eq_int16.c
index 43cc175626..4b1a63e5a4 100644
--- a/Source/Generated/GB_AxB__lxor_eq_int16.c
+++ b/Source/Generated/GB_AxB__lxor_eq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_int16
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_int16
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_int16
-// A*B function (heap):      GB_AheapB__lxor_eq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_INT16 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_INT16 || GxB_NO_LXOR_EQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_int16
 GrB_Info GB_Adot2B__lxor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_int16
 GrB_Info GB_Adot3B__lxor_eq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_int16       // hard-coded C+=A'*B (dense dot product) kernel for lxor_eq_int16
+(
+    GrB_Matrix C,                       // the matrix being updated (C+=A'*B, per the section header)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slice boundaries of A and their count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slice boundaries of B and their count; confirm in the template
+    const int nthreads                  // presumably the thread count used by the template; confirm
+)
+{   // body is either a disabled stub or the textually included template
+    #if GB_DISABLE                      // kernel compiled out via GB_DISABLE (defined earlier in this file)
+    return (GrB_NO_VALUE) ;             // per the GB_DISABLE comment, the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the kernel body is textually included here
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_int32.c b/Source/Generated/GB_AxB__lxor_eq_int32.c
index 6313edcb2b..ea373cfd56 100644
--- a/Source/Generated/GB_AxB__lxor_eq_int32.c
+++ b/Source/Generated/GB_AxB__lxor_eq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_int32
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_int32
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_int32
-// A*B function (heap):      GB_AheapB__lxor_eq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_INT32 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_INT32 || GxB_NO_LXOR_EQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_int32
 GrB_Info GB_Adot2B__lxor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_int32
 GrB_Info GB_Adot3B__lxor_eq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_int32    // dense dot-product kernel, LXOR_EQ_INT32
+(
+    GrB_Matrix C,                           // matrix that is accumulated into
+    const GrB_Matrix A, bool A_is_pattern,  // presumably: A values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably: partition of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,  // presumably: B values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably: partition of B (TODO confirm)
+    const int nthreads                      // presumably: # of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_int32  // saxpy3 kernel, LXOR_EQ_INT32 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // matrix C of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select C<!M> and structural masking (TODO confirm)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably: saxpy3 task list (TODO confirm)
+    const int ntasks,                   // presumably: # of tasks (TODO confirm)
+    const int nfine,                    // presumably: # of fine tasks (TODO confirm)
+    const int nthreads,                 // presumably: # of threads (TODO confirm)
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from this template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_int64.c b/Source/Generated/GB_AxB__lxor_eq_int64.c
index 32931f6f46..591ce4c6b0 100644
--- a/Source/Generated/GB_AxB__lxor_eq_int64.c
+++ b/Source/Generated/GB_AxB__lxor_eq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_int64
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_int64
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_int64
-// A*B function (heap):      GB_AheapB__lxor_eq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)), i.e. copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_INT64 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_INT64 || GxB_NO_LXOR_EQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_int64
 GrB_Info GB_Adot2B__lxor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_int64
 GrB_Info GB_Adot3B__lxor_eq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_int64    // dense dot-product kernel, LXOR_EQ_INT64
+(
+    GrB_Matrix C,                           // matrix that is accumulated into
+    const GrB_Matrix A, bool A_is_pattern,  // presumably: A values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably: partition of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,  // presumably: B values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably: partition of B (TODO confirm)
+    const int nthreads                      // presumably: # of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_int64  // saxpy3 kernel, LXOR_EQ_INT64 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // matrix C of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select C<!M> and structural masking (TODO confirm)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably: saxpy3 task list (TODO confirm)
+    const int ntasks,                   // presumably: # of tasks (TODO confirm)
+    const int nfine,                    // presumably: # of fine tasks (TODO confirm)
+    const int nthreads,                 // presumably: # of threads (TODO confirm)
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from this template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_int8.c b/Source/Generated/GB_AxB__lxor_eq_int8.c
index b96e5d788b..9aabcfd0f9 100644
--- a/Source/Generated/GB_AxB__lxor_eq_int8.c
+++ b/Source/Generated/GB_AxB__lxor_eq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_int8
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_int8
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_int8
-// A*B function (heap):      GB_AheapB__lxor_eq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)), i.e. copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_INT8 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_INT8 || GxB_NO_LXOR_EQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_int8
 GrB_Info GB_Adot2B__lxor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_int8
 GrB_Info GB_Adot3B__lxor_eq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_int8     // dense dot-product kernel, LXOR_EQ_INT8
+(
+    GrB_Matrix C,                           // matrix that is accumulated into
+    const GrB_Matrix A, bool A_is_pattern,  // presumably: A values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably: partition of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,  // presumably: B values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably: partition of B (TODO confirm)
+    const int nthreads                      // presumably: # of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_int8   // saxpy3 kernel, LXOR_EQ_INT8 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // matrix C of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select C<!M> and structural masking (TODO confirm)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably: saxpy3 task list (TODO confirm)
+    const int ntasks,                   // presumably: # of tasks (TODO confirm)
+    const int nfine,                    // presumably: # of fine tasks (TODO confirm)
+    const int nthreads,                 // presumably: # of threads (TODO confirm)
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from this template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_uint16.c b/Source/Generated/GB_AxB__lxor_eq_uint16.c
index e591e8bda7..147ff305b7 100644
--- a/Source/Generated/GB_AxB__lxor_eq_uint16.c
+++ b/Source/Generated/GB_AxB__lxor_eq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_uint16
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_uint16
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_uint16
-// A*B function (heap):      GB_AheapB__lxor_eq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)), i.e. copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_UINT16 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_UINT16 || GxB_NO_LXOR_EQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_uint16
 GrB_Info GB_Adot2B__lxor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_uint16
 GrB_Info GB_Adot3B__lxor_eq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_uint16   // dense dot-product kernel, LXOR_EQ_UINT16
+(
+    GrB_Matrix C,                           // matrix that is accumulated into
+    const GrB_Matrix A, bool A_is_pattern,  // presumably: A values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably: partition of A (TODO confirm)
+    const GrB_Matrix B, bool B_is_pattern,  // presumably: B values unused if true (TODO confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably: partition of B (TODO confirm)
+    const int nthreads                      // presumably: # of threads to use (TODO confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_uint16 // saxpy3 kernel, LXOR_EQ_UINT16 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // matrix C of C=A*B, C<M>=A*B, C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select C<!M> and structural masking (TODO confirm)
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably: saxpy3 task list (TODO confirm)
+    const int ntasks,                   // presumably: # of tasks (TODO confirm)
+    const int nfine,                    // presumably: # of fine tasks (TODO confirm)
+    const int nthreads,                 // presumably: # of threads (TODO confirm)
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // kernel body comes from this template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_uint32.c b/Source/Generated/GB_AxB__lxor_eq_uint32.c
index e90b7068ef..9d29bd8ae5 100644
--- a/Source/Generated/GB_AxB__lxor_eq_uint32.c
+++ b/Source/Generated/GB_AxB__lxor_eq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_uint32
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_uint32
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_uint32
-// A*B function (heap):      GB_AheapB__lxor_eq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)), i.e. copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_UINT32 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_UINT32 || GxB_NO_LXOR_EQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_uint32
 GrB_Info GB_Adot2B__lxor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_uint32
 GrB_Info GB_Adot3B__lxor_eq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_uint32  // hard-coded C+=A'*B (dense dot product) kernel, lxor_eq_uint32 semiring
+(
+    GrB_Matrix C,  // matrix being updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // nonzero when any GxB_NO_* switch listed in this file's GB_DISABLE is set
+    return (GrB_NO_VALUE) ;  // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is textually included; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_uint64.c b/Source/Generated/GB_AxB__lxor_eq_uint64.c
index 1136d63dd3..f5855980b1 100644
--- a/Source/Generated/GB_AxB__lxor_eq_uint64.c
+++ b/Source/Generated/GB_AxB__lxor_eq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_uint64
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_uint64
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_uint64
-// A*B function (heap):      GB_AheapB__lxor_eq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_UINT64 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_UINT64 || GxB_NO_LXOR_EQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_uint64
 GrB_Info GB_Adot2B__lxor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_uint64
 GrB_Info GB_Adot3B__lxor_eq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_uint64  // hard-coded C+=A'*B (dense dot product) kernel, lxor_eq_uint64 semiring
+(
+    GrB_Matrix C,  // matrix being updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // nonzero when any GxB_NO_* switch listed in this file's GB_DISABLE is set
+    return (GrB_NO_VALUE) ;  // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is textually included; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_eq_uint8.c b/Source/Generated/GB_AxB__lxor_eq_uint8.c
index b162ee9d7e..b56edba957 100644
--- a/Source/Generated/GB_AxB__lxor_eq_uint8.c
+++ b/Source/Generated/GB_AxB__lxor_eq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_eq_uint8
 // A'*B function (dot2):     GB_Adot2B__lxor_eq_uint8
 // A'*B function (dot3):     GB_Adot3B__lxor_eq_uint8
-// A*B function (heap):      GB_AheapB__lxor_eq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_eq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_eq_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik == bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik == bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik == bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_EQ || GxB_NO_UINT8 || GxB_NO_LXOR_BOOL || GxB_NO_EQ_UINT8 || GxB_NO_LXOR_EQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_eq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_eq_uint8
 GrB_Info GB_Adot2B__lxor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_eq_uint8
 GrB_Info GB_Adot3B__lxor_eq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_eq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_eq_uint8  // hard-coded C+=A'*B (dense dot product) kernel, lxor_eq_uint8 semiring
+(
+    GrB_Matrix C,  // matrix being updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // nonzero when any GxB_NO_* switch listed in this file's GB_DISABLE is set
+    return (GrB_NO_VALUE) ;  // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is textually included; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_eq_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_eq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_first_bool.c b/Source/Generated/GB_AxB__lxor_first_bool.c
index 3c55e3069d..ec4f795a6c 100644
--- a/Source/Generated/GB_AxB__lxor_first_bool.c
+++ b/Source/Generated/GB_AxB__lxor_first_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_first_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_first_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_first_bool
-// A*B function (heap):      GB_AheapB__lxor_first_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_first_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_first_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = aik
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != aik)
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= aik
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_FIRST || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_FIRST_BOOL || GxB_NO_LXOR_FIRST_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_first_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_first_bool
 GrB_Info GB_Adot2B__lxor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_first_bool
 GrB_Info GB_Adot3B__lxor_first_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_first_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_first_bool  // hard-coded C+=A'*B (dense dot product) kernel, lxor_first_bool semiring
+(
+    GrB_Matrix C,  // matrix being updated: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE  // nonzero when any GxB_NO_* switch listed in this file's GB_DISABLE is set
+    return (GrB_NO_VALUE) ;  // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // body is textually included; presumably specialized by the GB_* macros defined earlier in this file
+    return (GrB_SUCCESS) ;  // reached after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_first_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_first_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_bool.c b/Source/Generated/GB_AxB__lxor_ge_bool.c
index 4994ad7749..497c6cf363 100644
--- a/Source/Generated/GB_AxB__lxor_ge_bool.c
+++ b/Source/Generated/GB_AxB__lxor_ge_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_bool
-// A*B function (heap):      GB_AheapB__lxor_ge_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_GE_BOOL || GxB_NO_LXOR_GE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_bool
 GrB_Info GB_Adot2B__lxor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_bool
 GrB_Info GB_Adot3B__lxor_ge_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_fp32.c b/Source/Generated/GB_AxB__lxor_ge_fp32.c
index 3ac0174ac1..bf90b9e650 100644
--- a/Source/Generated/GB_AxB__lxor_ge_fp32.c
+++ b/Source/Generated/GB_AxB__lxor_ge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_fp32
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_fp32
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_fp32
-// A*B function (heap):      GB_AheapB__lxor_ge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the LXOR monoid (Cx [p] ^= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y, where "+" is the LXOR monoid (x ^ y)
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the LXOR monoid (Hx [i] ^= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_FP32 || GxB_NO_LXOR_BOOL || GxB_NO_GE_FP32 || GxB_NO_LXOR_GE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_fp32
 GrB_Info GB_Adot2B__lxor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_fp32
 GrB_Info GB_Adot3B__lxor_ge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_fp64.c b/Source/Generated/GB_AxB__lxor_ge_fp64.c
index 2ebdfc46e1..c30d8955bd 100644
--- a/Source/Generated/GB_AxB__lxor_ge_fp64.c
+++ b/Source/Generated/GB_AxB__lxor_ge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_fp64
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_fp64
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_fp64
-// A*B function (heap):      GB_AheapB__lxor_ge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the LXOR monoid (Cx [p] ^= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y, where "+" is the LXOR monoid (x ^ y)
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the LXOR monoid (Hx [i] ^= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_FP64 || GxB_NO_LXOR_BOOL || GxB_NO_GE_FP64 || GxB_NO_LXOR_GE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_fp64
 GrB_Info GB_Adot2B__lxor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_fp64
 GrB_Info GB_Adot3B__lxor_ge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_int16.c b/Source/Generated/GB_AxB__lxor_ge_int16.c
index 2ae9578c58..97f589d79b 100644
--- a/Source/Generated/GB_AxB__lxor_ge_int16.c
+++ b/Source/Generated/GB_AxB__lxor_ge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_int16
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_int16
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_int16
-// A*B function (heap):      GB_AheapB__lxor_ge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the LXOR monoid (Cx [p] ^= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y, where "+" is the LXOR monoid (x ^ y)
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the LXOR monoid (Hx [i] ^= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_INT16 || GxB_NO_LXOR_BOOL || GxB_NO_GE_INT16 || GxB_NO_LXOR_GE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_int16
 GrB_Info GB_Adot2B__lxor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_int16
 GrB_Info GB_Adot3B__lxor_ge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_int32.c b/Source/Generated/GB_AxB__lxor_ge_int32.c
index 04b1f9a7f7..79905a8dda 100644
--- a/Source/Generated/GB_AxB__lxor_ge_int32.c
+++ b/Source/Generated/GB_AxB__lxor_ge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_int32
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_int32
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_int32
-// A*B function (heap):      GB_AheapB__lxor_ge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the LXOR monoid (Cx [p] ^= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y, where "+" is the LXOR monoid (x ^ y)
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the LXOR monoid (Hx [i] ^= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_INT32 || GxB_NO_LXOR_BOOL || GxB_NO_GE_INT32 || GxB_NO_LXOR_GE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_int32
 GrB_Info GB_Adot2B__lxor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_int32
 GrB_Info GB_Adot3B__lxor_ge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_int64.c b/Source/Generated/GB_AxB__lxor_ge_int64.c
index 437d57d7fd..acf99d5b9f 100644
--- a/Source/Generated/GB_AxB__lxor_ge_int64.c
+++ b/Source/Generated/GB_AxB__lxor_ge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_int64
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_int64
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_int64
-// A*B function (heap):      GB_AheapB__lxor_ge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_INT64 || GxB_NO_LXOR_BOOL || GxB_NO_GE_INT64 || GxB_NO_LXOR_GE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_int64
 GrB_Info GB_Adot2B__lxor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_int64
 GrB_Info GB_Adot3B__lxor_ge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_int8.c b/Source/Generated/GB_AxB__lxor_ge_int8.c
index aaa7774629..62f4f0cbc9 100644
--- a/Source/Generated/GB_AxB__lxor_ge_int8.c
+++ b/Source/Generated/GB_AxB__lxor_ge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_int8
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_int8
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_int8
-// A*B function (heap):      GB_AheapB__lxor_ge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_INT8 || GxB_NO_LXOR_BOOL || GxB_NO_GE_INT8 || GxB_NO_LXOR_GE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_int8
 GrB_Info GB_Adot2B__lxor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_int8
 GrB_Info GB_Adot3B__lxor_ge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_uint16.c b/Source/Generated/GB_AxB__lxor_ge_uint16.c
index 1f446fbad8..136e6464ae 100644
--- a/Source/Generated/GB_AxB__lxor_ge_uint16.c
+++ b/Source/Generated/GB_AxB__lxor_ge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_uint16
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_uint16
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_uint16
-// A*B function (heap):      GB_AheapB__lxor_ge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_UINT16 || GxB_NO_LXOR_BOOL || GxB_NO_GE_UINT16 || GxB_NO_LXOR_GE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_uint16
 GrB_Info GB_Adot2B__lxor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_uint16
 GrB_Info GB_Adot3B__lxor_ge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_uint32.c b/Source/Generated/GB_AxB__lxor_ge_uint32.c
index 0513a25f27..153bdd7441 100644
--- a/Source/Generated/GB_AxB__lxor_ge_uint32.c
+++ b/Source/Generated/GB_AxB__lxor_ge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_uint32
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_uint32
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_uint32
-// A*B function (heap):      GB_AheapB__lxor_ge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_UINT32 || GxB_NO_LXOR_BOOL || GxB_NO_GE_UINT32 || GxB_NO_LXOR_GE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_uint32
 GrB_Info GB_Adot2B__lxor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_uint32
 GrB_Info GB_Adot3B__lxor_ge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_uint64.c b/Source/Generated/GB_AxB__lxor_ge_uint64.c
index 851095a542..f6f37f914b 100644
--- a/Source/Generated/GB_AxB__lxor_ge_uint64.c
+++ b/Source/Generated/GB_AxB__lxor_ge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_uint64
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_uint64
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_uint64
-// A*B function (heap):      GB_AheapB__lxor_ge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_UINT64 || GxB_NO_LXOR_BOOL || GxB_NO_GE_UINT64 || GxB_NO_LXOR_GE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_uint64
 GrB_Info GB_Adot2B__lxor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_uint64
 GrB_Info GB_Adot3B__lxor_ge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ge_uint8.c b/Source/Generated/GB_AxB__lxor_ge_uint8.c
index 3f2b3f7dc0..70544d3e1c 100644
--- a/Source/Generated/GB_AxB__lxor_ge_uint8.c
+++ b/Source/Generated/GB_AxB__lxor_ge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ge_uint8
 // A'*B function (dot2):     GB_Adot2B__lxor_ge_uint8
 // A'*B function (dot3):     GB_Adot3B__lxor_ge_uint8
-// A*B function (heap):      GB_AheapB__lxor_ge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ge_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik >= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik >= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik >= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GE || GxB_NO_UINT8 || GxB_NO_LXOR_BOOL || GxB_NO_GE_UINT8 || GxB_NO_LXOR_GE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ge_uint8
 GrB_Info GB_Adot2B__lxor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ge_uint8
 GrB_Info GB_Adot3B__lxor_ge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ge_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_bool.c b/Source/Generated/GB_AxB__lxor_gt_bool.c
index 91cdbf20a9..71fa44bb4c 100644
--- a/Source/Generated/GB_AxB__lxor_gt_bool.c
+++ b/Source/Generated/GB_AxB__lxor_gt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_bool
-// A*B function (heap):      GB_AheapB__lxor_gt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_GT_BOOL || GxB_NO_LXOR_GT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_bool
 GrB_Info GB_Adot2B__lxor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_bool
 GrB_Info GB_Adot3B__lxor_gt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_fp32.c b/Source/Generated/GB_AxB__lxor_gt_fp32.c
index dc3bdfb5f0..7bd7b81e2a 100644
--- a/Source/Generated/GB_AxB__lxor_gt_fp32.c
+++ b/Source/Generated/GB_AxB__lxor_gt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_fp32
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_fp32
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_fp32
-// A*B function (heap):      GB_AheapB__lxor_gt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_FP32 || GxB_NO_LXOR_BOOL || GxB_NO_GT_FP32 || GxB_NO_LXOR_GT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_fp32
 GrB_Info GB_Adot2B__lxor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_fp32
 GrB_Info GB_Adot3B__lxor_gt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_fp64.c b/Source/Generated/GB_AxB__lxor_gt_fp64.c
index a35d39d014..428d389065 100644
--- a/Source/Generated/GB_AxB__lxor_gt_fp64.c
+++ b/Source/Generated/GB_AxB__lxor_gt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_fp64
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_fp64
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_fp64
-// A*B function (heap):      GB_AheapB__lxor_gt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_FP64 || GxB_NO_LXOR_BOOL || GxB_NO_GT_FP64 || GxB_NO_LXOR_GT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_fp64
 GrB_Info GB_Adot2B__lxor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_fp64
 GrB_Info GB_Adot3B__lxor_gt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_int16.c b/Source/Generated/GB_AxB__lxor_gt_int16.c
index 162aa84f1c..55a09be10b 100644
--- a/Source/Generated/GB_AxB__lxor_gt_int16.c
+++ b/Source/Generated/GB_AxB__lxor_gt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_int16
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_int16
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_int16
-// A*B function (heap):      GB_AheapB__lxor_gt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_INT16 || GxB_NO_LXOR_BOOL || GxB_NO_GT_INT16 || GxB_NO_LXOR_GT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_int16
 GrB_Info GB_Adot2B__lxor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_int16
 GrB_Info GB_Adot3B__lxor_gt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_int32.c b/Source/Generated/GB_AxB__lxor_gt_int32.c
index c74362db1e..f8f9dbea77 100644
--- a/Source/Generated/GB_AxB__lxor_gt_int32.c
+++ b/Source/Generated/GB_AxB__lxor_gt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_int32
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_int32
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_int32
-// A*B function (heap):      GB_AheapB__lxor_gt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_INT32 || GxB_NO_LXOR_BOOL || GxB_NO_GT_INT32 || GxB_NO_LXOR_GT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_int32
 GrB_Info GB_Adot2B__lxor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_int32
 GrB_Info GB_Adot3B__lxor_gt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_int64.c b/Source/Generated/GB_AxB__lxor_gt_int64.c
index 81ec642986..6c8d15b7f0 100644
--- a/Source/Generated/GB_AxB__lxor_gt_int64.c
+++ b/Source/Generated/GB_AxB__lxor_gt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_int64
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_int64
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_int64
-// A*B function (heap):      GB_AheapB__lxor_gt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_INT64 || GxB_NO_LXOR_BOOL || GxB_NO_GT_INT64 || GxB_NO_LXOR_GT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_int64
 GrB_Info GB_Adot2B__lxor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_int64
 GrB_Info GB_Adot3B__lxor_gt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_int8.c b/Source/Generated/GB_AxB__lxor_gt_int8.c
index ec1ad5e81e..68c3091eb9 100644
--- a/Source/Generated/GB_AxB__lxor_gt_int8.c
+++ b/Source/Generated/GB_AxB__lxor_gt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_int8
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_int8
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_int8
-// A*B function (heap):      GB_AheapB__lxor_gt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_INT8 || GxB_NO_LXOR_BOOL || GxB_NO_GT_INT8 || GxB_NO_LXOR_GT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_int8
 GrB_Info GB_Adot2B__lxor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_int8
 GrB_Info GB_Adot3B__lxor_gt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_uint16.c b/Source/Generated/GB_AxB__lxor_gt_uint16.c
index 0a7279731a..04ac3c37ba 100644
--- a/Source/Generated/GB_AxB__lxor_gt_uint16.c
+++ b/Source/Generated/GB_AxB__lxor_gt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_uint16
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_uint16
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_uint16
-// A*B function (heap):      GB_AheapB__lxor_gt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_UINT16 || GxB_NO_LXOR_BOOL || GxB_NO_GT_UINT16 || GxB_NO_LXOR_GT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_uint16
 GrB_Info GB_Adot2B__lxor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_uint16
 GrB_Info GB_Adot3B__lxor_gt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_uint32.c b/Source/Generated/GB_AxB__lxor_gt_uint32.c
index e70c803154..41c1e4bec9 100644
--- a/Source/Generated/GB_AxB__lxor_gt_uint32.c
+++ b/Source/Generated/GB_AxB__lxor_gt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_uint32
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_uint32
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_uint32
-// A*B function (heap):      GB_AheapB__lxor_gt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_UINT32 || GxB_NO_LXOR_BOOL || GxB_NO_GT_UINT32 || GxB_NO_LXOR_GT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_uint32
 GrB_Info GB_Adot2B__lxor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_uint32
 GrB_Info GB_Adot3B__lxor_gt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_uint64.c b/Source/Generated/GB_AxB__lxor_gt_uint64.c
index 265a20e19a..8362a2ee04 100644
--- a/Source/Generated/GB_AxB__lxor_gt_uint64.c
+++ b/Source/Generated/GB_AxB__lxor_gt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_uint64
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_uint64
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_uint64
-// A*B function (heap):      GB_AheapB__lxor_gt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_UINT64 || GxB_NO_LXOR_BOOL || GxB_NO_GT_UINT64 || GxB_NO_LXOR_GT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_uint64
 GrB_Info GB_Adot2B__lxor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_uint64
 GrB_Info GB_Adot3B__lxor_gt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_gt_uint8.c b/Source/Generated/GB_AxB__lxor_gt_uint8.c
index ab9678026a..66c73c936f 100644
--- a/Source/Generated/GB_AxB__lxor_gt_uint8.c
+++ b/Source/Generated/GB_AxB__lxor_gt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_gt_uint8
 // A'*B function (dot2):     GB_Adot2B__lxor_gt_uint8
 // A'*B function (dot3):     GB_Adot3B__lxor_gt_uint8
-// A*B function (heap):      GB_AheapB__lxor_gt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_gt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_gt_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik > bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik > bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik > bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_GT || GxB_NO_UINT8 || GxB_NO_LXOR_BOOL || GxB_NO_GT_UINT8 || GxB_NO_LXOR_GT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_gt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_gt_uint8
 GrB_Info GB_Adot2B__lxor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_gt_uint8
 GrB_Info GB_Adot3B__lxor_gt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_gt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_gt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_gt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_gt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_land_bool.c b/Source/Generated/GB_AxB__lxor_land_bool.c
index 5857466a4f..efdca1ce1f 100644
--- a/Source/Generated/GB_AxB__lxor_land_bool.c
+++ b/Source/Generated/GB_AxB__lxor_land_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_land_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_land_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_land_bool
-// A*B function (heap):      GB_AheapB__lxor_land_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_land_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_land_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik && bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik && bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik && bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x && y) ;
+#define GB_MULT(z, x, y) \
+    z = (x && y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x && y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x && y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LAND || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_LAND_BOOL || GxB_NO_LXOR_LAND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_land_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_land_bool
 GrB_Info GB_Adot2B__lxor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_land_bool
 GrB_Info GB_Adot3B__lxor_land_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_land_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_land_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_land_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_bool.c b/Source/Generated/GB_AxB__lxor_le_bool.c
index b264703613..9d7690e3d8 100644
--- a/Source/Generated/GB_AxB__lxor_le_bool.c
+++ b/Source/Generated/GB_AxB__lxor_le_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_le_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_le_bool
-// A*B function (heap):      GB_AheapB__lxor_le_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_LE_BOOL || GxB_NO_LXOR_LE_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_bool
 GrB_Info GB_Adot2B__lxor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_bool
 GrB_Info GB_Adot3B__lxor_le_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_fp32.c b/Source/Generated/GB_AxB__lxor_le_fp32.c
index da64d25ba4..bc065772af 100644
--- a/Source/Generated/GB_AxB__lxor_le_fp32.c
+++ b/Source/Generated/GB_AxB__lxor_le_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_fp32
 // A'*B function (dot2):     GB_Adot2B__lxor_le_fp32
 // A'*B function (dot3):     GB_Adot3B__lxor_le_fp32
-// A*B function (heap):      GB_AheapB__lxor_le_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_FP32 || GxB_NO_LXOR_BOOL || GxB_NO_LE_FP32 || GxB_NO_LXOR_LE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_fp32
 GrB_Info GB_Adot2B__lxor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_fp32
 GrB_Info GB_Adot3B__lxor_le_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_fp64.c b/Source/Generated/GB_AxB__lxor_le_fp64.c
index e1bd27371b..abdce020b6 100644
--- a/Source/Generated/GB_AxB__lxor_le_fp64.c
+++ b/Source/Generated/GB_AxB__lxor_le_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_fp64
 // A'*B function (dot2):     GB_Adot2B__lxor_le_fp64
 // A'*B function (dot3):     GB_Adot3B__lxor_le_fp64
-// A*B function (heap):      GB_AheapB__lxor_le_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_FP64 || GxB_NO_LXOR_BOOL || GxB_NO_LE_FP64 || GxB_NO_LXOR_LE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_fp64
 GrB_Info GB_Adot2B__lxor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_fp64
 GrB_Info GB_Adot3B__lxor_le_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_int16.c b/Source/Generated/GB_AxB__lxor_le_int16.c
index eebfdfce84..80b44ecf79 100644
--- a/Source/Generated/GB_AxB__lxor_le_int16.c
+++ b/Source/Generated/GB_AxB__lxor_le_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_int16
 // A'*B function (dot2):     GB_Adot2B__lxor_le_int16
 // A'*B function (dot3):     GB_Adot3B__lxor_le_int16
-// A*B function (heap):      GB_AheapB__lxor_le_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_INT16 || GxB_NO_LXOR_BOOL || GxB_NO_LE_INT16 || GxB_NO_LXOR_LE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_int16
 GrB_Info GB_Adot2B__lxor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_int16
 GrB_Info GB_Adot3B__lxor_le_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_int32.c b/Source/Generated/GB_AxB__lxor_le_int32.c
index d46fea8077..86d2f3fac0 100644
--- a/Source/Generated/GB_AxB__lxor_le_int32.c
+++ b/Source/Generated/GB_AxB__lxor_le_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_int32
 // A'*B function (dot2):     GB_Adot2B__lxor_le_int32
 // A'*B function (dot3):     GB_Adot3B__lxor_le_int32
-// A*B function (heap):      GB_AheapB__lxor_le_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_INT32 || GxB_NO_LXOR_BOOL || GxB_NO_LE_INT32 || GxB_NO_LXOR_LE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_int32
 GrB_Info GB_Adot2B__lxor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_int32
 GrB_Info GB_Adot3B__lxor_le_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_int64.c b/Source/Generated/GB_AxB__lxor_le_int64.c
index b2c6c530d9..5230619aee 100644
--- a/Source/Generated/GB_AxB__lxor_le_int64.c
+++ b/Source/Generated/GB_AxB__lxor_le_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_int64
 // A'*B function (dot2):     GB_Adot2B__lxor_le_int64
 // A'*B function (dot3):     GB_Adot3B__lxor_le_int64
-// A*B function (heap):      GB_AheapB__lxor_le_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_INT64 || GxB_NO_LXOR_BOOL || GxB_NO_LE_INT64 || GxB_NO_LXOR_LE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_int64
 GrB_Info GB_Adot2B__lxor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_int64
 GrB_Info GB_Adot3B__lxor_le_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_int8.c b/Source/Generated/GB_AxB__lxor_le_int8.c
index 4005e84350..72e8284fec 100644
--- a/Source/Generated/GB_AxB__lxor_le_int8.c
+++ b/Source/Generated/GB_AxB__lxor_le_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_int8
 // A'*B function (dot2):     GB_Adot2B__lxor_le_int8
 // A'*B function (dot3):     GB_Adot3B__lxor_le_int8
-// A*B function (heap):      GB_AheapB__lxor_le_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_INT8 || GxB_NO_LXOR_BOOL || GxB_NO_LE_INT8 || GxB_NO_LXOR_LE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_int8
 GrB_Info GB_Adot2B__lxor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_int8
 GrB_Info GB_Adot3B__lxor_le_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_uint16.c b/Source/Generated/GB_AxB__lxor_le_uint16.c
index dc3e68bf76..8d759ee54c 100644
--- a/Source/Generated/GB_AxB__lxor_le_uint16.c
+++ b/Source/Generated/GB_AxB__lxor_le_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_uint16
 // A'*B function (dot2):     GB_Adot2B__lxor_le_uint16
 // A'*B function (dot3):     GB_Adot3B__lxor_le_uint16
-// A*B function (heap):      GB_AheapB__lxor_le_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_UINT16 || GxB_NO_LXOR_BOOL || GxB_NO_LE_UINT16 || GxB_NO_LXOR_LE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_uint16
 GrB_Info GB_Adot2B__lxor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_uint16
 GrB_Info GB_Adot3B__lxor_le_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_uint32.c b/Source/Generated/GB_AxB__lxor_le_uint32.c
index 01563ab6a6..1b78a6117e 100644
--- a/Source/Generated/GB_AxB__lxor_le_uint32.c
+++ b/Source/Generated/GB_AxB__lxor_le_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_uint32
 // A'*B function (dot2):     GB_Adot2B__lxor_le_uint32
 // A'*B function (dot3):     GB_Adot3B__lxor_le_uint32
-// A*B function (heap):      GB_AheapB__lxor_le_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_UINT32 || GxB_NO_LXOR_BOOL || GxB_NO_LE_UINT32 || GxB_NO_LXOR_LE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_uint32
 GrB_Info GB_Adot2B__lxor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_uint32
 GrB_Info GB_Adot3B__lxor_le_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_uint64.c b/Source/Generated/GB_AxB__lxor_le_uint64.c
index 9b0c24eef4..fdf811bfff 100644
--- a/Source/Generated/GB_AxB__lxor_le_uint64.c
+++ b/Source/Generated/GB_AxB__lxor_le_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_uint64
 // A'*B function (dot2):     GB_Adot2B__lxor_le_uint64
 // A'*B function (dot3):     GB_Adot3B__lxor_le_uint64
-// A*B function (heap):      GB_AheapB__lxor_le_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_UINT64 || GxB_NO_LXOR_BOOL || GxB_NO_LE_UINT64 || GxB_NO_LXOR_LE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_uint64
 GrB_Info GB_Adot2B__lxor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_uint64
 GrB_Info GB_Adot3B__lxor_le_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_le_uint8.c b/Source/Generated/GB_AxB__lxor_le_uint8.c
index 92bc0d0ef5..edc3571aca 100644
--- a/Source/Generated/GB_AxB__lxor_le_uint8.c
+++ b/Source/Generated/GB_AxB__lxor_le_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_le_uint8
 // A'*B function (dot2):     GB_Adot2B__lxor_le_uint8
 // A'*B function (dot3):     GB_Adot3B__lxor_le_uint8
-// A*B function (heap):      GB_AheapB__lxor_le_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_le_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_le_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik <= bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik <= bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik <= bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LE || GxB_NO_UINT8 || GxB_NO_LXOR_BOOL || GxB_NO_LE_UINT8 || GxB_NO_LXOR_LE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_le_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_le_uint8
 GrB_Info GB_Adot2B__lxor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_le_uint8
 GrB_Info GB_Adot3B__lxor_le_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_le_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_le_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_le_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lor_bool.c b/Source/Generated/GB_AxB__lxor_lor_bool.c
index d52b9af845..28d8d956d8 100644
--- a/Source/Generated/GB_AxB__lxor_lor_bool.c
+++ b/Source/Generated/GB_AxB__lxor_lor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lor_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_lor_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_lor_bool
-// A*B function (heap):      GB_AheapB__lxor_lor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lor_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik || bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik || bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik || bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x || y) ;
+#define GB_MULT(z, x, y) \
+    z = (x || y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x || y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x || y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LOR || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_LOR_BOOL || GxB_NO_LXOR_LOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lor_bool
 GrB_Info GB_Adot2B__lxor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lor_bool
 GrB_Info GB_Adot3B__lxor_lor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_bool.c b/Source/Generated/GB_AxB__lxor_lt_bool.c
index 9255c3e55a..475d4c9db8 100644
--- a/Source/Generated/GB_AxB__lxor_lt_bool.c
+++ b/Source/Generated/GB_AxB__lxor_lt_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_bool
-// A*B function (heap):      GB_AheapB__lxor_lt_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_LT_BOOL || GxB_NO_LXOR_LT_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_bool
 GrB_Info GB_Adot2B__lxor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_bool
 GrB_Info GB_Adot3B__lxor_lt_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_fp32.c b/Source/Generated/GB_AxB__lxor_lt_fp32.c
index 7d29784750..2b81acfc20 100644
--- a/Source/Generated/GB_AxB__lxor_lt_fp32.c
+++ b/Source/Generated/GB_AxB__lxor_lt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_fp32
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_fp32
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_fp32
-// A*B function (heap):      GB_AheapB__lxor_lt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_FP32 || GxB_NO_LXOR_BOOL || GxB_NO_LT_FP32 || GxB_NO_LXOR_LT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_fp32
 GrB_Info GB_Adot2B__lxor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_fp32
 GrB_Info GB_Adot3B__lxor_lt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_fp64.c b/Source/Generated/GB_AxB__lxor_lt_fp64.c
index 9122a2e908..55aeb74e96 100644
--- a/Source/Generated/GB_AxB__lxor_lt_fp64.c
+++ b/Source/Generated/GB_AxB__lxor_lt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_fp64
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_fp64
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_fp64
-// A*B function (heap):      GB_AheapB__lxor_lt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_FP64 || GxB_NO_LXOR_BOOL || GxB_NO_LT_FP64 || GxB_NO_LXOR_LT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_fp64
 GrB_Info GB_Adot2B__lxor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_fp64
 GrB_Info GB_Adot3B__lxor_lt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_int16.c b/Source/Generated/GB_AxB__lxor_lt_int16.c
index 60305c66fb..de8758905a 100644
--- a/Source/Generated/GB_AxB__lxor_lt_int16.c
+++ b/Source/Generated/GB_AxB__lxor_lt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_int16
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_int16
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_int16
-// A*B function (heap):      GB_AheapB__lxor_lt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_INT16 || GxB_NO_LXOR_BOOL || GxB_NO_LT_INT16 || GxB_NO_LXOR_LT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_int16
 GrB_Info GB_Adot2B__lxor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_int16
 GrB_Info GB_Adot3B__lxor_lt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_int32.c b/Source/Generated/GB_AxB__lxor_lt_int32.c
index 8cea7095c5..526d6ba407 100644
--- a/Source/Generated/GB_AxB__lxor_lt_int32.c
+++ b/Source/Generated/GB_AxB__lxor_lt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_int32
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_int32
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_int32
-// A*B function (heap):      GB_AheapB__lxor_lt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_INT32 || GxB_NO_LXOR_BOOL || GxB_NO_LT_INT32 || GxB_NO_LXOR_LT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_int32
 GrB_Info GB_Adot2B__lxor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_int32
 GrB_Info GB_Adot3B__lxor_lt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_int64.c b/Source/Generated/GB_AxB__lxor_lt_int64.c
index 30d5423588..d3208cd58b 100644
--- a/Source/Generated/GB_AxB__lxor_lt_int64.c
+++ b/Source/Generated/GB_AxB__lxor_lt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_int64
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_int64
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_int64
-// A*B function (heap):      GB_AheapB__lxor_lt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_INT64 || GxB_NO_LXOR_BOOL || GxB_NO_LT_INT64 || GxB_NO_LXOR_LT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_int64
 GrB_Info GB_Adot2B__lxor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_int64
 GrB_Info GB_Adot3B__lxor_lt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_int8.c b/Source/Generated/GB_AxB__lxor_lt_int8.c
index 2b02e36e99..6fd23f3671 100644
--- a/Source/Generated/GB_AxB__lxor_lt_int8.c
+++ b/Source/Generated/GB_AxB__lxor_lt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_int8
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_int8
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_int8
-// A*B function (heap):      GB_AheapB__lxor_lt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_INT8 || GxB_NO_LXOR_BOOL || GxB_NO_LT_INT8 || GxB_NO_LXOR_LT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_int8
 GrB_Info GB_Adot2B__lxor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_int8
 GrB_Info GB_Adot3B__lxor_lt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_uint16.c b/Source/Generated/GB_AxB__lxor_lt_uint16.c
index 57d2acbb58..74bb9384c1 100644
--- a/Source/Generated/GB_AxB__lxor_lt_uint16.c
+++ b/Source/Generated/GB_AxB__lxor_lt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_uint16
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_uint16
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_uint16
-// A*B function (heap):      GB_AheapB__lxor_lt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_UINT16 || GxB_NO_LXOR_BOOL || GxB_NO_LT_UINT16 || GxB_NO_LXOR_LT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_uint16
 GrB_Info GB_Adot2B__lxor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_uint16
 GrB_Info GB_Adot3B__lxor_lt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_uint32.c b/Source/Generated/GB_AxB__lxor_lt_uint32.c
index 49b42b0982..87d505a922 100644
--- a/Source/Generated/GB_AxB__lxor_lt_uint32.c
+++ b/Source/Generated/GB_AxB__lxor_lt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_uint32
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_uint32
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_uint32
-// A*B function (heap):      GB_AheapB__lxor_lt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_UINT32 || GxB_NO_LXOR_BOOL || GxB_NO_LT_UINT32 || GxB_NO_LXOR_LT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_uint32
 GrB_Info GB_Adot2B__lxor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_uint32
 GrB_Info GB_Adot3B__lxor_lt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_uint64.c b/Source/Generated/GB_AxB__lxor_lt_uint64.c
index dbd3764808..6b1886eb2a 100644
--- a/Source/Generated/GB_AxB__lxor_lt_uint64.c
+++ b/Source/Generated/GB_AxB__lxor_lt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_uint64
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_uint64
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_uint64
-// A*B function (heap):      GB_AheapB__lxor_lt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_UINT64 || GxB_NO_LXOR_BOOL || GxB_NO_LT_UINT64 || GxB_NO_LXOR_LT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_uint64
 GrB_Info GB_Adot2B__lxor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_uint64
 GrB_Info GB_Adot3B__lxor_lt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lt_uint8.c b/Source/Generated/GB_AxB__lxor_lt_uint8.c
index 7007db3885..aee1940256 100644
--- a/Source/Generated/GB_AxB__lxor_lt_uint8.c
+++ b/Source/Generated/GB_AxB__lxor_lt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lt_uint8
 // A'*B function (dot2):     GB_Adot2B__lxor_lt_uint8
 // A'*B function (dot3):     GB_Adot3B__lxor_lt_uint8
-// A*B function (heap):      GB_AheapB__lxor_lt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lt_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik < bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik < bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik < bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_LT || GxB_NO_UINT8 || GxB_NO_LXOR_BOOL || GxB_NO_LT_UINT8 || GxB_NO_LXOR_LT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lt_uint8
 GrB_Info GB_Adot2B__lxor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lt_uint8
 GrB_Info GB_Adot3B__lxor_lt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_lxor_bool.c b/Source/Generated/GB_AxB__lxor_lxor_bool.c
index 0c3b93042c..a4a5121a4b 100644
--- a/Source/Generated/GB_AxB__lxor_lxor_bool.c
+++ b/Source/Generated/GB_AxB__lxor_lxor_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_lxor_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_lxor_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_lxor_bool
-// A*B function (heap):      GB_AheapB__lxor_lxor_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_lxor_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_lxor_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_LXOR_LXOR_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_lxor_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_lxor_bool
 GrB_Info GB_Adot2B__lxor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_lxor_bool
 GrB_Info GB_Adot3B__lxor_lxor_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_lxor_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_lxor_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_lxor_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_fp32.c b/Source/Generated/GB_AxB__lxor_ne_fp32.c
index 43bc14b588..e5aea47359 100644
--- a/Source/Generated/GB_AxB__lxor_ne_fp32.c
+++ b/Source/Generated/GB_AxB__lxor_ne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_fp32
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_fp32
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_fp32
-// A*B function (heap):      GB_AheapB__lxor_ne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_fp32
 
 // C type:   bool
 // A type:   float
 // B type:   float
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_FP32 || GxB_NO_LXOR_BOOL || GxB_NO_NE_FP32 || GxB_NO_LXOR_NE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_fp32
 GrB_Info GB_Adot2B__lxor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_fp32
 GrB_Info GB_Adot3B__lxor_ne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_fp64.c b/Source/Generated/GB_AxB__lxor_ne_fp64.c
index b994d59abb..312388643d 100644
--- a/Source/Generated/GB_AxB__lxor_ne_fp64.c
+++ b/Source/Generated/GB_AxB__lxor_ne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_fp64
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_fp64
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_fp64
-// A*B function (heap):      GB_AheapB__lxor_ne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_fp64
 
 // C type:   bool
 // A type:   double
 // B type:   double
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_FP64 || GxB_NO_LXOR_BOOL || GxB_NO_NE_FP64 || GxB_NO_LXOR_NE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_fp64
 GrB_Info GB_Adot2B__lxor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_fp64
 GrB_Info GB_Adot3B__lxor_ne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_int16.c b/Source/Generated/GB_AxB__lxor_ne_int16.c
index c73c87b0c2..4cb3299a5a 100644
--- a/Source/Generated/GB_AxB__lxor_ne_int16.c
+++ b/Source/Generated/GB_AxB__lxor_ne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_int16
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_int16
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_int16
-// A*B function (heap):      GB_AheapB__lxor_ne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_int16
 
 // C type:   bool
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_INT16 || GxB_NO_LXOR_BOOL || GxB_NO_NE_INT16 || GxB_NO_LXOR_NE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_int16
 GrB_Info GB_Adot2B__lxor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_int16
 GrB_Info GB_Adot3B__lxor_ne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_int32.c b/Source/Generated/GB_AxB__lxor_ne_int32.c
index 8fe8fefe62..b1a8376339 100644
--- a/Source/Generated/GB_AxB__lxor_ne_int32.c
+++ b/Source/Generated/GB_AxB__lxor_ne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_int32
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_int32
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_int32
-// A*B function (heap):      GB_AheapB__lxor_ne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_int32
 
 // C type:   bool
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_INT32 || GxB_NO_LXOR_BOOL || GxB_NO_NE_INT32 || GxB_NO_LXOR_NE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_int32
 GrB_Info GB_Adot2B__lxor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_int32
 GrB_Info GB_Adot3B__lxor_ne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_int64.c b/Source/Generated/GB_AxB__lxor_ne_int64.c
index 37805cdc05..431af499c3 100644
--- a/Source/Generated/GB_AxB__lxor_ne_int64.c
+++ b/Source/Generated/GB_AxB__lxor_ne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_int64
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_int64
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_int64
-// A*B function (heap):      GB_AheapB__lxor_ne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_int64
 
 // C type:   bool
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_INT64 || GxB_NO_LXOR_BOOL || GxB_NO_NE_INT64 || GxB_NO_LXOR_NE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_int64
 GrB_Info GB_Adot2B__lxor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_int64
 GrB_Info GB_Adot3B__lxor_ne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_int8.c b/Source/Generated/GB_AxB__lxor_ne_int8.c
index df20300c07..b5c2925053 100644
--- a/Source/Generated/GB_AxB__lxor_ne_int8.c
+++ b/Source/Generated/GB_AxB__lxor_ne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_int8
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_int8
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_int8
-// A*B function (heap):      GB_AheapB__lxor_ne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_int8
 
 // C type:   bool
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_INT8 || GxB_NO_LXOR_BOOL || GxB_NO_NE_INT8 || GxB_NO_LXOR_NE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_int8
 GrB_Info GB_Adot2B__lxor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_int8
 GrB_Info GB_Adot3B__lxor_ne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_uint16.c b/Source/Generated/GB_AxB__lxor_ne_uint16.c
index 86c7b97f90..c608bf5117 100644
--- a/Source/Generated/GB_AxB__lxor_ne_uint16.c
+++ b/Source/Generated/GB_AxB__lxor_ne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_uint16
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_uint16
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_uint16
-// A*B function (heap):      GB_AheapB__lxor_ne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_uint16
 
 // C type:   bool
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_UINT16 || GxB_NO_LXOR_BOOL || GxB_NO_NE_UINT16 || GxB_NO_LXOR_NE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_uint16
 GrB_Info GB_Adot2B__lxor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_uint16
 GrB_Info GB_Adot3B__lxor_ne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_uint32.c b/Source/Generated/GB_AxB__lxor_ne_uint32.c
index da7d6f6fd6..2dd06950df 100644
--- a/Source/Generated/GB_AxB__lxor_ne_uint32.c
+++ b/Source/Generated/GB_AxB__lxor_ne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_uint32
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_uint32
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_uint32
-// A*B function (heap):      GB_AheapB__lxor_ne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_uint32
 
 // C type:   bool
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_UINT32 || GxB_NO_LXOR_BOOL || GxB_NO_NE_UINT32 || GxB_NO_LXOR_NE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_uint32
 GrB_Info GB_Adot2B__lxor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_uint32
 GrB_Info GB_Adot3B__lxor_ne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_uint64.c b/Source/Generated/GB_AxB__lxor_ne_uint64.c
index 55446d0285..7a918e5624 100644
--- a/Source/Generated/GB_AxB__lxor_ne_uint64.c
+++ b/Source/Generated/GB_AxB__lxor_ne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_uint64
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_uint64
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_uint64
-// A*B function (heap):      GB_AheapB__lxor_ne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_uint64
 
 // C type:   bool
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_UINT64 || GxB_NO_LXOR_BOOL || GxB_NO_NE_UINT64 || GxB_NO_LXOR_NE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_uint64
 GrB_Info GB_Adot2B__lxor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_uint64
 GrB_Info GB_Adot3B__lxor_ne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_ne_uint8.c b/Source/Generated/GB_AxB__lxor_ne_uint8.c
index a301e3a9c9..3cc973ee58 100644
--- a/Source/Generated/GB_AxB__lxor_ne_uint8.c
+++ b/Source/Generated/GB_AxB__lxor_ne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_ne_uint8
 // A'*B function (dot2):     GB_Adot2B__lxor_ne_uint8
 // A'*B function (dot3):     GB_Adot3B__lxor_ne_uint8
-// A*B function (heap):      GB_AheapB__lxor_ne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_ne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_ne_uint8
 
 // C type:   bool
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik != bkj)
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != (aik != bkj))
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= (aik != bkj)
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_NE || GxB_NO_UINT8 || GxB_NO_LXOR_BOOL || GxB_NO_NE_UINT8 || GxB_NO_LXOR_NE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_ne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_ne_uint8
 GrB_Info GB_Adot2B__lxor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_ne_uint8
 GrB_Info GB_Adot3B__lxor_ne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_ne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_ne_uint8
+(
+    GrB_Matrix C,                       // C += A'*B (dense dot product)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A partition; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B partition; confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // this hard-coded semiring is disabled at compile time
+    return (GrB_NO_VALUE) ;     // caller presumably uses the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // body specialized by the GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_ne_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_ne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__lxor_pair_bool.c b/Source/Generated/GB_AxB__lxor_pair_bool.c
new file mode 100644
index 0000000000..9ce489b8e6
--- /dev/null
+++ b/Source/Generated/GB_AxB__lxor_pair_bool.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__lxor_pair_bool
+// A'*B function (dot3):     GB_Adot3B__lxor_pair_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_pair_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_pair_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+
+// Multiply: z = 1
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= 1
+// Identity: false
+// Terminal: ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z ^= 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    false
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_LXOR || GxB_NO_PAIR || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_PAIR_BOOL || GxB_NO_LXOR_PAIR_BOOL)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__lxor_pair_bool
+(
+    GrB_Matrix C,                       // C=A'*B or C<!M>=A'*B
+    const GrB_Matrix M, const bool Mask_struct, // NOTE(review): mask; confirm
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the C<M>=A'*B case (mask not complemented) is handled by dot3, not here
+    #if GB_DISABLE      // this hard-coded semiring is disabled at compile time
+    return (GrB_NO_VALUE) ;     // caller presumably uses the generic case
+    #else
+    #define GB_PHASE_2_OF_2     // presumably selects phase 2 in the template
+    #include "GB_AxB_dot2_meta.c"       // body specialized by the GB_* macros
+    #undef GB_PHASE_2_OF_2      // do not leak the phase selector past here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__lxor_pair_bool
+(
+    GrB_Matrix C,                       // C<M>=A'*B (masked dot product)
+    const GrB_Matrix M, const bool Mask_struct, // NOTE(review): mask; confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList, // ntasks entries, presumably
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // this hard-coded semiring is disabled at compile time
+    return (GrB_NO_VALUE) ;     // caller presumably uses the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // body specialized by the GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__lxor_pair_bool
+(
+    GrB_Matrix C,                       // C += A'*B (dense dot product)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A partition; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B partition; confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // this hard-coded semiring is disabled at compile time
+    return (GrB_NO_VALUE) ;     // caller presumably uses the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // body specialized by the GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_pair_bool
+(
+    GrB_Matrix C,                       // C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; confirm
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // ntasks entries, presumably
+    const int ntasks,
+    const int nfine,                    // NOTE(review): fine-task count? confirm
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE      // this hard-coded semiring is disabled at compile time
+    return (GrB_NO_VALUE) ;     // caller presumably uses the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // body specialized by the GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__lxor_second_bool.c b/Source/Generated/GB_AxB__lxor_second_bool.c
index 38c8be49b6..39c93b3ec4 100644
--- a/Source/Generated/GB_AxB__lxor_second_bool.c
+++ b/Source/Generated/GB_AxB__lxor_second_bool.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__lxor_second_bool
 // A'*B function (dot2):     GB_Adot2B__lxor_second_bool
 // A'*B function (dot3):     GB_Adot3B__lxor_second_bool
-// A*B function (heap):      GB_AheapB__lxor_second_bool
+// C+=A'*B function (dot4):  GB_Adot4B__lxor_second_bool
+// A*B function (saxpy3):    GB_Asaxpy3B__lxor_second_bool
 
 // C type:   bool
 // A type:   bool
 // B type:   bool
 
 // Multiply: z = bkj
-// Add:      cij = (cij != z)
-// MultAdd:  cij = (cij != bkj)
+// Add:      cij ^= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij ^= bkj
 // Identity: false
 // Terminal: ;
 
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = (z != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z ^= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     false
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    bool cij ;
+#define GB_CIJ_DECLARE(cij) \
+    bool cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] ^= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x ^ y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    bool
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0x1L
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] ^= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (bool))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(bool))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_SECOND || GxB_NO_BOOL || GxB_NO_LXOR_BOOL || GxB_NO_SECOND_BOOL || GxB_NO_LXOR_SECOND_BOOL)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__lxor_second_bool
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    bool *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    bool *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__lxor_second_bool
 GrB_Info GB_Adot2B__lxor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__lxor_second_bool
 GrB_Info GB_Adot3B__lxor_second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__lxor_second_bool
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__lxor_second_bool
+(
+    GrB_Matrix C,                       // C += A'*B (dense dot product)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A partition; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B partition; confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // this hard-coded semiring is disabled at compile time
+    return (GrB_NO_VALUE) ;     // caller presumably uses the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // body specialized by the GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__lxor_second_bool
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__lxor_second_bool
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    bool *GB_RESTRICT Cx = C->x ;
-    bool cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_fp32.c b/Source/Generated/GB_AxB__max_div_fp32.c
index 9484b3a3d3..06acf8d1ac 100644
--- a/Source/Generated/GB_AxB__max_div_fp32.c
+++ b/Source/Generated/GB_AxB__max_div_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_fp32
 // A'*B function (dot2):     GB_Adot2B__max_div_fp32
 // A'*B function (dot3):     GB_Adot3B__max_div_fp32
-// A*B function (heap):      GB_AheapB__max_div_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik / bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x / y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x / y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_DIV_FP32 || GxB_NO_MAX_DIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_div_fp32
 GrB_Info GB_Adot2B__max_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_fp32
 GrB_Info GB_Adot3B__max_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_div_fp32
+(
+    GrB_Matrix C,                       // C += A'*B (dense dot product)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably A partition; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably B partition; confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE      // this hard-coded semiring is disabled at compile time
+    return (GrB_NO_VALUE) ;     // caller presumably uses the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // body specialized by the GB_* macros
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_div_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_div_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_fp64.c b/Source/Generated/GB_AxB__max_div_fp64.c
index 4ccb6505cb..901b8c2cbc 100644
--- a/Source/Generated/GB_AxB__max_div_fp64.c
+++ b/Source/Generated/GB_AxB__max_div_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_fp64
 // A'*B function (dot2):     GB_Adot2B__max_div_fp64
 // A'*B function (dot3):     GB_Adot3B__max_div_fp64
-// A*B function (heap):      GB_AheapB__max_div_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik / bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x / y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x / y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_DIV_FP64 || GxB_NO_MAX_DIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_div_fp64
 GrB_Info GB_Adot2B__max_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_fp64
 GrB_Info GB_Adot3B__max_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_div_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_div_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_int16.c b/Source/Generated/GB_AxB__max_div_int16.c
index b51985d693..59825f6c54 100644
--- a/Source/Generated/GB_AxB__max_div_int16.c
+++ b/Source/Generated/GB_AxB__max_div_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_int16
 // A'*B function (dot2):     GB_Adot2B__max_div_int16
 // A'*B function (dot3):     GB_Adot3B__max_div_int16
-// A*B function (heap):      GB_AheapB__max_div_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 16)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 16) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_DIV_INT16 || GxB_NO_MAX_DIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_int16
 GrB_Info GB_Adot2B__max_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_int16
 GrB_Info GB_Adot3B__max_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_div_int16
+GrB_Info GB_Asaxpy3B__max_div_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_int32.c b/Source/Generated/GB_AxB__max_div_int32.c
index 5b78213f76..56eb8bd63a 100644
--- a/Source/Generated/GB_AxB__max_div_int32.c
+++ b/Source/Generated/GB_AxB__max_div_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_int32
 // A'*B function (dot2):     GB_Adot2B__max_div_int32
 // A'*B function (dot3):     GB_Adot3B__max_div_int32
-// A*B function (heap):      GB_AheapB__max_div_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 32)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 32) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_DIV_INT32 || GxB_NO_MAX_DIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_int32
 GrB_Info GB_Adot2B__max_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_int32
 GrB_Info GB_Adot3B__max_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_div_int32
+GrB_Info GB_Asaxpy3B__max_div_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_int64.c b/Source/Generated/GB_AxB__max_div_int64.c
index 9fedac9750..26b02678cd 100644
--- a/Source/Generated/GB_AxB__max_div_int64.c
+++ b/Source/Generated/GB_AxB__max_div_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_int64
 // A'*B function (dot2):     GB_Adot2B__max_div_int64
 // A'*B function (dot3):     GB_Adot3B__max_div_int64
-// A*B function (heap):      GB_AheapB__max_div_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 64)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 64) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_DIV_INT64 || GxB_NO_MAX_DIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_int64
 GrB_Info GB_Adot2B__max_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_int64
 GrB_Info GB_Adot3B__max_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_div_int64
+GrB_Info GB_Asaxpy3B__max_div_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_int8.c b/Source/Generated/GB_AxB__max_div_int8.c
index d7166a36d8..d85e61bef4 100644
--- a/Source/Generated/GB_AxB__max_div_int8.c
+++ b/Source/Generated/GB_AxB__max_div_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_int8
 // A'*B function (dot2):     GB_Adot2B__max_div_int8
 // A'*B function (dot3):     GB_Adot3B__max_div_int8
-// A*B function (heap):      GB_AheapB__max_div_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 8)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 8) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_DIV_INT8 || GxB_NO_MAX_DIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_int8
 GrB_Info GB_Adot2B__max_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_int8
 GrB_Info GB_Adot3B__max_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_div_int8
+GrB_Info GB_Asaxpy3B__max_div_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_uint16.c b/Source/Generated/GB_AxB__max_div_uint16.c
index 72ea5a12d2..2900db4091 100644
--- a/Source/Generated/GB_AxB__max_div_uint16.c
+++ b/Source/Generated/GB_AxB__max_div_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_uint16
 // A'*B function (dot2):     GB_Adot2B__max_div_uint16
 // A'*B function (dot3):     GB_Adot3B__max_div_uint16
-// A*B function (heap):      GB_AheapB__max_div_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 16)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 16) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_DIV_UINT16 || GxB_NO_MAX_DIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_uint16
 GrB_Info GB_Adot2B__max_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_uint16
 GrB_Info GB_Adot3B__max_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_div_uint16
+GrB_Info GB_Asaxpy3B__max_div_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_uint32.c b/Source/Generated/GB_AxB__max_div_uint32.c
index 90c884edd5..0c5c1ba791 100644
--- a/Source/Generated/GB_AxB__max_div_uint32.c
+++ b/Source/Generated/GB_AxB__max_div_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_uint32
 // A'*B function (dot2):     GB_Adot2B__max_div_uint32
 // A'*B function (dot3):     GB_Adot3B__max_div_uint32
-// A*B function (heap):      GB_AheapB__max_div_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 32)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 32) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_DIV_UINT32 || GxB_NO_MAX_DIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_uint32
 GrB_Info GB_Adot2B__max_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_uint32
 GrB_Info GB_Adot3B__max_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_div_uint32
+GrB_Info GB_Asaxpy3B__max_div_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_uint64.c b/Source/Generated/GB_AxB__max_div_uint64.c
index eb64b3f210..ce25c062c7 100644
--- a/Source/Generated/GB_AxB__max_div_uint64.c
+++ b/Source/Generated/GB_AxB__max_div_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_uint64
 // A'*B function (dot2):     GB_Adot2B__max_div_uint64
 // A'*B function (dot3):     GB_Adot3B__max_div_uint64
-// A*B function (heap):      GB_AheapB__max_div_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 64)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 64) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_DIV_UINT64 || GxB_NO_MAX_DIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_uint64
 GrB_Info GB_Adot2B__max_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_uint64
 GrB_Info GB_Adot3B__max_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_div_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_div_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_div_uint8.c b/Source/Generated/GB_AxB__max_div_uint8.c
index 3db85024c3..c9a99dbda4 100644
--- a/Source/Generated/GB_AxB__max_div_uint8.c
+++ b/Source/Generated/GB_AxB__max_div_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_div_uint8
 // A'*B function (dot2):     GB_Adot2B__max_div_uint8
 // A'*B function (dot3):     GB_Adot3B__max_div_uint8
-// A*B function (heap):      GB_AheapB__max_div_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_div_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_div_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 8)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 8) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_DIV || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_DIV_UINT8 || GxB_NO_MAX_DIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_div_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_div_uint8
 GrB_Info GB_Adot2B__max_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_div_uint8
 GrB_Info GB_Adot3B__max_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_div_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_div_uint8
+GrB_Info GB_Asaxpy3B__max_div_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_fp32.c b/Source/Generated/GB_AxB__max_first_fp32.c
index 8dfefc7af0..8df81afd84 100644
--- a/Source/Generated/GB_AxB__max_first_fp32.c
+++ b/Source/Generated/GB_AxB__max_first_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_fp32
 // A'*B function (dot2):     GB_Adot2B__max_first_fp32
 // A'*B function (dot3):     GB_Adot3B__max_first_fp32
-// A*B function (heap):      GB_AheapB__max_first_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, aik)
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_FIRST_FP32 || GxB_NO_MAX_FIRST_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_fp32
 GrB_Info GB_Adot2B__max_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_fp32
 GrB_Info GB_Adot3B__max_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_first_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_fp64.c b/Source/Generated/GB_AxB__max_first_fp64.c
index 203465e529..1ad91aff57 100644
--- a/Source/Generated/GB_AxB__max_first_fp64.c
+++ b/Source/Generated/GB_AxB__max_first_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_fp64
 // A'*B function (dot2):     GB_Adot2B__max_first_fp64
 // A'*B function (dot3):     GB_Adot3B__max_first_fp64
-// A*B function (heap):      GB_AheapB__max_first_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, aik)
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_FIRST_FP64 || GxB_NO_MAX_FIRST_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_fp64
 GrB_Info GB_Adot2B__max_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_fp64
 GrB_Info GB_Adot3B__max_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_first_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_int16.c b/Source/Generated/GB_AxB__max_first_int16.c
index 33eb43b3aa..267a55f412 100644
--- a/Source/Generated/GB_AxB__max_first_int16.c
+++ b/Source/Generated/GB_AxB__max_first_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_int16
 // A'*B function (dot2):     GB_Adot2B__max_first_int16
 // A'*B function (dot3):     GB_Adot3B__max_first_int16
-// A*B function (heap):      GB_AheapB__max_first_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_FIRST_INT16 || GxB_NO_MAX_FIRST_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_int16
 GrB_Info GB_Adot2B__max_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_int16
 GrB_Info GB_Adot3B__max_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_first_int16     // C+=A'*B dense dot product, hard-coded for MAX_FIRST_INT16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A and its slice count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B and its slice count; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: no hard-coded kernel, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the textually included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__max_first_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_int32.c b/Source/Generated/GB_AxB__max_first_int32.c
index ea1feea52d..3703a90c77 100644
--- a/Source/Generated/GB_AxB__max_first_int32.c
+++ b/Source/Generated/GB_AxB__max_first_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_int32
 // A'*B function (dot2):     GB_Adot2B__max_first_int32
 // A'*B function (dot3):     GB_Adot3B__max_first_int32
-// A*B function (heap):      GB_AheapB__max_first_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_FIRST_INT32 || GxB_NO_MAX_FIRST_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_int32
 GrB_Info GB_Adot2B__max_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_int32
 GrB_Info GB_Adot3B__max_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_first_int32     // C+=A'*B dense dot product, hard-coded for MAX_FIRST_INT32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A and its slice count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B and its slice count; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: no hard-coded kernel, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the textually included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__max_first_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_first_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_int64.c b/Source/Generated/GB_AxB__max_first_int64.c
index c17dd71265..fc361fdfc4 100644
--- a/Source/Generated/GB_AxB__max_first_int64.c
+++ b/Source/Generated/GB_AxB__max_first_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_int64
 // A'*B function (dot2):     GB_Adot2B__max_first_int64
 // A'*B function (dot3):     GB_Adot3B__max_first_int64
-// A*B function (heap):      GB_AheapB__max_first_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_FIRST_INT64 || GxB_NO_MAX_FIRST_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_int64
 GrB_Info GB_Adot2B__max_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_int64
 GrB_Info GB_Adot3B__max_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_first_int64     // C+=A'*B dense dot product, hard-coded for MAX_FIRST_INT64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A and its slice count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B and its slice count; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: no hard-coded kernel, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the textually included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__max_first_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_int8.c b/Source/Generated/GB_AxB__max_first_int8.c
index 528b21fb1d..34c1bb2388 100644
--- a/Source/Generated/GB_AxB__max_first_int8.c
+++ b/Source/Generated/GB_AxB__max_first_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_int8
 // A'*B function (dot2):     GB_Adot2B__max_first_int8
 // A'*B function (dot3):     GB_Adot3B__max_first_int8
-// A*B function (heap):      GB_AheapB__max_first_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_FIRST_INT8 || GxB_NO_MAX_FIRST_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_int8
 GrB_Info GB_Adot2B__max_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_int8
 GrB_Info GB_Adot3B__max_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_first_int8      // C+=A'*B dense dot product, hard-coded for MAX_FIRST_INT8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably the partition of A and its slice count; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably the partition of B and its slice count; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: no hard-coded kernel, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;      // reached after the textually included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__max_first_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_first_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_uint16.c b/Source/Generated/GB_AxB__max_first_uint16.c
index f4b5cdb42b..d47c09d300 100644
--- a/Source/Generated/GB_AxB__max_first_uint16.c
+++ b/Source/Generated/GB_AxB__max_first_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_uint16
 // A'*B function (dot2):     GB_Adot2B__max_first_uint16
 // A'*B function (dot3):     GB_Adot3B__max_first_uint16
-// A*B function (heap):      GB_AheapB__max_first_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_FIRST_UINT16 || GxB_NO_MAX_FIRST_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_uint16
 GrB_Info GB_Adot2B__max_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_uint16
 GrB_Info GB_Adot3B__max_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_first_uint16
+GrB_Info GB_Asaxpy3B__max_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_uint32.c b/Source/Generated/GB_AxB__max_first_uint32.c
index 0cfb8abd17..d1e433ee22 100644
--- a/Source/Generated/GB_AxB__max_first_uint32.c
+++ b/Source/Generated/GB_AxB__max_first_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_uint32
 // A'*B function (dot2):     GB_Adot2B__max_first_uint32
 // A'*B function (dot3):     GB_Adot3B__max_first_uint32
-// A*B function (heap):      GB_AheapB__max_first_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_FIRST_UINT32 || GxB_NO_MAX_FIRST_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_uint32
 GrB_Info GB_Adot2B__max_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_uint32
 GrB_Info GB_Adot3B__max_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_first_uint32
+GrB_Info GB_Asaxpy3B__max_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_uint64.c b/Source/Generated/GB_AxB__max_first_uint64.c
index 7cc2470c68..bfc8db4773 100644
--- a/Source/Generated/GB_AxB__max_first_uint64.c
+++ b/Source/Generated/GB_AxB__max_first_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_uint64
 // A'*B function (dot2):     GB_Adot2B__max_first_uint64
 // A'*B function (dot3):     GB_Adot3B__max_first_uint64
-// A*B function (heap):      GB_AheapB__max_first_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_FIRST_UINT64 || GxB_NO_MAX_FIRST_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_uint64
 GrB_Info GB_Adot2B__max_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_uint64
 GrB_Info GB_Adot3B__max_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_first_uint64
+GrB_Info GB_Asaxpy3B__max_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_first_uint8.c b/Source/Generated/GB_AxB__max_first_uint8.c
index f5f21e25e0..88baac7d6f 100644
--- a/Source/Generated/GB_AxB__max_first_uint8.c
+++ b/Source/Generated/GB_AxB__max_first_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_first_uint8
 // A'*B function (dot2):     GB_Adot2B__max_first_uint8
 // A'*B function (dot3):     GB_Adot3B__max_first_uint8
-// A*B function (heap):      GB_AheapB__max_first_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_first_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_first_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, aik)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FIRST || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_FIRST_UINT8 || GxB_NO_MAX_FIRST_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_first_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_first_uint8
 GrB_Info GB_Adot2B__max_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_first_uint8
 GrB_Info GB_Adot3B__max_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_first_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_first_uint8
+GrB_Info GB_Asaxpy3B__max_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_fp32.c b/Source/Generated/GB_AxB__max_iseq_fp32.c
index 4c003b6b02..cd2674d3df 100644
--- a/Source/Generated/GB_AxB__max_iseq_fp32.c
+++ b/Source/Generated/GB_AxB__max_iseq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_fp32
 // A'*B function (dot2):     GB_Adot2B__max_iseq_fp32
 // A'*B function (dot3):     GB_Adot3B__max_iseq_fp32
-// A*B function (heap):      GB_AheapB__max_iseq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik == bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_ISEQ_FP32 || GxB_NO_MAX_ISEQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_fp32
 GrB_Info GB_Adot2B__max_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_fp32
 GrB_Info GB_Adot3B__max_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_iseq_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_iseq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_fp64.c b/Source/Generated/GB_AxB__max_iseq_fp64.c
index b23ff08a74..52d7f4e786 100644
--- a/Source/Generated/GB_AxB__max_iseq_fp64.c
+++ b/Source/Generated/GB_AxB__max_iseq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_fp64
 // A'*B function (dot2):     GB_Adot2B__max_iseq_fp64
 // A'*B function (dot3):     GB_Adot3B__max_iseq_fp64
-// A*B function (heap):      GB_AheapB__max_iseq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik == bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_ISEQ_FP64 || GxB_NO_MAX_ISEQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_fp64
 GrB_Info GB_Adot2B__max_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_fp64
 GrB_Info GB_Adot3B__max_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_iseq_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_iseq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_int16.c b/Source/Generated/GB_AxB__max_iseq_int16.c
index 0a21fc6dee..a7d82229ef 100644
--- a/Source/Generated/GB_AxB__max_iseq_int16.c
+++ b/Source/Generated/GB_AxB__max_iseq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_int16
 // A'*B function (dot2):     GB_Adot2B__max_iseq_int16
 // A'*B function (dot3):     GB_Adot3B__max_iseq_int16
-// A*B function (heap):      GB_AheapB__max_iseq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_ISEQ_INT16 || GxB_NO_MAX_ISEQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_int16
 GrB_Info GB_Adot2B__max_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_int16
 GrB_Info GB_Adot3B__max_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_iseq_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_iseq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_int32.c b/Source/Generated/GB_AxB__max_iseq_int32.c
index d8ed6e0867..f99e832083 100644
--- a/Source/Generated/GB_AxB__max_iseq_int32.c
+++ b/Source/Generated/GB_AxB__max_iseq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_int32
 // A'*B function (dot2):     GB_Adot2B__max_iseq_int32
 // A'*B function (dot3):     GB_Adot3B__max_iseq_int32
-// A*B function (heap):      GB_AheapB__max_iseq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_ISEQ_INT32 || GxB_NO_MAX_ISEQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_int32
 GrB_Info GB_Adot2B__max_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_int32
 GrB_Info GB_Adot3B__max_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_iseq_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_iseq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_int64.c b/Source/Generated/GB_AxB__max_iseq_int64.c
index 73748dd566..f57f686d3a 100644
--- a/Source/Generated/GB_AxB__max_iseq_int64.c
+++ b/Source/Generated/GB_AxB__max_iseq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_int64
 // A'*B function (dot2):     GB_Adot2B__max_iseq_int64
 // A'*B function (dot3):     GB_Adot3B__max_iseq_int64
-// A*B function (heap):      GB_AheapB__max_iseq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_ISEQ_INT64 || GxB_NO_MAX_ISEQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_int64
 GrB_Info GB_Adot2B__max_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_int64
 GrB_Info GB_Adot3B__max_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_iseq_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_iseq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_int8.c b/Source/Generated/GB_AxB__max_iseq_int8.c
index 6997409021..f30a0129a4 100644
--- a/Source/Generated/GB_AxB__max_iseq_int8.c
+++ b/Source/Generated/GB_AxB__max_iseq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_int8
 // A'*B function (dot2):     GB_Adot2B__max_iseq_int8
 // A'*B function (dot3):     GB_Adot3B__max_iseq_int8
-// A*B function (heap):      GB_AheapB__max_iseq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_ISEQ_INT8 || GxB_NO_MAX_ISEQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_int8
 GrB_Info GB_Adot2B__max_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_int8
 GrB_Info GB_Adot3B__max_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_iseq_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_iseq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_uint16.c b/Source/Generated/GB_AxB__max_iseq_uint16.c
index 8fc619f09d..ba8445babe 100644
--- a/Source/Generated/GB_AxB__max_iseq_uint16.c
+++ b/Source/Generated/GB_AxB__max_iseq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_uint16
 // A'*B function (dot2):     GB_Adot2B__max_iseq_uint16
 // A'*B function (dot3):     GB_Adot3B__max_iseq_uint16
-// A*B function (heap):      GB_AheapB__max_iseq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_ISEQ_UINT16 || GxB_NO_MAX_ISEQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_uint16
 GrB_Info GB_Adot2B__max_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_uint16
 GrB_Info GB_Adot3B__max_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_iseq_uint16
+GrB_Info GB_Asaxpy3B__max_iseq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_uint32.c b/Source/Generated/GB_AxB__max_iseq_uint32.c
index f7f588e35d..25749a732e 100644
--- a/Source/Generated/GB_AxB__max_iseq_uint32.c
+++ b/Source/Generated/GB_AxB__max_iseq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_uint32
 // A'*B function (dot2):     GB_Adot2B__max_iseq_uint32
 // A'*B function (dot3):     GB_Adot3B__max_iseq_uint32
-// A*B function (heap):      GB_AheapB__max_iseq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_ISEQ_UINT32 || GxB_NO_MAX_ISEQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_uint32
 GrB_Info GB_Adot2B__max_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_uint32
 GrB_Info GB_Adot3B__max_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_iseq_uint32
+GrB_Info GB_Asaxpy3B__max_iseq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_uint64.c b/Source/Generated/GB_AxB__max_iseq_uint64.c
index e1bcd1aa0d..7fc22daf19 100644
--- a/Source/Generated/GB_AxB__max_iseq_uint64.c
+++ b/Source/Generated/GB_AxB__max_iseq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_uint64
 // A'*B function (dot2):     GB_Adot2B__max_iseq_uint64
 // A'*B function (dot3):     GB_Adot3B__max_iseq_uint64
-// A*B function (heap):      GB_AheapB__max_iseq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_ISEQ_UINT64 || GxB_NO_MAX_ISEQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_uint64
 GrB_Info GB_Adot2B__max_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_uint64
 GrB_Info GB_Adot3B__max_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_iseq_uint64
+GrB_Info GB_Asaxpy3B__max_iseq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_iseq_uint8.c b/Source/Generated/GB_AxB__max_iseq_uint8.c
index 4fb4004d20..ede18a4605 100644
--- a/Source/Generated/GB_AxB__max_iseq_uint8.c
+++ b/Source/Generated/GB_AxB__max_iseq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_iseq_uint8
 // A'*B function (dot2):     GB_Adot2B__max_iseq_uint8
 // A'*B function (dot3):     GB_Adot3B__max_iseq_uint8
-// A*B function (heap):      GB_AheapB__max_iseq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_iseq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_iseq_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik == bkj))
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISEQ || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_ISEQ_UINT8 || GxB_NO_MAX_ISEQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_iseq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_iseq_uint8
 GrB_Info GB_Adot2B__max_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_iseq_uint8
 GrB_Info GB_Adot3B__max_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_iseq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_iseq_uint8
+GrB_Info GB_Asaxpy3B__max_iseq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_fp32.c b/Source/Generated/GB_AxB__max_isge_fp32.c
index 36fdb9e13f..961f94f27d 100644
--- a/Source/Generated/GB_AxB__max_isge_fp32.c
+++ b/Source/Generated/GB_AxB__max_isge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_fp32
 // A'*B function (dot2):     GB_Adot2B__max_isge_fp32
 // A'*B function (dot3):     GB_Adot3B__max_isge_fp32
-// A*B function (heap):      GB_AheapB__max_isge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik >= bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_ISGE_FP32 || GxB_NO_MAX_ISGE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_fp32
 GrB_Info GB_Adot2B__max_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_fp32
 GrB_Info GB_Adot3B__max_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isge_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_fp64.c b/Source/Generated/GB_AxB__max_isge_fp64.c
index 59fd006f39..36982c7c60 100644
--- a/Source/Generated/GB_AxB__max_isge_fp64.c
+++ b/Source/Generated/GB_AxB__max_isge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_fp64
 // A'*B function (dot2):     GB_Adot2B__max_isge_fp64
 // A'*B function (dot3):     GB_Adot3B__max_isge_fp64
-// A*B function (heap):      GB_AheapB__max_isge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik >= bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_ISGE_FP64 || GxB_NO_MAX_ISGE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_fp64
 GrB_Info GB_Adot2B__max_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_fp64
 GrB_Info GB_Adot3B__max_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isge_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_int16.c b/Source/Generated/GB_AxB__max_isge_int16.c
index abbc479b8b..d1abd4a874 100644
--- a/Source/Generated/GB_AxB__max_isge_int16.c
+++ b/Source/Generated/GB_AxB__max_isge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_int16
 // A'*B function (dot2):     GB_Adot2B__max_isge_int16
 // A'*B function (dot3):     GB_Adot3B__max_isge_int16
-// A*B function (heap):      GB_AheapB__max_isge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_ISGE_INT16 || GxB_NO_MAX_ISGE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_int16
 GrB_Info GB_Adot2B__max_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_int16
 GrB_Info GB_Adot3B__max_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isge_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_int32.c b/Source/Generated/GB_AxB__max_isge_int32.c
index 905ae01a3b..4797579e29 100644
--- a/Source/Generated/GB_AxB__max_isge_int32.c
+++ b/Source/Generated/GB_AxB__max_isge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_int32
 // A'*B function (dot2):     GB_Adot2B__max_isge_int32
 // A'*B function (dot3):     GB_Adot3B__max_isge_int32
-// A*B function (heap):      GB_AheapB__max_isge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_ISGE_INT32 || GxB_NO_MAX_ISGE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_int32
 GrB_Info GB_Adot2B__max_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_int32
 GrB_Info GB_Adot3B__max_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isge_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_int64.c b/Source/Generated/GB_AxB__max_isge_int64.c
index 39adeb45b1..d775bdf86e 100644
--- a/Source/Generated/GB_AxB__max_isge_int64.c
+++ b/Source/Generated/GB_AxB__max_isge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_int64
 // A'*B function (dot2):     GB_Adot2B__max_isge_int64
 // A'*B function (dot3):     GB_Adot3B__max_isge_int64
-// A*B function (heap):      GB_AheapB__max_isge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_ISGE_INT64 || GxB_NO_MAX_ISGE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_int64
 GrB_Info GB_Adot2B__max_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_int64
 GrB_Info GB_Adot3B__max_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isge_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_int8.c b/Source/Generated/GB_AxB__max_isge_int8.c
index 86d46bbc68..5a6fb5a331 100644
--- a/Source/Generated/GB_AxB__max_isge_int8.c
+++ b/Source/Generated/GB_AxB__max_isge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_int8
 // A'*B function (dot2):     GB_Adot2B__max_isge_int8
 // A'*B function (dot3):     GB_Adot3B__max_isge_int8
-// A*B function (heap):      GB_AheapB__max_isge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_ISGE_INT8 || GxB_NO_MAX_ISGE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_int8
 GrB_Info GB_Adot2B__max_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_int8
 GrB_Info GB_Adot3B__max_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isge_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_uint16.c b/Source/Generated/GB_AxB__max_isge_uint16.c
index ab26302912..bdb9f42828 100644
--- a/Source/Generated/GB_AxB__max_isge_uint16.c
+++ b/Source/Generated/GB_AxB__max_isge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_uint16
 // A'*B function (dot2):     GB_Adot2B__max_isge_uint16
 // A'*B function (dot3):     GB_Adot3B__max_isge_uint16
-// A*B function (heap):      GB_AheapB__max_isge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE, usable in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_ISGE_UINT16 || GxB_NO_MAX_ISGE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_uint16
 GrB_Info GB_Adot2B__max_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_uint16
 GrB_Info GB_Adot3B__max_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isge_uint16
+GrB_Info GB_Asaxpy3B__max_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_uint32.c b/Source/Generated/GB_AxB__max_isge_uint32.c
index 095cb78c97..de115b12f1 100644
--- a/Source/Generated/GB_AxB__max_isge_uint32.c
+++ b/Source/Generated/GB_AxB__max_isge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_uint32
 // A'*B function (dot2):     GB_Adot2B__max_isge_uint32
 // A'*B function (dot3):     GB_Adot3B__max_isge_uint32
-// A*B function (heap):      GB_AheapB__max_isge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE, usable in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_ISGE_UINT32 || GxB_NO_MAX_ISGE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_uint32
 GrB_Info GB_Adot2B__max_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_uint32
 GrB_Info GB_Adot3B__max_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isge_uint32
+GrB_Info GB_Asaxpy3B__max_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_uint64.c b/Source/Generated/GB_AxB__max_isge_uint64.c
index 92c4330244..e4921386fc 100644
--- a/Source/Generated/GB_AxB__max_isge_uint64.c
+++ b/Source/Generated/GB_AxB__max_isge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_uint64
 // A'*B function (dot2):     GB_Adot2B__max_isge_uint64
 // A'*B function (dot3):     GB_Adot3B__max_isge_uint64
-// A*B function (heap):      GB_AheapB__max_isge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE, usable in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_ISGE_UINT64 || GxB_NO_MAX_ISGE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_uint64
 GrB_Info GB_Adot2B__max_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_uint64
 GrB_Info GB_Adot3B__max_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isge_uint64
+GrB_Info GB_Asaxpy3B__max_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isge_uint8.c b/Source/Generated/GB_AxB__max_isge_uint8.c
index 5a1b214103..150aee604e 100644
--- a/Source/Generated/GB_AxB__max_isge_uint8.c
+++ b/Source/Generated/GB_AxB__max_isge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isge_uint8
 // A'*B function (dot2):     GB_Adot2B__max_isge_uint8
 // A'*B function (dot3):     GB_Adot3B__max_isge_uint8
-// A*B function (heap):      GB_AheapB__max_isge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isge_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik >= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE, usable in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGE || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_ISGE_UINT8 || GxB_NO_MAX_ISGE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isge_uint8
 GrB_Info GB_Adot2B__max_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isge_uint8
 GrB_Info GB_Adot3B__max_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isge_uint8      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably partitions A into naslice slices; confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): presumably the same flag for B; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably partitions B into nbslice slices; confirm in template
+    const int nthreads                  // presumably the number of threads to use; confirm in template
+)
+{ 
+    #if GB_DISABLE                      // nonzero when this semiring is compiled out
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel here; per GB_DISABLE, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isge_uint8
+GrB_Info GB_Asaxpy3B__max_isge_uint8    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask; NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask; confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably the task descriptors consumed by the template; confirm
+    const int ntasks,                   // presumably the number of entries in TaskList; confirm
+    const int nfine,                    // presumably the count of fine-grained tasks; confirm
+    const int nthreads,                 // presumably the number of threads to use; confirm
+    GB_Context Context                  // NOTE(review): use not visible here; see the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_fp32.c b/Source/Generated/GB_AxB__max_isgt_fp32.c
index 1ce56dcdbf..6232c59625 100644
--- a/Source/Generated/GB_AxB__max_isgt_fp32.c
+++ b/Source/Generated/GB_AxB__max_isgt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_fp32
 // A'*B function (dot2):     GB_Adot2B__max_isgt_fp32
 // A'*B function (dot3):     GB_Adot3B__max_isgt_fp32
-// A*B function (heap):      GB_AheapB__max_isgt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik > bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_ISGT_FP32 || GxB_NO_MAX_ISGT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_fp32
 GrB_Info GB_Adot2B__max_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_fp32
 GrB_Info GB_Adot3B__max_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isgt_fp32       // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably partitions A into naslice slices; confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): presumably the same flag for B; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably partitions B into nbslice slices; confirm in template
+    const int nthreads                  // presumably the number of threads to use; confirm in template
+)
+{ 
+    #if GB_DISABLE                      // nonzero when this semiring is compiled out
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel here; per GB_DISABLE, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isgt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isgt_fp32     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask; NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask; confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably the task descriptors consumed by the template; confirm
+    const int ntasks,                   // presumably the number of entries in TaskList; confirm
+    const int nfine,                    // presumably the count of fine-grained tasks; confirm
+    const int nthreads,                 // presumably the number of threads to use; confirm
+    GB_Context Context                  // NOTE(review): use not visible here; see the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_fp64.c b/Source/Generated/GB_AxB__max_isgt_fp64.c
index ea4db3cc50..536bf82809 100644
--- a/Source/Generated/GB_AxB__max_isgt_fp64.c
+++ b/Source/Generated/GB_AxB__max_isgt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_fp64
 // A'*B function (dot2):     GB_Adot2B__max_isgt_fp64
 // A'*B function (dot3):     GB_Adot3B__max_isgt_fp64
-// A*B function (heap):      GB_AheapB__max_isgt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik > bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_ISGT_FP64 || GxB_NO_MAX_ISGT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_fp64
 GrB_Info GB_Adot2B__max_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_fp64
 GrB_Info GB_Adot3B__max_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isgt_fp64       // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably partitions A into naslice slices; confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): presumably the same flag for B; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably partitions B into nbslice slices; confirm in template
+    const int nthreads                  // presumably the number of threads to use; confirm in template
+)
+{ 
+    #if GB_DISABLE                      // nonzero when this semiring is compiled out
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel here; per GB_DISABLE, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isgt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isgt_fp64     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask; NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask; confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably the task descriptors consumed by the template; confirm
+    const int ntasks,                   // presumably the number of entries in TaskList; confirm
+    const int nfine,                    // presumably the count of fine-grained tasks; confirm
+    const int nthreads,                 // presumably the number of threads to use; confirm
+    GB_Context Context                  // NOTE(review): use not visible here; see the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_int16.c b/Source/Generated/GB_AxB__max_isgt_int16.c
index 081045d3d9..5922f0c683 100644
--- a/Source/Generated/GB_AxB__max_isgt_int16.c
+++ b/Source/Generated/GB_AxB__max_isgt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_int16
 // A'*B function (dot2):     GB_Adot2B__max_isgt_int16
 // A'*B function (dot3):     GB_Adot3B__max_isgt_int16
-// A*B function (heap):      GB_AheapB__max_isgt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_ISGT_INT16 || GxB_NO_MAX_ISGT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_int16
 GrB_Info GB_Adot2B__max_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_int16
 GrB_Info GB_Adot3B__max_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isgt_int16      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                       // matrix updated by C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused; confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably partitions A into naslice slices; confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): presumably the same flag for B; confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably partitions B into nbslice slices; confirm in template
+    const int nthreads                  // presumably the number of threads to use; confirm in template
+)
+{ 
+    #if GB_DISABLE                      // nonzero when this semiring is compiled out
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel here; per GB_DISABLE, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isgt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isgt_int16    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask; NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask; confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably the task descriptors consumed by the template; confirm
+    const int ntasks,                   // presumably the number of entries in TaskList; confirm
+    const int nfine,                    // presumably the count of fine-grained tasks; confirm
+    const int nthreads,                 // presumably the number of threads to use; confirm
+    GB_Context Context                  // NOTE(review): use not visible here; see the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is supplied by this template
+    return (GrB_SUCCESS) ;              // normal exit after the included template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_int32.c b/Source/Generated/GB_AxB__max_isgt_int32.c
index d3552e05ea..93fe1ad87a 100644
--- a/Source/Generated/GB_AxB__max_isgt_int32.c
+++ b/Source/Generated/GB_AxB__max_isgt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_int32
 // A'*B function (dot2):     GB_Adot2B__max_isgt_int32
 // A'*B function (dot3):     GB_Adot3B__max_isgt_int32
-// A*B function (heap):      GB_AheapB__max_isgt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_ISGT_INT32 || GxB_NO_MAX_ISGT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_int32
 GrB_Info GB_Adot2B__max_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_int32
 GrB_Info GB_Adot3B__max_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isgt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isgt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_int64.c b/Source/Generated/GB_AxB__max_isgt_int64.c
index b4dd55edae..e4cdba250c 100644
--- a/Source/Generated/GB_AxB__max_isgt_int64.c
+++ b/Source/Generated/GB_AxB__max_isgt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_int64
 // A'*B function (dot2):     GB_Adot2B__max_isgt_int64
 // A'*B function (dot3):     GB_Adot3B__max_isgt_int64
-// A*B function (heap):      GB_AheapB__max_isgt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_ISGT_INT64 || GxB_NO_MAX_ISGT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_int64
 GrB_Info GB_Adot2B__max_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_int64
 GrB_Info GB_Adot3B__max_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isgt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_int8.c b/Source/Generated/GB_AxB__max_isgt_int8.c
index 8861522397..7cb8d5c955 100644
--- a/Source/Generated/GB_AxB__max_isgt_int8.c
+++ b/Source/Generated/GB_AxB__max_isgt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_int8
 // A'*B function (dot2):     GB_Adot2B__max_isgt_int8
 // A'*B function (dot3):     GB_Adot3B__max_isgt_int8
-// A*B function (heap):      GB_AheapB__max_isgt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_ISGT_INT8 || GxB_NO_MAX_ISGT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_int8
 GrB_Info GB_Adot2B__max_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_int8
 GrB_Info GB_Adot3B__max_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isgt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_uint16.c b/Source/Generated/GB_AxB__max_isgt_uint16.c
index 37adb0e0c4..36ce8a0f5b 100644
--- a/Source/Generated/GB_AxB__max_isgt_uint16.c
+++ b/Source/Generated/GB_AxB__max_isgt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_uint16
 // A'*B function (dot2):     GB_Adot2B__max_isgt_uint16
 // A'*B function (dot3):     GB_Adot3B__max_isgt_uint16
-// A*B function (heap):      GB_AheapB__max_isgt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_ISGT_UINT16 || GxB_NO_MAX_ISGT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_uint16
 GrB_Info GB_Adot2B__max_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_uint16
 GrB_Info GB_Adot3B__max_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isgt_uint16
+GrB_Info GB_Asaxpy3B__max_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_uint32.c b/Source/Generated/GB_AxB__max_isgt_uint32.c
index 1c2c7663c5..93a6de6790 100644
--- a/Source/Generated/GB_AxB__max_isgt_uint32.c
+++ b/Source/Generated/GB_AxB__max_isgt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_uint32
 // A'*B function (dot2):     GB_Adot2B__max_isgt_uint32
 // A'*B function (dot3):     GB_Adot3B__max_isgt_uint32
-// A*B function (heap):      GB_AheapB__max_isgt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_ISGT_UINT32 || GxB_NO_MAX_ISGT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_uint32
 GrB_Info GB_Adot2B__max_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_uint32
 GrB_Info GB_Adot3B__max_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isgt_uint32 // dot4 kernel, MAX_ISGT_UINT32 semiring
+(
+    GrB_Matrix C,                           // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,  // presumably true if A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slicing of A into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // presumably true if B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slicing of B into nbslice parts -- TODO confirm
+    const int nthreads                      // NOTE(review): presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                          // nonzero when any listed GxB_NO_* flag is set
+    return (GrB_NO_VALUE) ;                 // kernel disabled; the generic case is meant to be used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body comes from the included template
+    return (GrB_SUCCESS) ;                  // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isgt_uint32
+GrB_Info GB_Asaxpy3B__max_isgt_uint32 // saxpy3 kernel, MAX_ISGT_UINT32 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select complemented / structural use -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably the task descriptors used by the template -- TODO confirm
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,                    // presumably the number of fine-grained tasks -- TODO confirm
+    const int nthreads,                 // presumably the number of threads to use -- TODO confirm
+    GB_Context Context                  // NOTE(review): not used in the visible lines; presumably consumed by the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the included template
+    return (GrB_SUCCESS) ;              // reached when the template code falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_uint64.c b/Source/Generated/GB_AxB__max_isgt_uint64.c
index ae161b1783..909351b695 100644
--- a/Source/Generated/GB_AxB__max_isgt_uint64.c
+++ b/Source/Generated/GB_AxB__max_isgt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_uint64
 // A'*B function (dot2):     GB_Adot2B__max_isgt_uint64
 // A'*B function (dot3):     GB_Adot3B__max_isgt_uint64
-// A*B function (heap):      GB_AheapB__max_isgt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_ISGT_UINT64 || GxB_NO_MAX_ISGT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_uint64
 GrB_Info GB_Adot2B__max_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_uint64
 GrB_Info GB_Adot3B__max_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isgt_uint64 // dot4 kernel, MAX_ISGT_UINT64 semiring
+(
+    GrB_Matrix C,                           // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,  // presumably true if A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slicing of A into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // presumably true if B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slicing of B into nbslice parts -- TODO confirm
+    const int nthreads                      // NOTE(review): presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                          // nonzero when any listed GxB_NO_* flag is set
+    return (GrB_NO_VALUE) ;                 // kernel disabled; the generic case is meant to be used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body comes from the included template
+    return (GrB_SUCCESS) ;                  // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isgt_uint64
+GrB_Info GB_Asaxpy3B__max_isgt_uint64 // saxpy3 kernel, MAX_ISGT_UINT64 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select complemented / structural use -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably the task descriptors used by the template -- TODO confirm
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,                    // presumably the number of fine-grained tasks -- TODO confirm
+    const int nthreads,                 // presumably the number of threads to use -- TODO confirm
+    GB_Context Context                  // NOTE(review): not used in the visible lines; presumably consumed by the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the included template
+    return (GrB_SUCCESS) ;              // reached when the template code falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isgt_uint8.c b/Source/Generated/GB_AxB__max_isgt_uint8.c
index 17248695ea..72ff8e8881 100644
--- a/Source/Generated/GB_AxB__max_isgt_uint8.c
+++ b/Source/Generated/GB_AxB__max_isgt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isgt_uint8
 // A'*B function (dot2):     GB_Adot2B__max_isgt_uint8
 // A'*B function (dot3):     GB_Adot3B__max_isgt_uint8
-// A*B function (heap):      GB_AheapB__max_isgt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isgt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isgt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik > bkj))
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_ISGT_UINT8 || GxB_NO_MAX_ISGT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isgt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isgt_uint8
 GrB_Info GB_Adot2B__max_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isgt_uint8
 GrB_Info GB_Adot3B__max_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isgt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isgt_uint8 // dot4 kernel, MAX_ISGT_UINT8 semiring
+(
+    GrB_Matrix C,                           // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,  // presumably true if A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slicing of A into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // presumably true if B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slicing of B into nbslice parts -- TODO confirm
+    const int nthreads                      // NOTE(review): presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                          // nonzero when any listed GxB_NO_* flag is set
+    return (GrB_NO_VALUE) ;                 // kernel disabled; the generic case is meant to be used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body comes from the included template
+    return (GrB_SUCCESS) ;                  // reached when the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isgt_uint8
+GrB_Info GB_Asaxpy3B__max_isgt_uint8 // saxpy3 kernel, MAX_ISGT_UINT8 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select complemented / structural use -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably the task descriptors used by the template -- TODO confirm
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,                    // presumably the number of fine-grained tasks -- TODO confirm
+    const int nthreads,                 // presumably the number of threads to use -- TODO confirm
+    GB_Context Context                  // NOTE(review): not used in the visible lines; presumably consumed by the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the included template
+    return (GrB_SUCCESS) ;              // reached when the template code falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_fp32.c b/Source/Generated/GB_AxB__max_isle_fp32.c
index c0f584ab97..55fc766c16 100644
--- a/Source/Generated/GB_AxB__max_isle_fp32.c
+++ b/Source/Generated/GB_AxB__max_isle_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_fp32
 // A'*B function (dot2):     GB_Adot2B__max_isle_fp32
 // A'*B function (dot3):     GB_Adot3B__max_isle_fp32
-// A*B function (heap):      GB_AheapB__max_isle_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik <= bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_ISLE_FP32 || GxB_NO_MAX_ISLE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_fp32
 GrB_Info GB_Adot2B__max_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_fp32
 GrB_Info GB_Adot3B__max_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isle_fp32 // dot4 kernel, MAX_ISLE_FP32 semiring
+(
+    GrB_Matrix C,                           // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,  // presumably true if A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably slicing of A into naslice parts -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,  // presumably true if B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably slicing of B into nbslice parts -- TODO confirm
+    const int nthreads                      // NOTE(review): presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                          // nonzero when any listed GxB_NO_* flag is set
+    return (GrB_NO_VALUE) ;                 // kernel disabled; the generic case is meant to be used instead
+    #else
+    #include "GB_AxB_dot4_template.c"       // function body comes from the included template
+    return (GrB_SUCCESS) ;                  // reached when the template code falls through
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isle_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isle_fp32 // saxpy3 kernel, MAX_ISLE_FP32 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask; flags presumably select complemented / structural use -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // presumably the task descriptors used by the template -- TODO confirm
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,                    // presumably the number of fine-grained tasks -- TODO confirm
+    const int nthreads,                 // presumably the number of threads to use -- TODO confirm
+    GB_Context Context                  // NOTE(review): not used in the visible lines; presumably consumed by the template
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body comes from the included template
+    return (GrB_SUCCESS) ;              // reached when the template code falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_fp64.c b/Source/Generated/GB_AxB__max_isle_fp64.c
index 8e96ecd710..58b1d39669 100644
--- a/Source/Generated/GB_AxB__max_isle_fp64.c
+++ b/Source/Generated/GB_AxB__max_isle_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_fp64
 // A'*B function (dot2):     GB_Adot2B__max_isle_fp64
 // A'*B function (dot3):     GB_Adot3B__max_isle_fp64
-// A*B function (heap):      GB_AheapB__max_isle_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik <= bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_ISLE_FP64 || GxB_NO_MAX_ISLE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_fp64
 GrB_Info GB_Adot2B__max_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_fp64
 GrB_Info GB_Adot3B__max_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isle_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_int16.c b/Source/Generated/GB_AxB__max_isle_int16.c
index afdbea1492..acc88fc402 100644
--- a/Source/Generated/GB_AxB__max_isle_int16.c
+++ b/Source/Generated/GB_AxB__max_isle_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_int16
 // A'*B function (dot2):     GB_Adot2B__max_isle_int16
 // A'*B function (dot3):     GB_Adot3B__max_isle_int16
-// A*B function (heap):      GB_AheapB__max_isle_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_ISLE_INT16 || GxB_NO_MAX_ISLE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_int16
 GrB_Info GB_Adot2B__max_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_int16
 GrB_Info GB_Adot3B__max_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isle_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isle_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_int32.c b/Source/Generated/GB_AxB__max_isle_int32.c
index 186193f975..d81629d0e0 100644
--- a/Source/Generated/GB_AxB__max_isle_int32.c
+++ b/Source/Generated/GB_AxB__max_isle_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_int32
 // A'*B function (dot2):     GB_Adot2B__max_isle_int32
 // A'*B function (dot3):     GB_Adot3B__max_isle_int32
-// A*B function (heap):      GB_AheapB__max_isle_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_ISLE_INT32 || GxB_NO_MAX_ISLE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_int32
 GrB_Info GB_Adot2B__max_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_int32
 GrB_Info GB_Adot3B__max_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isle_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isle_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_int64.c b/Source/Generated/GB_AxB__max_isle_int64.c
index ae0195a841..49936cc74c 100644
--- a/Source/Generated/GB_AxB__max_isle_int64.c
+++ b/Source/Generated/GB_AxB__max_isle_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_int64
 // A'*B function (dot2):     GB_Adot2B__max_isle_int64
 // A'*B function (dot3):     GB_Adot3B__max_isle_int64
-// A*B function (heap):      GB_AheapB__max_isle_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_ISLE_INT64 || GxB_NO_MAX_ISLE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_int64
 GrB_Info GB_Adot2B__max_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_int64
 GrB_Info GB_Adot3B__max_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isle_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isle_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_int8.c b/Source/Generated/GB_AxB__max_isle_int8.c
index 8ef416d832..ae2e00da06 100644
--- a/Source/Generated/GB_AxB__max_isle_int8.c
+++ b/Source/Generated/GB_AxB__max_isle_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_int8
 // A'*B function (dot2):     GB_Adot2B__max_isle_int8
 // A'*B function (dot3):     GB_Adot3B__max_isle_int8
-// A*B function (heap):      GB_AheapB__max_isle_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_ISLE_INT8 || GxB_NO_MAX_ISLE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_int8
 GrB_Info GB_Adot2B__max_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_int8
 GrB_Info GB_Adot3B__max_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isle_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isle_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_uint16.c b/Source/Generated/GB_AxB__max_isle_uint16.c
index 8b9d65d324..ffa766fd62 100644
--- a/Source/Generated/GB_AxB__max_isle_uint16.c
+++ b/Source/Generated/GB_AxB__max_isle_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_uint16
 // A'*B function (dot2):     GB_Adot2B__max_isle_uint16
 // A'*B function (dot3):     GB_Adot3B__max_isle_uint16
-// A*B function (heap):      GB_AheapB__max_isle_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_ISLE_UINT16 || GxB_NO_MAX_ISLE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_uint16
 GrB_Info GB_Adot2B__max_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_uint16
 GrB_Info GB_Adot3B__max_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isle_uint16
+GrB_Info GB_Asaxpy3B__max_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_uint32.c b/Source/Generated/GB_AxB__max_isle_uint32.c
index c9d1c91b94..31e7eae536 100644
--- a/Source/Generated/GB_AxB__max_isle_uint32.c
+++ b/Source/Generated/GB_AxB__max_isle_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_uint32
 // A'*B function (dot2):     GB_Adot2B__max_isle_uint32
 // A'*B function (dot3):     GB_Adot3B__max_isle_uint32
-// A*B function (heap):      GB_AheapB__max_isle_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_ISLE_UINT32 || GxB_NO_MAX_ISLE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_uint32
 GrB_Info GB_Adot2B__max_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_uint32
 GrB_Info GB_Adot3B__max_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isle_uint32
+GrB_Info GB_Asaxpy3B__max_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_uint64.c b/Source/Generated/GB_AxB__max_isle_uint64.c
index f278ed86dd..aaf356bf15 100644
--- a/Source/Generated/GB_AxB__max_isle_uint64.c
+++ b/Source/Generated/GB_AxB__max_isle_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_uint64
 // A'*B function (dot2):     GB_Adot2B__max_isle_uint64
 // A'*B function (dot3):     GB_Adot3B__max_isle_uint64
-// A*B function (heap):      GB_AheapB__max_isle_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_ISLE_UINT64 || GxB_NO_MAX_ISLE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_uint64
 GrB_Info GB_Adot2B__max_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_uint64
 GrB_Info GB_Adot3B__max_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isle_uint64
+GrB_Info GB_Asaxpy3B__max_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isle_uint8.c b/Source/Generated/GB_AxB__max_isle_uint8.c
index bf28be21b5..fa0325d9b0 100644
--- a/Source/Generated/GB_AxB__max_isle_uint8.c
+++ b/Source/Generated/GB_AxB__max_isle_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isle_uint8
 // A'*B function (dot2):     GB_Adot2B__max_isle_uint8
 // A'*B function (dot3):     GB_Adot3B__max_isle_uint8
-// A*B function (heap):      GB_AheapB__max_isle_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isle_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isle_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik <= bkj))
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLE || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_ISLE_UINT8 || GxB_NO_MAX_ISLE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isle_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isle_uint8
 GrB_Info GB_Adot2B__max_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isle_uint8
 GrB_Info GB_Adot3B__max_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isle_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isle_uint8
+GrB_Info GB_Asaxpy3B__max_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_fp32.c b/Source/Generated/GB_AxB__max_islt_fp32.c
index 0cfc12d260..1b08cb099b 100644
--- a/Source/Generated/GB_AxB__max_islt_fp32.c
+++ b/Source/Generated/GB_AxB__max_islt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_fp32
 // A'*B function (dot2):     GB_Adot2B__max_islt_fp32
 // A'*B function (dot3):     GB_Adot3B__max_islt_fp32
-// A*B function (heap):      GB_AheapB__max_islt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik < bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_ISLT_FP32 || GxB_NO_MAX_ISLT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_fp32
 GrB_Info GB_Adot2B__max_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_fp32
 GrB_Info GB_Adot3B__max_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_islt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_fp64.c b/Source/Generated/GB_AxB__max_islt_fp64.c
index 01466bdb39..f60e0bbbe2 100644
--- a/Source/Generated/GB_AxB__max_islt_fp64.c
+++ b/Source/Generated/GB_AxB__max_islt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_fp64
 // A'*B function (dot2):     GB_Adot2B__max_islt_fp64
 // A'*B function (dot3):     GB_Adot3B__max_islt_fp64
-// A*B function (heap):      GB_AheapB__max_islt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik < bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_ISLT_FP64 || GxB_NO_MAX_ISLT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_fp64
 GrB_Info GB_Adot2B__max_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_fp64
 GrB_Info GB_Adot3B__max_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_islt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_int16.c b/Source/Generated/GB_AxB__max_islt_int16.c
index 8eab60a8ee..9635c02d50 100644
--- a/Source/Generated/GB_AxB__max_islt_int16.c
+++ b/Source/Generated/GB_AxB__max_islt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_int16
 // A'*B function (dot2):     GB_Adot2B__max_islt_int16
 // A'*B function (dot3):     GB_Adot3B__max_islt_int16
-// A*B function (heap):      GB_AheapB__max_islt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_ISLT_INT16 || GxB_NO_MAX_ISLT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_int16
 GrB_Info GB_Adot2B__max_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_int16
 GrB_Info GB_Adot3B__max_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_islt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_int32.c b/Source/Generated/GB_AxB__max_islt_int32.c
index d2b418b786..b8bf791bac 100644
--- a/Source/Generated/GB_AxB__max_islt_int32.c
+++ b/Source/Generated/GB_AxB__max_islt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_int32
 // A'*B function (dot2):     GB_Adot2B__max_islt_int32
 // A'*B function (dot3):     GB_Adot3B__max_islt_int32
-// A*B function (heap):      GB_AheapB__max_islt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_ISLT_INT32 || GxB_NO_MAX_ISLT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_int32
 GrB_Info GB_Adot2B__max_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_int32
 GrB_Info GB_Adot3B__max_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_islt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_int64.c b/Source/Generated/GB_AxB__max_islt_int64.c
index 20b03b14aa..e0c9841e17 100644
--- a/Source/Generated/GB_AxB__max_islt_int64.c
+++ b/Source/Generated/GB_AxB__max_islt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_int64
 // A'*B function (dot2):     GB_Adot2B__max_islt_int64
 // A'*B function (dot3):     GB_Adot3B__max_islt_int64
-// A*B function (heap):      GB_AheapB__max_islt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_ISLT_INT64 || GxB_NO_MAX_ISLT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_int64
 GrB_Info GB_Adot2B__max_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_int64
 GrB_Info GB_Adot3B__max_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_islt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_int8.c b/Source/Generated/GB_AxB__max_islt_int8.c
index 1b3e6773fd..148ac279be 100644
--- a/Source/Generated/GB_AxB__max_islt_int8.c
+++ b/Source/Generated/GB_AxB__max_islt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_int8
 // A'*B function (dot2):     GB_Adot2B__max_islt_int8
 // A'*B function (dot3):     GB_Adot3B__max_islt_int8
-// A*B function (heap):      GB_AheapB__max_islt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_ISLT_INT8 || GxB_NO_MAX_ISLT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_int8
 GrB_Info GB_Adot2B__max_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_int8
 GrB_Info GB_Adot3B__max_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_islt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_uint16.c b/Source/Generated/GB_AxB__max_islt_uint16.c
index a90d762e20..edec650fb2 100644
--- a/Source/Generated/GB_AxB__max_islt_uint16.c
+++ b/Source/Generated/GB_AxB__max_islt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_uint16
 // A'*B function (dot2):     GB_Adot2B__max_islt_uint16
 // A'*B function (dot3):     GB_Adot3B__max_islt_uint16
-// A*B function (heap):      GB_AheapB__max_islt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_ISLT_UINT16 || GxB_NO_MAX_ISLT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_uint16
 GrB_Info GB_Adot2B__max_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_uint16
 GrB_Info GB_Adot3B__max_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_islt_uint16
+GrB_Info GB_Asaxpy3B__max_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_uint32.c b/Source/Generated/GB_AxB__max_islt_uint32.c
index 9dbc5908ec..6f345fe10b 100644
--- a/Source/Generated/GB_AxB__max_islt_uint32.c
+++ b/Source/Generated/GB_AxB__max_islt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_uint32
 // A'*B function (dot2):     GB_Adot2B__max_islt_uint32
 // A'*B function (dot3):     GB_Adot3B__max_islt_uint32
-// A*B function (heap):      GB_AheapB__max_islt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_ISLT_UINT32 || GxB_NO_MAX_ISLT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_uint32
 GrB_Info GB_Adot2B__max_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_uint32
 GrB_Info GB_Adot3B__max_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_islt_uint32
+GrB_Info GB_Asaxpy3B__max_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_uint64.c b/Source/Generated/GB_AxB__max_islt_uint64.c
index 8d6ec24b3e..66b7c00d5d 100644
--- a/Source/Generated/GB_AxB__max_islt_uint64.c
+++ b/Source/Generated/GB_AxB__max_islt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_uint64
 // A'*B function (dot2):     GB_Adot2B__max_islt_uint64
 // A'*B function (dot3):     GB_Adot3B__max_islt_uint64
-// A*B function (heap):      GB_AheapB__max_islt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_ISLT_UINT64 || GxB_NO_MAX_ISLT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_uint64
 GrB_Info GB_Adot2B__max_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_uint64
 GrB_Info GB_Adot3B__max_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_islt_uint64
+GrB_Info GB_Asaxpy3B__max_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_islt_uint8.c b/Source/Generated/GB_AxB__max_islt_uint8.c
index ea351eea76..1907658a37 100644
--- a/Source/Generated/GB_AxB__max_islt_uint8.c
+++ b/Source/Generated/GB_AxB__max_islt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_islt_uint8
 // A'*B function (dot2):     GB_Adot2B__max_islt_uint8
 // A'*B function (dot3):     GB_Adot3B__max_islt_uint8
-// A*B function (heap):      GB_AheapB__max_islt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_islt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_islt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik < bkj))
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISLT || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_ISLT_UINT8 || GxB_NO_MAX_ISLT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_islt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_islt_uint8
 GrB_Info GB_Adot2B__max_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_islt_uint8
 GrB_Info GB_Adot3B__max_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_islt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_islt_uint8
+GrB_Info GB_Asaxpy3B__max_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_fp32.c b/Source/Generated/GB_AxB__max_isne_fp32.c
index ddb664344c..d2852bb37c 100644
--- a/Source/Generated/GB_AxB__max_isne_fp32.c
+++ b/Source/Generated/GB_AxB__max_isne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_fp32
 // A'*B function (dot2):     GB_Adot2B__max_isne_fp32
 // A'*B function (dot3):     GB_Adot3B__max_isne_fp32
-// A*B function (heap):      GB_AheapB__max_isne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik != bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_ISNE_FP32 || GxB_NO_MAX_ISNE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_fp32
 GrB_Info GB_Adot2B__max_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_fp32
 GrB_Info GB_Adot3B__max_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isne_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_fp64.c b/Source/Generated/GB_AxB__max_isne_fp64.c
index 6979c6beb8..11ff5b67e3 100644
--- a/Source/Generated/GB_AxB__max_isne_fp64.c
+++ b/Source/Generated/GB_AxB__max_isne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_fp64
 // A'*B function (dot2):     GB_Adot2B__max_isne_fp64
 // A'*B function (dot3):     GB_Adot3B__max_isne_fp64
-// A*B function (heap):      GB_AheapB__max_isne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik != bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_ISNE_FP64 || GxB_NO_MAX_ISNE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_fp64
 GrB_Info GB_Adot2B__max_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_fp64
 GrB_Info GB_Adot3B__max_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isne_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_int16.c b/Source/Generated/GB_AxB__max_isne_int16.c
index 4c6114b17c..cfc2a6a831 100644
--- a/Source/Generated/GB_AxB__max_isne_int16.c
+++ b/Source/Generated/GB_AxB__max_isne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_int16
 // A'*B function (dot2):     GB_Adot2B__max_isne_int16
 // A'*B function (dot3):     GB_Adot3B__max_isne_int16
-// A*B function (heap):      GB_AheapB__max_isne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_ISNE_INT16 || GxB_NO_MAX_ISNE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_int16
 GrB_Info GB_Adot2B__max_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_int16
 GrB_Info GB_Adot3B__max_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isne_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_int32.c b/Source/Generated/GB_AxB__max_isne_int32.c
index 8990e7b5f7..27e86091a0 100644
--- a/Source/Generated/GB_AxB__max_isne_int32.c
+++ b/Source/Generated/GB_AxB__max_isne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_int32
 // A'*B function (dot2):     GB_Adot2B__max_isne_int32
 // A'*B function (dot3):     GB_Adot3B__max_isne_int32
-// A*B function (heap):      GB_AheapB__max_isne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_ISNE_INT32 || GxB_NO_MAX_ISNE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_int32
 GrB_Info GB_Adot2B__max_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_int32
 GrB_Info GB_Adot3B__max_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isne_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_int64.c b/Source/Generated/GB_AxB__max_isne_int64.c
index 4fdd70514d..3d934692b7 100644
--- a/Source/Generated/GB_AxB__max_isne_int64.c
+++ b/Source/Generated/GB_AxB__max_isne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_int64
 // A'*B function (dot2):     GB_Adot2B__max_isne_int64
 // A'*B function (dot3):     GB_Adot3B__max_isne_int64
-// A*B function (heap):      GB_AheapB__max_isne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_ISNE_INT64 || GxB_NO_MAX_ISNE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_int64
 GrB_Info GB_Adot2B__max_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_int64
 GrB_Info GB_Adot3B__max_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isne_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_int8.c b/Source/Generated/GB_AxB__max_isne_int8.c
index 1d2b404274..32758c6ada 100644
--- a/Source/Generated/GB_AxB__max_isne_int8.c
+++ b/Source/Generated/GB_AxB__max_isne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_int8
 // A'*B function (dot2):     GB_Adot2B__max_isne_int8
 // A'*B function (dot3):     GB_Adot3B__max_isne_int8
-// A*B function (heap):      GB_AheapB__max_isne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_ISNE_INT8 || GxB_NO_MAX_ISNE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_int8
 GrB_Info GB_Adot2B__max_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_int8
 GrB_Info GB_Adot3B__max_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_isne_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_uint16.c b/Source/Generated/GB_AxB__max_isne_uint16.c
index 07f8d3cf40..7ce7d77844 100644
--- a/Source/Generated/GB_AxB__max_isne_uint16.c
+++ b/Source/Generated/GB_AxB__max_isne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_uint16
 // A'*B function (dot2):     GB_Adot2B__max_isne_uint16
 // A'*B function (dot3):     GB_Adot3B__max_isne_uint16
-// A*B function (heap):      GB_AheapB__max_isne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_ISNE_UINT16 || GxB_NO_MAX_ISNE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_uint16
 GrB_Info GB_Adot2B__max_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_uint16
 GrB_Info GB_Adot3B__max_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isne_uint16
+GrB_Info GB_Asaxpy3B__max_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_uint32.c b/Source/Generated/GB_AxB__max_isne_uint32.c
index c5bd2591b7..6dd968b475 100644
--- a/Source/Generated/GB_AxB__max_isne_uint32.c
+++ b/Source/Generated/GB_AxB__max_isne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_uint32
 // A'*B function (dot2):     GB_Adot2B__max_isne_uint32
 // A'*B function (dot3):     GB_Adot3B__max_isne_uint32
-// A*B function (heap):      GB_AheapB__max_isne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_ISNE_UINT32 || GxB_NO_MAX_ISNE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_uint32
 GrB_Info GB_Adot2B__max_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_uint32
 GrB_Info GB_Adot3B__max_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isne_uint32
+GrB_Info GB_Asaxpy3B__max_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_uint64.c b/Source/Generated/GB_AxB__max_isne_uint64.c
index a6b379fa98..1db588777b 100644
--- a/Source/Generated/GB_AxB__max_isne_uint64.c
+++ b/Source/Generated/GB_AxB__max_isne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_uint64
 // A'*B function (dot2):     GB_Adot2B__max_isne_uint64
 // A'*B function (dot3):     GB_Adot3B__max_isne_uint64
-// A*B function (heap):      GB_AheapB__max_isne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_ISNE_UINT64 || GxB_NO_MAX_ISNE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_uint64
 GrB_Info GB_Adot2B__max_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_uint64
 GrB_Info GB_Adot3B__max_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isne_uint64
+GrB_Info GB_Asaxpy3B__max_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_isne_uint8.c b/Source/Generated/GB_AxB__max_isne_uint8.c
index 7801f4109a..5bfd19fffa 100644
--- a/Source/Generated/GB_AxB__max_isne_uint8.c
+++ b/Source/Generated/GB_AxB__max_isne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_isne_uint8
 // A'*B function (dot2):     GB_Adot2B__max_isne_uint8
 // A'*B function (dot3):     GB_Adot3B__max_isne_uint8
-// A*B function (heap):      GB_AheapB__max_isne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_isne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_isne_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, (aik != bkj))
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_ISNE || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_ISNE_UINT8 || GxB_NO_MAX_ISNE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_isne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_isne_uint8
 GrB_Info GB_Adot2B__max_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_isne_uint8
 GrB_Info GB_Adot3B__max_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_isne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_isne_uint8
+GrB_Info GB_Asaxpy3B__max_isne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_fp32.c b/Source/Generated/GB_AxB__max_land_fp32.c
index 562a17d5cc..7ea6437182 100644
--- a/Source/Generated/GB_AxB__max_land_fp32.c
+++ b/Source/Generated/GB_AxB__max_land_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_fp32
 // A'*B function (dot2):     GB_Adot2B__max_land_fp32
 // A'*B function (dot3):     GB_Adot3B__max_land_fp32
-// A*B function (heap):      GB_AheapB__max_land_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, ((aik != 0) && (bkj != 0)))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, ((x != 0) && (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, ((x != 0) && (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_LAND_FP32 || GxB_NO_MAX_LAND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_land_fp32
 GrB_Info GB_Adot2B__max_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_fp32
 GrB_Info GB_Adot3B__max_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_land_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_fp64.c b/Source/Generated/GB_AxB__max_land_fp64.c
index 1e1fe3ffe6..03daf95d4f 100644
--- a/Source/Generated/GB_AxB__max_land_fp64.c
+++ b/Source/Generated/GB_AxB__max_land_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_fp64
 // A'*B function (dot2):     GB_Adot2B__max_land_fp64
 // A'*B function (dot3):     GB_Adot3B__max_land_fp64
-// A*B function (heap):      GB_AheapB__max_land_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, ((aik != 0) && (bkj != 0)))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, ((x != 0) && (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, ((x != 0) && (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_LAND_FP64 || GxB_NO_MAX_LAND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_land_fp64
 GrB_Info GB_Adot2B__max_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_fp64
 GrB_Info GB_Adot3B__max_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_land_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_int16.c b/Source/Generated/GB_AxB__max_land_int16.c
index 057de8210a..4af3325a24 100644
--- a/Source/Generated/GB_AxB__max_land_int16.c
+++ b/Source/Generated/GB_AxB__max_land_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_int16
 // A'*B function (dot2):     GB_Adot2B__max_land_int16
 // A'*B function (dot3):     GB_Adot3B__max_land_int16
-// A*B function (heap):      GB_AheapB__max_land_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_LAND_INT16 || GxB_NO_MAX_LAND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_int16
 GrB_Info GB_Adot2B__max_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_int16
 GrB_Info GB_Adot3B__max_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_land_int16
+GrB_Info GB_Asaxpy3B__max_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_int32.c b/Source/Generated/GB_AxB__max_land_int32.c
index 30e36ce78a..f7b3d3d82d 100644
--- a/Source/Generated/GB_AxB__max_land_int32.c
+++ b/Source/Generated/GB_AxB__max_land_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_int32
 // A'*B function (dot2):     GB_Adot2B__max_land_int32
 // A'*B function (dot3):     GB_Adot3B__max_land_int32
-// A*B function (heap):      GB_AheapB__max_land_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_LAND_INT32 || GxB_NO_MAX_LAND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_int32
 GrB_Info GB_Adot2B__max_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_int32
 GrB_Info GB_Adot3B__max_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_land_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_land_int32
+GrB_Info GB_Asaxpy3B__max_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_int64.c b/Source/Generated/GB_AxB__max_land_int64.c
index 126266945c..09f2c857e0 100644
--- a/Source/Generated/GB_AxB__max_land_int64.c
+++ b/Source/Generated/GB_AxB__max_land_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_int64
 // A'*B function (dot2):     GB_Adot2B__max_land_int64
 // A'*B function (dot3):     GB_Adot3B__max_land_int64
-// A*B function (heap):      GB_AheapB__max_land_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_LAND_INT64 || GxB_NO_MAX_LAND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_int64
 GrB_Info GB_Adot2B__max_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_int64
 GrB_Info GB_Adot3B__max_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_land_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_land_int64
+GrB_Info GB_Asaxpy3B__max_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_int8.c b/Source/Generated/GB_AxB__max_land_int8.c
index a05537e074..cb6edb1642 100644
--- a/Source/Generated/GB_AxB__max_land_int8.c
+++ b/Source/Generated/GB_AxB__max_land_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_int8
 // A'*B function (dot2):     GB_Adot2B__max_land_int8
 // A'*B function (dot3):     GB_Adot3B__max_land_int8
-// A*B function (heap):      GB_AheapB__max_land_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_LAND_INT8 || GxB_NO_MAX_LAND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_int8
 GrB_Info GB_Adot2B__max_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_int8
 GrB_Info GB_Adot3B__max_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_land_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_land_int8
+GrB_Info GB_Asaxpy3B__max_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_uint16.c b/Source/Generated/GB_AxB__max_land_uint16.c
index 1310a287f0..e1a22bf00b 100644
--- a/Source/Generated/GB_AxB__max_land_uint16.c
+++ b/Source/Generated/GB_AxB__max_land_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_uint16
 // A'*B function (dot2):     GB_Adot2B__max_land_uint16
 // A'*B function (dot3):     GB_Adot3B__max_land_uint16
-// A*B function (heap):      GB_AheapB__max_land_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_LAND_UINT16 || GxB_NO_MAX_LAND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_uint16
 GrB_Info GB_Adot2B__max_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_uint16
 GrB_Info GB_Adot3B__max_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_land_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_land_uint16
+GrB_Info GB_Asaxpy3B__max_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_uint32.c b/Source/Generated/GB_AxB__max_land_uint32.c
index 9a90208491..50de7808d1 100644
--- a/Source/Generated/GB_AxB__max_land_uint32.c
+++ b/Source/Generated/GB_AxB__max_land_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_uint32
 // A'*B function (dot2):     GB_Adot2B__max_land_uint32
 // A'*B function (dot3):     GB_Adot3B__max_land_uint32
-// A*B function (heap):      GB_AheapB__max_land_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_LAND_UINT32 || GxB_NO_MAX_LAND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_uint32
 GrB_Info GB_Adot2B__max_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_uint32
 GrB_Info GB_Adot3B__max_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_land_uint32
+GrB_Info GB_Asaxpy3B__max_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_uint64.c b/Source/Generated/GB_AxB__max_land_uint64.c
index 81bde0d5a0..ef6393cc8a 100644
--- a/Source/Generated/GB_AxB__max_land_uint64.c
+++ b/Source/Generated/GB_AxB__max_land_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_uint64
 // A'*B function (dot2):     GB_Adot2B__max_land_uint64
 // A'*B function (dot3):     GB_Adot3B__max_land_uint64
-// A*B function (heap):      GB_AheapB__max_land_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_LAND_UINT64 || GxB_NO_MAX_LAND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_uint64
 GrB_Info GB_Adot2B__max_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_uint64
 GrB_Info GB_Adot3B__max_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_land_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_land_uint8.c b/Source/Generated/GB_AxB__max_land_uint8.c
index 6046345510..e1f89d062c 100644
--- a/Source/Generated/GB_AxB__max_land_uint8.c
+++ b/Source/Generated/GB_AxB__max_land_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_land_uint8
 // A'*B function (dot2):     GB_Adot2B__max_land_uint8
 // A'*B function (dot3):     GB_Adot3B__max_land_uint8
-// A*B function (heap):      GB_AheapB__max_land_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_land_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_land_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...] via memcpy
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LAND || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_LAND_UINT8 || GxB_NO_MAX_LAND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_land_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_land_uint8
 GrB_Info GB_Adot2B__max_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_land_uint8
 GrB_Info GB_Adot3B__max_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_land_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_land_uint8
+GrB_Info GB_Asaxpy3B__max_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_fp32.c b/Source/Generated/GB_AxB__max_lor_fp32.c
index 3c74142756..9dffd8ea41 100644
--- a/Source/Generated/GB_AxB__max_lor_fp32.c
+++ b/Source/Generated/GB_AxB__max_lor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_fp32
 // A'*B function (dot2):     GB_Adot2B__max_lor_fp32
 // A'*B function (dot3):     GB_Adot3B__max_lor_fp32
-// A*B function (heap):      GB_AheapB__max_lor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, ((aik != 0) || (bkj != 0)))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, ((x != 0) || (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, ((x != 0) || (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...] via memcpy
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_LOR_FP32 || GxB_NO_MAX_LOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_fp32
 GrB_Info GB_Adot2B__max_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_fp32
 GrB_Info GB_Adot3B__max_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_lor_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_fp64.c b/Source/Generated/GB_AxB__max_lor_fp64.c
index 40ea64e6a5..2b5dc0c758 100644
--- a/Source/Generated/GB_AxB__max_lor_fp64.c
+++ b/Source/Generated/GB_AxB__max_lor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_fp64
 // A'*B function (dot2):     GB_Adot2B__max_lor_fp64
 // A'*B function (dot3):     GB_Adot3B__max_lor_fp64
-// A*B function (heap):      GB_AheapB__max_lor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, ((aik != 0) || (bkj != 0)))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, ((x != 0) || (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, ((x != 0) || (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...] via memcpy
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_LOR_FP64 || GxB_NO_MAX_LOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_fp64
 GrB_Info GB_Adot2B__max_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_fp64
 GrB_Info GB_Adot3B__max_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_lor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_lor_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_int16.c b/Source/Generated/GB_AxB__max_lor_int16.c
index 92d2dcf4b0..00728ee051 100644
--- a/Source/Generated/GB_AxB__max_lor_int16.c
+++ b/Source/Generated/GB_AxB__max_lor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_int16
 // A'*B function (dot2):     GB_Adot2B__max_lor_int16
 // A'*B function (dot3):     GB_Adot3B__max_lor_int16
-// A*B function (heap):      GB_AheapB__max_lor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...] via memcpy
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_LOR_INT16 || GxB_NO_MAX_LOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_int16
 GrB_Info GB_Adot2B__max_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_int16
 GrB_Info GB_Adot3B__max_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lor_int16
+GrB_Info GB_Asaxpy3B__max_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_int32.c b/Source/Generated/GB_AxB__max_lor_int32.c
index 5252428ecf..d7482b6bea 100644
--- a/Source/Generated/GB_AxB__max_lor_int32.c
+++ b/Source/Generated/GB_AxB__max_lor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_int32
 // A'*B function (dot2):     GB_Adot2B__max_lor_int32
 // A'*B function (dot3):     GB_Adot3B__max_lor_int32
-// A*B function (heap):      GB_AheapB__max_lor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_LOR_INT32 || GxB_NO_MAX_LOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_int32
 GrB_Info GB_Adot2B__max_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_int32
 GrB_Info GB_Adot3B__max_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lor_int32
+GrB_Info GB_Asaxpy3B__max_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_int64.c b/Source/Generated/GB_AxB__max_lor_int64.c
index b5bfaaae25..5092f755d5 100644
--- a/Source/Generated/GB_AxB__max_lor_int64.c
+++ b/Source/Generated/GB_AxB__max_lor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_int64
 // A'*B function (dot2):     GB_Adot2B__max_lor_int64
 // A'*B function (dot3):     GB_Adot3B__max_lor_int64
-// A*B function (heap):      GB_AheapB__max_lor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_LOR_INT64 || GxB_NO_MAX_LOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_int64
 GrB_Info GB_Adot2B__max_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_int64
 GrB_Info GB_Adot3B__max_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lor_int64
+GrB_Info GB_Asaxpy3B__max_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_int8.c b/Source/Generated/GB_AxB__max_lor_int8.c
index c53a473b3a..9eaed7d07a 100644
--- a/Source/Generated/GB_AxB__max_lor_int8.c
+++ b/Source/Generated/GB_AxB__max_lor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_int8
 // A'*B function (dot2):     GB_Adot2B__max_lor_int8
 // A'*B function (dot3):     GB_Adot3B__max_lor_int8
-// A*B function (heap):      GB_AheapB__max_lor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_LOR_INT8 || GxB_NO_MAX_LOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_int8
 GrB_Info GB_Adot2B__max_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_int8
 GrB_Info GB_Adot3B__max_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lor_int8
+GrB_Info GB_Asaxpy3B__max_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_uint16.c b/Source/Generated/GB_AxB__max_lor_uint16.c
index dd7c0c6426..dce0d1317b 100644
--- a/Source/Generated/GB_AxB__max_lor_uint16.c
+++ b/Source/Generated/GB_AxB__max_lor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_uint16
 // A'*B function (dot2):     GB_Adot2B__max_lor_uint16
 // A'*B function (dot3):     GB_Adot3B__max_lor_uint16
-// A*B function (heap):      GB_AheapB__max_lor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_LOR_UINT16 || GxB_NO_MAX_LOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_uint16
 GrB_Info GB_Adot2B__max_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_uint16
 GrB_Info GB_Adot3B__max_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lor_uint16
+GrB_Info GB_Asaxpy3B__max_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_uint32.c b/Source/Generated/GB_AxB__max_lor_uint32.c
index d46d3f191f..338ce35b25 100644
--- a/Source/Generated/GB_AxB__max_lor_uint32.c
+++ b/Source/Generated/GB_AxB__max_lor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_uint32
 // A'*B function (dot2):     GB_Adot2B__max_lor_uint32
 // A'*B function (dot3):     GB_Adot3B__max_lor_uint32
-// A*B function (heap):      GB_AheapB__max_lor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_LOR_UINT32 || GxB_NO_MAX_LOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_uint32
 GrB_Info GB_Adot2B__max_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_uint32
 GrB_Info GB_Adot3B__max_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lor_uint32
+GrB_Info GB_Asaxpy3B__max_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_uint64.c b/Source/Generated/GB_AxB__max_lor_uint64.c
index f699d92200..17edd7e9a3 100644
--- a/Source/Generated/GB_AxB__max_lor_uint64.c
+++ b/Source/Generated/GB_AxB__max_lor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_uint64
 // A'*B function (dot2):     GB_Adot2B__max_lor_uint64
 // A'*B function (dot3):     GB_Adot3B__max_lor_uint64
-// A*B function (heap):      GB_AheapB__max_lor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_LOR_UINT64 || GxB_NO_MAX_LOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_uint64
 GrB_Info GB_Adot2B__max_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_uint64
 GrB_Info GB_Adot3B__max_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_lor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_lor_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lor_uint8.c b/Source/Generated/GB_AxB__max_lor_uint8.c
index ca612b2764..c02ae4391b 100644
--- a/Source/Generated/GB_AxB__max_lor_uint8.c
+++ b/Source/Generated/GB_AxB__max_lor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lor_uint8
 // A'*B function (dot2):     GB_Adot2B__max_lor_uint8
 // A'*B function (dot3):     GB_Adot3B__max_lor_uint8
-// A*B function (heap):      GB_AheapB__max_lor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_lor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LOR || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_LOR_UINT8 || GxB_NO_MAX_LOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lor_uint8
 GrB_Info GB_Adot2B__max_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lor_uint8
 GrB_Info GB_Adot3B__max_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lor_uint8
+GrB_Info GB_Asaxpy3B__max_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_fp32.c b/Source/Generated/GB_AxB__max_lxor_fp32.c
index de9f8c3e3f..260d575a7e 100644
--- a/Source/Generated/GB_AxB__max_lxor_fp32.c
+++ b/Source/Generated/GB_AxB__max_lxor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_fp32
 // A'*B function (dot2):     GB_Adot2B__max_lxor_fp32
 // A'*B function (dot3):     GB_Adot3B__max_lxor_fp32
-// A*B function (heap):      GB_AheapB__max_lxor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, ((aik != 0) != (bkj != 0)))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, ((x != 0) != (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, ((x != 0) != (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_LXOR_FP32 || GxB_NO_MAX_LXOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_fp32
 GrB_Info GB_Adot2B__max_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_fp32
 GrB_Info GB_Adot3B__max_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_lxor_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_lxor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_fp64.c b/Source/Generated/GB_AxB__max_lxor_fp64.c
index ec25eafaf9..e3c29be887 100644
--- a/Source/Generated/GB_AxB__max_lxor_fp64.c
+++ b/Source/Generated/GB_AxB__max_lxor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_fp64
 // A'*B function (dot2):     GB_Adot2B__max_lxor_fp64
 // A'*B function (dot3):     GB_Adot3B__max_lxor_fp64
-// A*B function (heap):      GB_AheapB__max_lxor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, ((aik != 0) != (bkj != 0)))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, ((x != 0) != (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, ((x != 0) != (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_LXOR_FP64 || GxB_NO_MAX_LXOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_fp64
 GrB_Info GB_Adot2B__max_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_fp64
 GrB_Info GB_Adot3B__max_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_lxor_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_int16.c b/Source/Generated/GB_AxB__max_lxor_int16.c
index 8849909ab1..6ba0a1830e 100644
--- a/Source/Generated/GB_AxB__max_lxor_int16.c
+++ b/Source/Generated/GB_AxB__max_lxor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_int16
 // A'*B function (dot2):     GB_Adot2B__max_lxor_int16
 // A'*B function (dot3):     GB_Adot3B__max_lxor_int16
-// A*B function (heap):      GB_AheapB__max_lxor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_LXOR_INT16 || GxB_NO_MAX_LXOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_int16
 GrB_Info GB_Adot2B__max_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_int16
 GrB_Info GB_Adot3B__max_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lxor_int16
+GrB_Info GB_Asaxpy3B__max_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_int32.c b/Source/Generated/GB_AxB__max_lxor_int32.c
index 1017225d04..2d15a7f151 100644
--- a/Source/Generated/GB_AxB__max_lxor_int32.c
+++ b/Source/Generated/GB_AxB__max_lxor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_int32
 // A'*B function (dot2):     GB_Adot2B__max_lxor_int32
 // A'*B function (dot3):     GB_Adot3B__max_lxor_int32
-// A*B function (heap):      GB_AheapB__max_lxor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_LXOR_INT32 || GxB_NO_MAX_LXOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_int32
 GrB_Info GB_Adot2B__max_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_int32
 GrB_Info GB_Adot3B__max_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lxor_int32
+GrB_Info GB_Asaxpy3B__max_lxor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_int64.c b/Source/Generated/GB_AxB__max_lxor_int64.c
index 8fc3e284cb..58ea0d5f71 100644
--- a/Source/Generated/GB_AxB__max_lxor_int64.c
+++ b/Source/Generated/GB_AxB__max_lxor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_int64
 // A'*B function (dot2):     GB_Adot2B__max_lxor_int64
 // A'*B function (dot3):     GB_Adot3B__max_lxor_int64
-// A*B function (heap):      GB_AheapB__max_lxor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_LXOR_INT64 || GxB_NO_MAX_LXOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_int64
 GrB_Info GB_Adot2B__max_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_int64
 GrB_Info GB_Adot3B__max_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lxor_int64
+GrB_Info GB_Asaxpy3B__max_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_int8.c b/Source/Generated/GB_AxB__max_lxor_int8.c
index 5cec656ca7..1c6b8eb754 100644
--- a/Source/Generated/GB_AxB__max_lxor_int8.c
+++ b/Source/Generated/GB_AxB__max_lxor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_int8
 // A'*B function (dot2):     GB_Adot2B__max_lxor_int8
 // A'*B function (dot3):     GB_Adot3B__max_lxor_int8
-// A*B function (heap):      GB_AheapB__max_lxor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_LXOR_INT8 || GxB_NO_MAX_LXOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_int8
 GrB_Info GB_Adot2B__max_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_int8
 GrB_Info GB_Adot3B__max_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lxor_int8
+GrB_Info GB_Asaxpy3B__max_lxor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_uint16.c b/Source/Generated/GB_AxB__max_lxor_uint16.c
index 369b077478..12c77b7f70 100644
--- a/Source/Generated/GB_AxB__max_lxor_uint16.c
+++ b/Source/Generated/GB_AxB__max_lxor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_uint16
 // A'*B function (dot2):     GB_Adot2B__max_lxor_uint16
 // A'*B function (dot3):     GB_Adot3B__max_lxor_uint16
-// A*B function (heap):      GB_AheapB__max_lxor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_LXOR_UINT16 || GxB_NO_MAX_LXOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_uint16
 GrB_Info GB_Adot2B__max_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_uint16
 GrB_Info GB_Adot3B__max_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lxor_uint16
+GrB_Info GB_Asaxpy3B__max_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_uint32.c b/Source/Generated/GB_AxB__max_lxor_uint32.c
index cd1218f545..11022ad611 100644
--- a/Source/Generated/GB_AxB__max_lxor_uint32.c
+++ b/Source/Generated/GB_AxB__max_lxor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_uint32
 // A'*B function (dot2):     GB_Adot2B__max_lxor_uint32
 // A'*B function (dot3):     GB_Adot3B__max_lxor_uint32
-// A*B function (heap):      GB_AheapB__max_lxor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_LXOR_UINT32 || GxB_NO_MAX_LXOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_uint32
 GrB_Info GB_Adot2B__max_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_uint32
 GrB_Info GB_Adot3B__max_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lxor_uint32
+GrB_Info GB_Asaxpy3B__max_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_uint64.c b/Source/Generated/GB_AxB__max_lxor_uint64.c
index b2353e178d..9fcb3e6073 100644
--- a/Source/Generated/GB_AxB__max_lxor_uint64.c
+++ b/Source/Generated/GB_AxB__max_lxor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_uint64
 // A'*B function (dot2):     GB_Adot2B__max_lxor_uint64
 // A'*B function (dot3):     GB_Adot3B__max_lxor_uint64
-// A*B function (heap):      GB_AheapB__max_lxor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_LXOR_UINT64 || GxB_NO_MAX_LXOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_uint64
 GrB_Info GB_Adot2B__max_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_uint64
 GrB_Info GB_Adot3B__max_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_lxor_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_lxor_uint8.c b/Source/Generated/GB_AxB__max_lxor_uint8.c
index 5cd2f2a413..6cdd995c29 100644
--- a/Source/Generated/GB_AxB__max_lxor_uint8.c
+++ b/Source/Generated/GB_AxB__max_lxor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_lxor_uint8
 // A'*B function (dot2):     GB_Adot2B__max_lxor_uint8
 // A'*B function (dot3):     GB_Adot3B__max_lxor_uint8
-// A*B function (heap):      GB_AheapB__max_lxor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_lxor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_lxor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_LXOR_UINT8 || GxB_NO_MAX_LXOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_lxor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_lxor_uint8
 GrB_Info GB_Adot2B__max_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_lxor_uint8
 GrB_Info GB_Adot3B__max_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_lxor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_lxor_uint8
+GrB_Info GB_Asaxpy3B__max_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_fp32.c b/Source/Generated/GB_AxB__max_max_fp32.c
index 230c9d902c..af9e5ad93f 100644
--- a/Source/Generated/GB_AxB__max_max_fp32.c
+++ b/Source/Generated/GB_AxB__max_max_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_fp32
 // A'*B function (dot2):     GB_Adot2B__max_max_fp32
 // A'*B function (dot3):     GB_Adot3B__max_max_fp32
-// A*B function (heap):      GB_AheapB__max_max_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmaxf (aik, bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, fmaxf (aik, bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmaxf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmaxf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, fmaxf (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, fmaxf (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_MAX_MAX_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_max_fp32
 GrB_Info GB_Adot2B__max_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_fp32
 GrB_Info GB_Adot3B__max_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_max_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_fp64.c b/Source/Generated/GB_AxB__max_max_fp64.c
index 99c95e4792..b13e6ad026 100644
--- a/Source/Generated/GB_AxB__max_max_fp64.c
+++ b/Source/Generated/GB_AxB__max_max_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_fp64
 // A'*B function (dot2):     GB_Adot2B__max_max_fp64
 // A'*B function (dot3):     GB_Adot3B__max_max_fp64
-// A*B function (heap):      GB_AheapB__max_max_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmax (aik, bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, fmax (aik, bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmax (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmax (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, fmax (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, fmax (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_MAX_MAX_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_max_fp64
 GrB_Info GB_Adot2B__max_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_fp64
 GrB_Info GB_Adot3B__max_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_max_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_int16.c b/Source/Generated/GB_AxB__max_max_int16.c
index 89ab85d69f..c6198048b0 100644
--- a/Source/Generated/GB_AxB__max_max_int16.c
+++ b/Source/Generated/GB_AxB__max_max_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_int16
 // A'*B function (dot2):     GB_Adot2B__max_max_int16
 // A'*B function (dot3):     GB_Adot3B__max_max_int16
-// A*B function (heap):      GB_AheapB__max_max_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_MAX_MAX_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_int16
 GrB_Info GB_Adot2B__max_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_int16
 GrB_Info GB_Adot3B__max_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_max_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_max_int16
+GrB_Info GB_Asaxpy3B__max_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_int32.c b/Source/Generated/GB_AxB__max_max_int32.c
index 7a62e0ff96..17cf08720b 100644
--- a/Source/Generated/GB_AxB__max_max_int32.c
+++ b/Source/Generated/GB_AxB__max_max_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_int32
 // A'*B function (dot2):     GB_Adot2B__max_max_int32
 // A'*B function (dot3):     GB_Adot3B__max_max_int32
-// A*B function (heap):      GB_AheapB__max_max_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_MAX_MAX_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_int32
 GrB_Info GB_Adot2B__max_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_int32
 GrB_Info GB_Adot3B__max_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_max_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_max_int32
+GrB_Info GB_Asaxpy3B__max_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_int64.c b/Source/Generated/GB_AxB__max_max_int64.c
index 20d9378a20..3bb3a18ff6 100644
--- a/Source/Generated/GB_AxB__max_max_int64.c
+++ b/Source/Generated/GB_AxB__max_max_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_int64
 // A'*B function (dot2):     GB_Adot2B__max_max_int64
 // A'*B function (dot3):     GB_Adot3B__max_max_int64
-// A*B function (heap):      GB_AheapB__max_max_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_MAX_MAX_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_int64
 GrB_Info GB_Adot2B__max_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_int64
 GrB_Info GB_Adot3B__max_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_max_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_max_int64
+GrB_Info GB_Asaxpy3B__max_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_int8.c b/Source/Generated/GB_AxB__max_max_int8.c
index 70adfa271b..9282fd16a4 100644
--- a/Source/Generated/GB_AxB__max_max_int8.c
+++ b/Source/Generated/GB_AxB__max_max_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_int8
 // A'*B function (dot2):     GB_Adot2B__max_max_int8
 // A'*B function (dot3):     GB_Adot3B__max_max_int8
-// A*B function (heap):      GB_AheapB__max_max_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)); len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_MAX_MAX_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_int8
 GrB_Info GB_Adot2B__max_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_int8
 GrB_Info GB_Adot3B__max_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_max_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_max_int8
+GrB_Info GB_Asaxpy3B__max_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_uint16.c b/Source/Generated/GB_AxB__max_max_uint16.c
index 3b79bb9c48..22eeb58563 100644
--- a/Source/Generated/GB_AxB__max_max_uint16.c
+++ b/Source/Generated/GB_AxB__max_max_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_uint16
 // A'*B function (dot2):     GB_Adot2B__max_max_uint16
 // A'*B function (dot3):     GB_Adot3B__max_max_uint16
-// A*B function (heap):      GB_AheapB__max_max_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)); len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_MAX_MAX_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_uint16
 GrB_Info GB_Adot2B__max_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_uint16
 GrB_Info GB_Adot3B__max_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_max_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_max_uint16
+GrB_Info GB_Asaxpy3B__max_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_uint32.c b/Source/Generated/GB_AxB__max_max_uint32.c
index 9895528bd7..b8806c53b5 100644
--- a/Source/Generated/GB_AxB__max_max_uint32.c
+++ b/Source/Generated/GB_AxB__max_max_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_uint32
 // A'*B function (dot2):     GB_Adot2B__max_max_uint32
 // A'*B function (dot3):     GB_Adot3B__max_max_uint32
-// A*B function (heap):      GB_AheapB__max_max_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t)); len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_MAX_MAX_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_uint32
 GrB_Info GB_Adot2B__max_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_uint32
 GrB_Info GB_Adot3B__max_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_max_uint32
+GrB_Info GB_Asaxpy3B__max_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_uint64.c b/Source/Generated/GB_AxB__max_max_uint64.c
index f5e796aeff..80a520b80d 100644
--- a/Source/Generated/GB_AxB__max_max_uint64.c
+++ b/Source/Generated/GB_AxB__max_max_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_uint64
 // A'*B function (dot2):     GB_Adot2B__max_max_uint64
 // A'*B function (dot3):     GB_Adot3B__max_max_uint64
-// A*B function (heap):      GB_AheapB__max_max_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t)); len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_MAX_MAX_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_uint64
 GrB_Info GB_Adot2B__max_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_uint64
 GrB_Info GB_Adot3B__max_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_max_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_max_uint8.c b/Source/Generated/GB_AxB__max_max_uint8.c
index 943359e48b..3ea35322ea 100644
--- a/Source/Generated/GB_AxB__max_max_uint8.c
+++ b/Source/Generated/GB_AxB__max_max_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_max_uint8
 // A'*B function (dot2):     GB_Adot2B__max_max_uint8
 // A'*B function (dot3):     GB_Adot3B__max_max_uint8
-// A*B function (heap):      GB_AheapB__max_max_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_max_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_max_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_MAX_MAX_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_max_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_max_uint8
 GrB_Info GB_Adot2B__max_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_max_uint8
 GrB_Info GB_Adot3B__max_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_max_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_max_uint8
+GrB_Info GB_Asaxpy3B__max_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_fp32.c b/Source/Generated/GB_AxB__max_min_fp32.c
index 27ae1a2bf9..e1b7713d45 100644
--- a/Source/Generated/GB_AxB__max_min_fp32.c
+++ b/Source/Generated/GB_AxB__max_min_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_fp32
 // A'*B function (dot2):     GB_Adot2B__max_min_fp32
 // A'*B function (dot3):     GB_Adot3B__max_min_fp32
-// A*B function (heap):      GB_AheapB__max_min_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fminf (aik, bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, fminf (aik, bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fminf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fminf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, fminf (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, fminf (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_MIN_FP32 || GxB_NO_MAX_MIN_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_min_fp32
 GrB_Info GB_Adot2B__max_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_fp32
 GrB_Info GB_Adot3B__max_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_min_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_fp64.c b/Source/Generated/GB_AxB__max_min_fp64.c
index a92a2a8489..cc7eeeb919 100644
--- a/Source/Generated/GB_AxB__max_min_fp64.c
+++ b/Source/Generated/GB_AxB__max_min_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_fp64
 // A'*B function (dot2):     GB_Adot2B__max_min_fp64
 // A'*B function (dot3):     GB_Adot3B__max_min_fp64
-// A*B function (heap):      GB_AheapB__max_min_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmin (aik, bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, fmin (aik, bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmin (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmin (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, fmin (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, fmin (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_MIN_FP64 || GxB_NO_MAX_MIN_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_min_fp64
 GrB_Info GB_Adot2B__max_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_fp64
 GrB_Info GB_Adot3B__max_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_min_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_int16.c b/Source/Generated/GB_AxB__max_min_int16.c
index 15e3cc1d15..ce96f1c861 100644
--- a/Source/Generated/GB_AxB__max_min_int16.c
+++ b/Source/Generated/GB_AxB__max_min_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_int16
 // A'*B function (dot2):     GB_Adot2B__max_min_int16
 // A'*B function (dot3):     GB_Adot3B__max_min_int16
-// A*B function (heap):      GB_AheapB__max_min_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_MIN_INT16 || GxB_NO_MAX_MIN_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_int16
 GrB_Info GB_Adot2B__max_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_int16
 GrB_Info GB_Adot3B__max_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_min_int16
+GrB_Info GB_Asaxpy3B__max_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_int32.c b/Source/Generated/GB_AxB__max_min_int32.c
index 13765208cf..c2c45818fe 100644
--- a/Source/Generated/GB_AxB__max_min_int32.c
+++ b/Source/Generated/GB_AxB__max_min_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_int32
 // A'*B function (dot2):     GB_Adot2B__max_min_int32
 // A'*B function (dot3):     GB_Adot3B__max_min_int32
-// A*B function (heap):      GB_AheapB__max_min_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_MIN_INT32 || GxB_NO_MAX_MIN_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_int32
 GrB_Info GB_Adot2B__max_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_int32
 GrB_Info GB_Adot3B__max_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_min_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_min_int32
+GrB_Info GB_Asaxpy3B__max_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_int64.c b/Source/Generated/GB_AxB__max_min_int64.c
index c04d82563c..b13e8a398c 100644
--- a/Source/Generated/GB_AxB__max_min_int64.c
+++ b/Source/Generated/GB_AxB__max_min_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_int64
 // A'*B function (dot2):     GB_Adot2B__max_min_int64
 // A'*B function (dot3):     GB_Adot3B__max_min_int64
-// A*B function (heap):      GB_AheapB__max_min_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: GB_CTYPE is 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_MIN_INT64 || GxB_NO_MAX_MIN_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_int64
 GrB_Info GB_Adot2B__max_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_int64
 GrB_Info GB_Adot3B__max_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_min_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_min_int64
+GrB_Info GB_Asaxpy3B__max_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_int8.c b/Source/Generated/GB_AxB__max_min_int8.c
index c5493f1981..a0b99bfbee 100644
--- a/Source/Generated/GB_AxB__max_min_int8.c
+++ b/Source/Generated/GB_AxB__max_min_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_int8
 // A'*B function (dot2):     GB_Adot2B__max_min_int8
 // A'*B function (dot3):     GB_Adot3B__max_min_int8
-// A*B function (heap):      GB_AheapB__max_min_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_MIN_INT8 || GxB_NO_MAX_MIN_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_int8
 GrB_Info GB_Adot2B__max_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_int8
 GrB_Info GB_Adot3B__max_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_min_int8
+GrB_Info GB_Asaxpy3B__max_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_uint16.c b/Source/Generated/GB_AxB__max_min_uint16.c
index b925640fa3..e39f27f8c5 100644
--- a/Source/Generated/GB_AxB__max_min_uint16.c
+++ b/Source/Generated/GB_AxB__max_min_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_uint16
 // A'*B function (dot2):     GB_Adot2B__max_min_uint16
 // A'*B function (dot3):     GB_Adot3B__max_min_uint16
-// A*B function (heap):      GB_AheapB__max_min_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_MAX_MIN_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_uint16
 GrB_Info GB_Adot2B__max_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_uint16
 GrB_Info GB_Adot3B__max_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_min_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_min_uint16
+GrB_Info GB_Asaxpy3B__max_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_uint32.c b/Source/Generated/GB_AxB__max_min_uint32.c
index 4140ab2849..2032cf379a 100644
--- a/Source/Generated/GB_AxB__max_min_uint32.c
+++ b/Source/Generated/GB_AxB__max_min_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_uint32
 // A'*B function (dot2):     GB_Adot2B__max_min_uint32
 // A'*B function (dot3):     GB_Adot3B__max_min_uint32
-// A*B function (heap):      GB_AheapB__max_min_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_MAX_MIN_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_uint32
 GrB_Info GB_Adot2B__max_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_uint32
 GrB_Info GB_Adot3B__max_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_min_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_min_uint32
+GrB_Info GB_Asaxpy3B__max_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_uint64.c b/Source/Generated/GB_AxB__max_min_uint64.c
index a93a4c071a..1d1088de5a 100644
--- a/Source/Generated/GB_AxB__max_min_uint64.c
+++ b/Source/Generated/GB_AxB__max_min_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_uint64
 // A'*B function (dot2):     GB_Adot2B__max_min_uint64
 // A'*B function (dot3):     GB_Adot3B__max_min_uint64
-// A*B function (heap):      GB_AheapB__max_min_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_MAX_MIN_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_uint64
 GrB_Info GB_Adot2B__max_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_uint64
 GrB_Info GB_Adot3B__max_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_min_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_min_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_min_uint8.c b/Source/Generated/GB_AxB__max_min_uint8.c
index a3944b6919..edd5b0ab52 100644
--- a/Source/Generated/GB_AxB__max_min_uint8.c
+++ b/Source/Generated/GB_AxB__max_min_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_min_uint8
 // A'*B function (dot2):     GB_Adot2B__max_min_uint8
 // A'*B function (dot3):     GB_Adot3B__max_min_uint8
-// A*B function (heap):      GB_AheapB__max_min_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_min_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_min_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MIN || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_MAX_MIN_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_min_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_min_uint8
 GrB_Info GB_Adot2B__max_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_min_uint8
 GrB_Info GB_Adot3B__max_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_min_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_min_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_min_uint8
+GrB_Info GB_Asaxpy3B__max_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_fp32.c b/Source/Generated/GB_AxB__max_minus_fp32.c
index f6318d428e..c35a5405c4 100644
--- a/Source/Generated/GB_AxB__max_minus_fp32.c
+++ b/Source/Generated/GB_AxB__max_minus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_fp32
 // A'*B function (dot2):     GB_Adot2B__max_minus_fp32
 // A'*B function (dot3):     GB_Adot3B__max_minus_fp32
-// A*B function (heap):      GB_AheapB__max_minus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik - bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x - y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x - y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float)): len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_MINUS_FP32 || GxB_NO_MAX_MINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_fp32
 GrB_Info GB_Adot2B__max_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_fp32
 GrB_Info GB_Adot3B__max_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_minus_fp32  // dense dot-product kernel, C+=A'*B, for the max_minus_fp32 semiring
+(
+    GrB_Matrix C,                               // updated in place (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice partitions of A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice partitions of B -- confirm in template
+    const int nthreads                          // NOTE(review): presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE  // kernel is compiled out when GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;  // semiring disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // function body is supplied by the shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_minus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_minus_fp32  // saxpy3 (Gustavson + Hash) kernel: C=A*B, C<M>=A*B, C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // output matrix, now passed directly instead of through a handle
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // NOTE(review): presumably one entry per task -- confirm
+    const int ntasks,    // NOTE(review): presumably the length of TaskList -- confirm
+    const int nfine,     // NOTE(review): presumably the number of fine-grained tasks -- confirm
+    const int nthreads,  // NOTE(review): presumably the thread count used by the template -- confirm
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // function body is supplied by the shared saxpy3 template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_fp64.c b/Source/Generated/GB_AxB__max_minus_fp64.c
index e18859b9c6..c86d46e1c9 100644
--- a/Source/Generated/GB_AxB__max_minus_fp64.c
+++ b/Source/Generated/GB_AxB__max_minus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_fp64
 // A'*B function (dot2):     GB_Adot2B__max_minus_fp64
 // A'*B function (dot3):     GB_Adot3B__max_minus_fp64
-// A*B function (heap):      GB_AheapB__max_minus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik - bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x - y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x - y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double)): len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_MINUS_FP64 || GxB_NO_MAX_MINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_fp64
 GrB_Info GB_Adot2B__max_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_fp64
 GrB_Info GB_Adot3B__max_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_minus_fp64  // dense dot-product kernel, C+=A'*B, for the max_minus_fp64 semiring
+(
+    GrB_Matrix C,                               // updated in place (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice partitions of A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice partitions of B -- confirm in template
+    const int nthreads                          // NOTE(review): presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE  // kernel is compiled out when GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;  // semiring disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // function body is supplied by the shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_minus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_minus_fp64  // saxpy3 (Gustavson + Hash) kernel: C=A*B, C<M>=A*B, C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // output matrix, now passed directly instead of through a handle
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // NOTE(review): presumably one entry per task -- confirm
+    const int ntasks,    // NOTE(review): presumably the length of TaskList -- confirm
+    const int nfine,     // NOTE(review): presumably the number of fine-grained tasks -- confirm
+    const int nthreads,  // NOTE(review): presumably the thread count used by the template -- confirm
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // function body is supplied by the shared saxpy3 template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_int16.c b/Source/Generated/GB_AxB__max_minus_int16.c
index cc58deb463..6fbf81733a 100644
--- a/Source/Generated/GB_AxB__max_minus_int16.c
+++ b/Source/Generated/GB_AxB__max_minus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_int16
 // A'*B function (dot2):     GB_Adot2B__max_minus_int16
 // A'*B function (dot3):     GB_Adot3B__max_minus_int16
-// A*B function (heap):      GB_AheapB__max_minus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t)): len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_MINUS_INT16 || GxB_NO_MAX_MINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_int16
 GrB_Info GB_Adot2B__max_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_int16
 GrB_Info GB_Adot3B__max_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_minus_int16  // dense dot-product kernel, C+=A'*B, for the max_minus_int16 semiring
+(
+    GrB_Matrix C,                               // updated in place (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): *_is_pattern presumably means the values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice partitions of A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice partitions of B -- confirm in template
+    const int nthreads                          // NOTE(review): presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE  // kernel is compiled out when GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;  // semiring disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"  // function body is supplied by the shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_minus_int16
+GrB_Info GB_Asaxpy3B__max_minus_int16  // saxpy3 (Gustavson + Hash) kernel: C=A*B, C<M>=A*B, C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // output matrix, now passed directly instead of through a handle
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // NOTE(review): presumably one entry per task -- confirm
+    const int ntasks,    // NOTE(review): presumably the length of TaskList -- confirm
+    const int nfine,     // NOTE(review): presumably the number of fine-grained tasks -- confirm
+    const int nthreads,  // NOTE(review): presumably the thread count used by the template -- confirm
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"  // function body is supplied by the shared saxpy3 template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_int32.c b/Source/Generated/GB_AxB__max_minus_int32.c
index d2bacf6b58..ecf1ff2214 100644
--- a/Source/Generated/GB_AxB__max_minus_int32.c
+++ b/Source/Generated/GB_AxB__max_minus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_int32
 // A'*B function (dot2):     GB_Adot2B__max_minus_int32
 // A'*B function (dot3):     GB_Adot3B__max_minus_int32
-// A*B function (heap):      GB_AheapB__max_minus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)): len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_MINUS_INT32 || GxB_NO_MAX_MINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_int32
 GrB_Info GB_Adot2B__max_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_int32
 GrB_Info GB_Adot3B__max_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_minus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_minus_int32
+GrB_Info GB_Asaxpy3B__max_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_int64.c b/Source/Generated/GB_AxB__max_minus_int64.c
index 7a9f595e08..3d9be7b5f9 100644
--- a/Source/Generated/GB_AxB__max_minus_int64.c
+++ b/Source/Generated/GB_AxB__max_minus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_int64
 // A'*B function (dot2):     GB_Adot2B__max_minus_int64
 // A'*B function (dot3):     GB_Adot3B__max_minus_int64
-// A*B function (heap):      GB_AheapB__max_minus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_MINUS_INT64 || GxB_NO_MAX_MINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_int64
 GrB_Info GB_Adot2B__max_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_int64
 GrB_Info GB_Adot3B__max_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_minus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_minus_int64
+GrB_Info GB_Asaxpy3B__max_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_int8.c b/Source/Generated/GB_AxB__max_minus_int8.c
index c079c05718..bb6429f4a7 100644
--- a/Source/Generated/GB_AxB__max_minus_int8.c
+++ b/Source/Generated/GB_AxB__max_minus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_int8
 // A'*B function (dot2):     GB_Adot2B__max_minus_int8
 // A'*B function (dot3):     GB_Adot3B__max_minus_int8
-// A*B function (heap):      GB_AheapB__max_minus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_MINUS_INT8 || GxB_NO_MAX_MINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_int8
 GrB_Info GB_Adot2B__max_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_int8
 GrB_Info GB_Adot3B__max_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_minus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_minus_int8
+GrB_Info GB_Asaxpy3B__max_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_uint16.c b/Source/Generated/GB_AxB__max_minus_uint16.c
index 1060589f95..38b149f26d 100644
--- a/Source/Generated/GB_AxB__max_minus_uint16.c
+++ b/Source/Generated/GB_AxB__max_minus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_uint16
 // A'*B function (dot2):     GB_Adot2B__max_minus_uint16
 // A'*B function (dot3):     GB_Adot3B__max_minus_uint16
-// A*B function (heap):      GB_AheapB__max_minus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_MINUS_UINT16 || GxB_NO_MAX_MINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_uint16
 GrB_Info GB_Adot2B__max_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_uint16
 GrB_Info GB_Adot3B__max_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_minus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_minus_uint16
+GrB_Info GB_Asaxpy3B__max_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_uint32.c b/Source/Generated/GB_AxB__max_minus_uint32.c
index 5a0738bdef..53354c5b31 100644
--- a/Source/Generated/GB_AxB__max_minus_uint32.c
+++ b/Source/Generated/GB_AxB__max_minus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_uint32
 // A'*B function (dot2):     GB_Adot2B__max_minus_uint32
 // A'*B function (dot3):     GB_Adot3B__max_minus_uint32
-// A*B function (heap):      GB_AheapB__max_minus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_MINUS_UINT32 || GxB_NO_MAX_MINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_uint32
 GrB_Info GB_Adot2B__max_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_uint32
 GrB_Info GB_Adot3B__max_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_minus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_minus_uint32
+GrB_Info GB_Asaxpy3B__max_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_uint64.c b/Source/Generated/GB_AxB__max_minus_uint64.c
index cdb38b3a3f..df2c4a794c 100644
--- a/Source/Generated/GB_AxB__max_minus_uint64.c
+++ b/Source/Generated/GB_AxB__max_minus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_uint64
 // A'*B function (dot2):     GB_Adot2B__max_minus_uint64
 // A'*B function (dot3):     GB_Adot3B__max_minus_uint64
-// A*B function (heap):      GB_AheapB__max_minus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t)): len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_MINUS_UINT64 || GxB_NO_MAX_MINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_uint64
 GrB_Info GB_Adot2B__max_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_uint64
 GrB_Info GB_Adot3B__max_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_minus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_minus_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_minus_uint8.c b/Source/Generated/GB_AxB__max_minus_uint8.c
index 2accf4ead8..776af6a98d 100644
--- a/Source/Generated/GB_AxB__max_minus_uint8.c
+++ b/Source/Generated/GB_AxB__max_minus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_minus_uint8
 // A'*B function (dot2):     GB_Adot2B__max_minus_uint8
 // A'*B function (dot3):     GB_Adot3B__max_minus_uint8
-// A*B function (heap):      GB_AheapB__max_minus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_minus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_minus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (aik - bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x - y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t)): len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_MINUS || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_MINUS_UINT8 || GxB_NO_MAX_MINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_minus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_minus_uint8
 GrB_Info GB_Adot2B__max_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_minus_uint8
 GrB_Info GB_Adot3B__max_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_minus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_minus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_minus_uint8
+GrB_Info GB_Asaxpy3B__max_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_fp32.c b/Source/Generated/GB_AxB__max_plus_fp32.c
index 740de3f964..3529f54fc4 100644
--- a/Source/Generated/GB_AxB__max_plus_fp32.c
+++ b/Source/Generated/GB_AxB__max_plus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_fp32
 // A'*B function (dot2):     GB_Adot2B__max_plus_fp32
 // A'*B function (dot3):     GB_Adot3B__max_plus_fp32
-// A*B function (heap):      GB_AheapB__max_plus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik + bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x + y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x + y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float)): len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_MAX_PLUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_fp32
 GrB_Info GB_Adot2B__max_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_fp32
 GrB_Info GB_Adot3B__max_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_plus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_plus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_fp64.c b/Source/Generated/GB_AxB__max_plus_fp64.c
index ee63e3c92e..e01f30a9a5 100644
--- a/Source/Generated/GB_AxB__max_plus_fp64.c
+++ b/Source/Generated/GB_AxB__max_plus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_fp64
 // A'*B function (dot2):     GB_Adot2B__max_plus_fp64
 // A'*B function (dot3):     GB_Adot3B__max_plus_fp64
-// A*B function (heap):      GB_AheapB__max_plus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik + bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x + y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x + y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_MAX_PLUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_fp64
 GrB_Info GB_Adot2B__max_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_fp64
 GrB_Info GB_Adot3B__max_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_plus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_int16.c b/Source/Generated/GB_AxB__max_plus_int16.c
index 08d59ee64c..f6013e6101 100644
--- a/Source/Generated/GB_AxB__max_plus_int16.c
+++ b/Source/Generated/GB_AxB__max_plus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_int16
 // A'*B function (dot2):     GB_Adot2B__max_plus_int16
 // A'*B function (dot3):     GB_Adot3B__max_plus_int16
-// A*B function (heap):      GB_AheapB__max_plus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_MAX_PLUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_int16
 GrB_Info GB_Adot2B__max_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_int16
 GrB_Info GB_Adot3B__max_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_plus_int16
+GrB_Info GB_Asaxpy3B__max_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_int32.c b/Source/Generated/GB_AxB__max_plus_int32.c
index b94fdd79ae..147f18b910 100644
--- a/Source/Generated/GB_AxB__max_plus_int32.c
+++ b/Source/Generated/GB_AxB__max_plus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_int32
 // A'*B function (dot2):     GB_Adot2B__max_plus_int32
 // A'*B function (dot3):     GB_Adot3B__max_plus_int32
-// A*B function (heap):      GB_AheapB__max_plus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_MAX_PLUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_int32
 GrB_Info GB_Adot2B__max_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_int32
 GrB_Info GB_Adot3B__max_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_plus_int32
+GrB_Info GB_Asaxpy3B__max_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_int64.c b/Source/Generated/GB_AxB__max_plus_int64.c
index 750de56df9..5fce7b02c9 100644
--- a/Source/Generated/GB_AxB__max_plus_int64.c
+++ b/Source/Generated/GB_AxB__max_plus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_int64
 // A'*B function (dot2):     GB_Adot2B__max_plus_int64
 // A'*B function (dot3):     GB_Adot3B__max_plus_int64
-// A*B function (heap):      GB_AheapB__max_plus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_MAX_PLUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_int64
 GrB_Info GB_Adot2B__max_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_int64
 GrB_Info GB_Adot3B__max_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_plus_int64
+GrB_Info GB_Asaxpy3B__max_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_int8.c b/Source/Generated/GB_AxB__max_plus_int8.c
index 59e8b62395..323374fe6d 100644
--- a/Source/Generated/GB_AxB__max_plus_int8.c
+++ b/Source/Generated/GB_AxB__max_plus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_int8
 // A'*B function (dot2):     GB_Adot2B__max_plus_int8
 // A'*B function (dot3):     GB_Adot3B__max_plus_int8
-// A*B function (heap):      GB_AheapB__max_plus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// monoid add of x and y ("+" is the MAX monoid here): GB_IMAX (x, y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_MAX_PLUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_int8
 GrB_Info GB_Adot2B__max_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_int8
 GrB_Info GB_Adot3B__max_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_plus_int8
+GrB_Info GB_Asaxpy3B__max_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_uint16.c b/Source/Generated/GB_AxB__max_plus_uint16.c
index 17a1a727dd..de7d501524 100644
--- a/Source/Generated/GB_AxB__max_plus_uint16.c
+++ b/Source/Generated/GB_AxB__max_plus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_uint16
 // A'*B function (dot2):     GB_Adot2B__max_plus_uint16
 // A'*B function (dot3):     GB_Adot3B__max_plus_uint16
-// A*B function (heap):      GB_AheapB__max_plus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// monoid add of x and y ("+" is the MAX monoid here): GB_IMAX (x, y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_MAX_PLUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_uint16
 GrB_Info GB_Adot2B__max_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_uint16
 GrB_Info GB_Adot3B__max_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_plus_uint16
+GrB_Info GB_Asaxpy3B__max_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_uint32.c b/Source/Generated/GB_AxB__max_plus_uint32.c
index 9c5f4ff7c6..afc1c433b5 100644
--- a/Source/Generated/GB_AxB__max_plus_uint32.c
+++ b/Source/Generated/GB_AxB__max_plus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_uint32
 // A'*B function (dot2):     GB_Adot2B__max_plus_uint32
 // A'*B function (dot3):     GB_Adot3B__max_plus_uint32
-// A*B function (heap):      GB_AheapB__max_plus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// monoid add of x and y ("+" is the MAX monoid here): GB_IMAX (x, y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_MAX_PLUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_uint32
 GrB_Info GB_Adot2B__max_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_uint32
 GrB_Info GB_Adot3B__max_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_plus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_plus_uint32
+GrB_Info GB_Asaxpy3B__max_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_uint64.c b/Source/Generated/GB_AxB__max_plus_uint64.c
index 3720c2fb9a..f0795f3d96 100644
--- a/Source/Generated/GB_AxB__max_plus_uint64.c
+++ b/Source/Generated/GB_AxB__max_plus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_uint64
 // A'*B function (dot2):     GB_Adot2B__max_plus_uint64
 // A'*B function (dot3):     GB_Adot3B__max_plus_uint64
-// A*B function (heap):      GB_AheapB__max_plus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// monoid add of x and y ("+" is the MAX monoid here): GB_IMAX (x, y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_MAX_PLUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_uint64
 GrB_Info GB_Adot2B__max_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_uint64
 GrB_Info GB_Adot3B__max_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_plus_uint64     // C+=A'*B (dense dot product), hard-coded MAX_PLUS_UINT64 semiring
+(
+    GrB_Matrix C,                       // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably describes naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably describes nbslice slices of B -- TODO confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_plus_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_plus_uint64   // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded MAX_PLUS_UINT64 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C of C=A*B (passed directly; the removed heap method took a handle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_plus_uint8.c b/Source/Generated/GB_AxB__max_plus_uint8.c
index fe235e64f2..ead8824d82 100644
--- a/Source/Generated/GB_AxB__max_plus_uint8.c
+++ b/Source/Generated/GB_AxB__max_plus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_plus_uint8
 // A'*B function (dot2):     GB_Adot2B__max_plus_uint8
 // A'*B function (dot3):     GB_Adot3B__max_plus_uint8
-// A*B function (heap):      GB_AheapB__max_plus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_plus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_plus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (aik + bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x + y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y, where "+" is the monoid's add operator (here: max)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_PLUS || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_MAX_PLUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_plus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_plus_uint8
 GrB_Info GB_Adot2B__max_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_plus_uint8
 GrB_Info GB_Adot3B__max_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_plus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_plus_uint8      // C+=A'*B (dense dot product), hard-coded MAX_PLUS_UINT8 semiring
+(
+    GrB_Matrix C,                       // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably describes naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably describes nbslice slices of B -- TODO confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_plus_uint8
+GrB_Info GB_Asaxpy3B__max_plus_uint8    // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded MAX_PLUS_UINT8 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C of C=A*B (passed directly; the removed heap method took a handle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_fp32.c b/Source/Generated/GB_AxB__max_rdiv_fp32.c
index 1833defff6..7b8dc0924b 100644
--- a/Source/Generated/GB_AxB__max_rdiv_fp32.c
+++ b/Source/Generated/GB_AxB__max_rdiv_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_fp32
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_fp32
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_fp32
-// A*B function (heap):      GB_AheapB__max_rdiv_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (bkj / aik))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (y / x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (y / x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y, where "+" is the monoid's add operator (here: max)
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_RDIV_FP32 || GxB_NO_MAX_RDIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_fp32
 GrB_Info GB_Adot2B__max_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_fp32
 GrB_Info GB_Adot3B__max_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_rdiv_fp32       // C+=A'*B (dense dot product), hard-coded MAX_RDIV_FP32 semiring
+(
+    GrB_Matrix C,                       // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably describes naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably describes nbslice slices of B -- TODO confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
+    #endif
+}
 
-GrB_Info GB_AheapB__max_rdiv_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_rdiv_fp32     // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded MAX_RDIV_FP32 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C of C=A*B (passed directly; the removed heap method took a handle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_fp64.c b/Source/Generated/GB_AxB__max_rdiv_fp64.c
index 1a56d505dc..5ccd2cc189 100644
--- a/Source/Generated/GB_AxB__max_rdiv_fp64.c
+++ b/Source/Generated/GB_AxB__max_rdiv_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_fp64
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_fp64
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_fp64
-// A*B function (heap):      GB_AheapB__max_rdiv_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (bkj / aik))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (y / x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (y / x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y, where "+" is the monoid's add operator (here: max)
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_RDIV_FP64 || GxB_NO_MAX_RDIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_fp64
 GrB_Info GB_Adot2B__max_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_fp64
 GrB_Info GB_Adot3B__max_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_rdiv_fp64       // C+=A'*B (dense dot product), hard-coded MAX_RDIV_FP64 semiring
+(
+    GrB_Matrix C,                       // the C of C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably describes naslice slices of A -- TODO confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably describes nbslice slices of B -- TODO confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
+    #endif
+}
 
-GrB_Info GB_AheapB__max_rdiv_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_rdiv_fp64     // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded MAX_RDIV_FP64 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C of C=A*B (passed directly; the removed heap method took a handle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably the number of entries in TaskList -- TODO confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the function body is supplied by this included template
+    return (GrB_SUCCESS) ;              // reached only when the semiring is enabled
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_int16.c b/Source/Generated/GB_AxB__max_rdiv_int16.c
index 4a3a8c47aa..836888e91a 100644
--- a/Source/Generated/GB_AxB__max_rdiv_int16.c
+++ b/Source/Generated/GB_AxB__max_rdiv_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_int16
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_int16
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_int16
-// A*B function (heap):      GB_AheapB__max_rdiv_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 16)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 16) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y, where "+" is the monoid's add operator (here: max)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_RDIV_INT16 || GxB_NO_MAX_RDIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_int16
 GrB_Info GB_Adot2B__max_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_int16
 GrB_Info GB_Adot3B__max_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rdiv_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rdiv_int16
+GrB_Info GB_Asaxpy3B__max_rdiv_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_int32.c b/Source/Generated/GB_AxB__max_rdiv_int32.c
index 4d179a7acd..6ab7656f7f 100644
--- a/Source/Generated/GB_AxB__max_rdiv_int32.c
+++ b/Source/Generated/GB_AxB__max_rdiv_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_int32
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_int32
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_int32
-// A*B function (heap):      GB_AheapB__max_rdiv_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 32)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 32) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_RDIV_INT32 || GxB_NO_MAX_RDIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_int32
 GrB_Info GB_Adot2B__max_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_int32
 GrB_Info GB_Adot3B__max_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rdiv_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rdiv_int32
+GrB_Info GB_Asaxpy3B__max_rdiv_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_int64.c b/Source/Generated/GB_AxB__max_rdiv_int64.c
index 074e8803aa..3230e1f175 100644
--- a/Source/Generated/GB_AxB__max_rdiv_int64.c
+++ b/Source/Generated/GB_AxB__max_rdiv_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_int64
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_int64
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_int64
-// A*B function (heap):      GB_AheapB__max_rdiv_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 64)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 64) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_RDIV_INT64 || GxB_NO_MAX_RDIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_int64
 GrB_Info GB_Adot2B__max_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_int64
 GrB_Info GB_Adot3B__max_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rdiv_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rdiv_int64
+GrB_Info GB_Asaxpy3B__max_rdiv_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_int8.c b/Source/Generated/GB_AxB__max_rdiv_int8.c
index c7005775c1..9a5fda31c8 100644
--- a/Source/Generated/GB_AxB__max_rdiv_int8.c
+++ b/Source/Generated/GB_AxB__max_rdiv_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_int8
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_int8
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_int8
-// A*B function (heap):      GB_AheapB__max_rdiv_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 8)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 8) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_RDIV_INT8 || GxB_NO_MAX_RDIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_int8
 GrB_Info GB_Adot2B__max_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_int8
 GrB_Info GB_Adot3B__max_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rdiv_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rdiv_int8
+GrB_Info GB_Asaxpy3B__max_rdiv_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_uint16.c b/Source/Generated/GB_AxB__max_rdiv_uint16.c
index 2cb684a6f0..4b358d5b56 100644
--- a/Source/Generated/GB_AxB__max_rdiv_uint16.c
+++ b/Source/Generated/GB_AxB__max_rdiv_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_uint16
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_uint16
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_uint16
-// A*B function (heap):      GB_AheapB__max_rdiv_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 16)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 16) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_RDIV_UINT16 || GxB_NO_MAX_RDIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_uint16
 GrB_Info GB_Adot2B__max_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_uint16
 GrB_Info GB_Adot3B__max_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rdiv_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rdiv_uint16
+GrB_Info GB_Asaxpy3B__max_rdiv_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_uint32.c b/Source/Generated/GB_AxB__max_rdiv_uint32.c
index a6c5e821a2..3ee44885a6 100644
--- a/Source/Generated/GB_AxB__max_rdiv_uint32.c
+++ b/Source/Generated/GB_AxB__max_rdiv_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_uint32
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_uint32
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_uint32
-// A*B function (heap):      GB_AheapB__max_rdiv_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 32)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 32) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_RDIV_UINT32 || GxB_NO_MAX_RDIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_uint32
 GrB_Info GB_Adot2B__max_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_uint32
 GrB_Info GB_Adot3B__max_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rdiv_uint32
+GrB_Info GB_Asaxpy3B__max_rdiv_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_uint64.c b/Source/Generated/GB_AxB__max_rdiv_uint64.c
index 5b7cf3c2b3..c1562a731f 100644
--- a/Source/Generated/GB_AxB__max_rdiv_uint64.c
+++ b/Source/Generated/GB_AxB__max_rdiv_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_uint64
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_uint64
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_uint64
-// A*B function (heap):      GB_AheapB__max_rdiv_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 64)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 64) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_RDIV_UINT64 || GxB_NO_MAX_RDIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_uint64
 GrB_Info GB_Adot2B__max_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_uint64
 GrB_Info GB_Adot3B__max_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_rdiv_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_rdiv_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rdiv_uint8.c b/Source/Generated/GB_AxB__max_rdiv_uint8.c
index 25403db27c..6fd596dda1 100644
--- a/Source/Generated/GB_AxB__max_rdiv_uint8.c
+++ b/Source/Generated/GB_AxB__max_rdiv_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rdiv_uint8
 // A'*B function (dot2):     GB_Adot2B__max_rdiv_uint8
 // A'*B function (dot3):     GB_Adot3B__max_rdiv_uint8
-// A*B function (heap):      GB_AheapB__max_rdiv_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_rdiv_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rdiv_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 8)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 8) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RDIV || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_RDIV_UINT8 || GxB_NO_MAX_RDIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rdiv_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rdiv_uint8
 GrB_Info GB_Adot2B__max_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rdiv_uint8
 GrB_Info GB_Adot3B__max_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rdiv_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rdiv_uint8
+GrB_Info GB_Asaxpy3B__max_rdiv_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_fp32.c b/Source/Generated/GB_AxB__max_rminus_fp32.c
index ed554f5d48..58b1cd6571 100644
--- a/Source/Generated/GB_AxB__max_rminus_fp32.c
+++ b/Source/Generated/GB_AxB__max_rminus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_fp32
 // A'*B function (dot2):     GB_Adot2B__max_rminus_fp32
 // A'*B function (dot3):     GB_Adot3B__max_rminus_fp32
-// A*B function (heap):      GB_AheapB__max_rminus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (bkj - aik))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (y - x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (y - x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_RMINUS_FP32 || GxB_NO_MAX_RMINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_fp32
 GrB_Info GB_Adot2B__max_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_fp32
 GrB_Info GB_Adot3B__max_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_rminus_fp32 // C+=A'*B (dense dot product) for the MAX_RMINUS_FP32 semiring
+(
+    GrB_Matrix C, // matrix being accumulated into (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern, // NOTE(review): A_is_pattern presumably means the values of A are unused; confirm in the template
+    int64_t *GB_RESTRICT A_slice, int naslice, // NOTE(review): presumably the slicing of A into naslice parts; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern, // NOTE(review): same assumption as A_is_pattern
+    int64_t *GB_RESTRICT B_slice, int nbslice, // NOTE(review): presumably the slicing of B into nbslice parts; confirm in the template
+    const int nthreads // NOTE(review): presumably the thread count used by the template; confirm
+)
+{ 
+    #if GB_DISABLE // true when any of the GxB_NO_* controls for MAX, RMINUS or FP32 is set
+    return (GrB_NO_VALUE) ; // semiring disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c" // the whole computation is textually included, specialized by the GB_* macros of this file
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
 
-GrB_Info GB_AheapB__max_rminus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_rminus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_fp64.c b/Source/Generated/GB_AxB__max_rminus_fp64.c
index c22af71fb0..ea47d50c5b 100644
--- a/Source/Generated/GB_AxB__max_rminus_fp64.c
+++ b/Source/Generated/GB_AxB__max_rminus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_fp64
 // A'*B function (dot2):     GB_Adot2B__max_rminus_fp64
 // A'*B function (dot3):     GB_Adot3B__max_rminus_fp64
-// A*B function (heap):      GB_AheapB__max_rminus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (bkj - aik))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (y - x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (y - x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_RMINUS_FP64 || GxB_NO_MAX_RMINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_fp64
 GrB_Info GB_Adot2B__max_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_fp64
 GrB_Info GB_Adot3B__max_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_rminus_fp64 // C+=A'*B (dense dot product) for the MAX_RMINUS_FP64 semiring
+(
+    GrB_Matrix C, // matrix being accumulated into (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern, // NOTE(review): A_is_pattern presumably means the values of A are unused; confirm in the template
+    int64_t *GB_RESTRICT A_slice, int naslice, // NOTE(review): presumably the slicing of A into naslice parts; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern, // NOTE(review): same assumption as A_is_pattern
+    int64_t *GB_RESTRICT B_slice, int nbslice, // NOTE(review): presumably the slicing of B into nbslice parts; confirm in the template
+    const int nthreads // NOTE(review): presumably the thread count used by the template; confirm
+)
+{ 
+    #if GB_DISABLE // true when any of the GxB_NO_* controls for MAX, RMINUS or FP64 is set
+    return (GrB_NO_VALUE) ; // semiring disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c" // the whole computation is textually included, specialized by the GB_* macros of this file
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
 
-GrB_Info GB_AheapB__max_rminus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_rminus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_int16.c b/Source/Generated/GB_AxB__max_rminus_int16.c
index df3daa2e46..5aca1421df 100644
--- a/Source/Generated/GB_AxB__max_rminus_int16.c
+++ b/Source/Generated/GB_AxB__max_rminus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_int16
 // A'*B function (dot2):     GB_Adot2B__max_rminus_int16
 // A'*B function (dot3):     GB_Adot3B__max_rminus_int16
-// A*B function (heap):      GB_AheapB__max_rminus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_RMINUS_INT16 || GxB_NO_MAX_RMINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_int16
 GrB_Info GB_Adot2B__max_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_int16
 GrB_Info GB_Adot3B__max_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rminus_int16 // C+=A'*B (dense dot product) for the MAX_RMINUS_INT16 semiring
+(
+    GrB_Matrix C, // matrix being accumulated into (C += A'*B)
+    const GrB_Matrix A, bool A_is_pattern, // NOTE(review): A_is_pattern presumably means the values of A are unused; confirm in the template
+    int64_t *GB_RESTRICT A_slice, int naslice, // NOTE(review): presumably the slicing of A into naslice parts; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern, // NOTE(review): same assumption as A_is_pattern
+    int64_t *GB_RESTRICT B_slice, int nbslice, // NOTE(review): presumably the slicing of B into nbslice parts; confirm in the template
+    const int nthreads // NOTE(review): presumably the thread count used by the template; confirm
+)
+{ 
+    #if GB_DISABLE // true when any of the GxB_NO_* controls for MAX, RMINUS or INT16 is set
+    return (GrB_NO_VALUE) ; // semiring disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c" // the whole computation is textually included, specialized by the GB_* macros of this file
+    return (GrB_SUCCESS) ; // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rminus_int16
+GrB_Info GB_Asaxpy3B__max_rminus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_int32.c b/Source/Generated/GB_AxB__max_rminus_int32.c
index c1b62c0020..5e756f83df 100644
--- a/Source/Generated/GB_AxB__max_rminus_int32.c
+++ b/Source/Generated/GB_AxB__max_rminus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_int32
 // A'*B function (dot2):     GB_Adot2B__max_rminus_int32
 // A'*B function (dot3):     GB_Adot3B__max_rminus_int32
-// A*B function (heap):      GB_AheapB__max_rminus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_RMINUS_INT32 || GxB_NO_MAX_RMINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_int32
 GrB_Info GB_Adot2B__max_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_int32
 GrB_Info GB_Adot3B__max_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rminus_int32
+GrB_Info GB_Asaxpy3B__max_rminus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_int64.c b/Source/Generated/GB_AxB__max_rminus_int64.c
index fd6467c405..e03822f4f3 100644
--- a/Source/Generated/GB_AxB__max_rminus_int64.c
+++ b/Source/Generated/GB_AxB__max_rminus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_int64
 // A'*B function (dot2):     GB_Adot2B__max_rminus_int64
 // A'*B function (dot3):     GB_Adot3B__max_rminus_int64
-// A*B function (heap):      GB_AheapB__max_rminus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_RMINUS_INT64 || GxB_NO_MAX_RMINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_int64
 GrB_Info GB_Adot2B__max_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_int64
 GrB_Info GB_Adot3B__max_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rminus_int64
+GrB_Info GB_Asaxpy3B__max_rminus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_int8.c b/Source/Generated/GB_AxB__max_rminus_int8.c
index 056740c49c..70c59a4e9e 100644
--- a/Source/Generated/GB_AxB__max_rminus_int8.c
+++ b/Source/Generated/GB_AxB__max_rminus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_int8
 // A'*B function (dot2):     GB_Adot2B__max_rminus_int8
 // A'*B function (dot3):     GB_Adot3B__max_rminus_int8
-// A*B function (heap):      GB_AheapB__max_rminus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_RMINUS_INT8 || GxB_NO_MAX_RMINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_int8
 GrB_Info GB_Adot2B__max_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_int8
 GrB_Info GB_Adot3B__max_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rminus_int8
+GrB_Info GB_Asaxpy3B__max_rminus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_uint16.c b/Source/Generated/GB_AxB__max_rminus_uint16.c
index aa924cd53e..e87d6bce91 100644
--- a/Source/Generated/GB_AxB__max_rminus_uint16.c
+++ b/Source/Generated/GB_AxB__max_rminus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_uint16
 // A'*B function (dot2):     GB_Adot2B__max_rminus_uint16
 // A'*B function (dot3):     GB_Adot3B__max_rminus_uint16
-// A*B function (heap):      GB_AheapB__max_rminus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_RMINUS_UINT16 || GxB_NO_MAX_RMINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_uint16
 GrB_Info GB_Adot2B__max_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_uint16
 GrB_Info GB_Adot3B__max_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rminus_uint16
+GrB_Info GB_Asaxpy3B__max_rminus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_uint32.c b/Source/Generated/GB_AxB__max_rminus_uint32.c
index a811ed62be..a39cd574fd 100644
--- a/Source/Generated/GB_AxB__max_rminus_uint32.c
+++ b/Source/Generated/GB_AxB__max_rminus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_uint32
 // A'*B function (dot2):     GB_Adot2B__max_rminus_uint32
 // A'*B function (dot3):     GB_Adot3B__max_rminus_uint32
-// A*B function (heap):      GB_AheapB__max_rminus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_RMINUS_UINT32 || GxB_NO_MAX_RMINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_uint32
 GrB_Info GB_Adot2B__max_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_uint32
 GrB_Info GB_Adot3B__max_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rminus_uint32
+GrB_Info GB_Asaxpy3B__max_rminus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_uint64.c b/Source/Generated/GB_AxB__max_rminus_uint64.c
index 5403c5e141..6b1c9f077e 100644
--- a/Source/Generated/GB_AxB__max_rminus_uint64.c
+++ b/Source/Generated/GB_AxB__max_rminus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_uint64
 // A'*B function (dot2):     GB_Adot2B__max_rminus_uint64
 // A'*B function (dot3):     GB_Adot3B__max_rminus_uint64
-// A*B function (heap):      GB_AheapB__max_rminus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_RMINUS_UINT64 || GxB_NO_MAX_RMINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_uint64
 GrB_Info GB_Adot2B__max_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_uint64
 GrB_Info GB_Adot3B__max_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_rminus_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_rminus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_rminus_uint8.c b/Source/Generated/GB_AxB__max_rminus_uint8.c
index a0c4664dec..04258a43f7 100644
--- a/Source/Generated/GB_AxB__max_rminus_uint8.c
+++ b/Source/Generated/GB_AxB__max_rminus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_rminus_uint8
 // A'*B function (dot2):     GB_Adot2B__max_rminus_uint8
 // A'*B function (dot3):     GB_Adot3B__max_rminus_uint8
-// A*B function (heap):      GB_AheapB__max_rminus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_rminus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_rminus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (bkj - aik) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (y - x) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_RMINUS || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_RMINUS_UINT8 || GxB_NO_MAX_RMINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_rminus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_rminus_uint8
 GrB_Info GB_Adot2B__max_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_rminus_uint8
 GrB_Info GB_Adot3B__max_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_rminus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_rminus_uint8
+GrB_Info GB_Asaxpy3B__max_rminus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_fp32.c b/Source/Generated/GB_AxB__max_second_fp32.c
index 071e1487d7..fef857d5b4 100644
--- a/Source/Generated/GB_AxB__max_second_fp32.c
+++ b/Source/Generated/GB_AxB__max_second_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_fp32
 // A'*B function (dot2):     GB_Adot2B__max_second_fp32
 // A'*B function (dot3):     GB_Adot3B__max_second_fp32
-// A*B function (heap):      GB_AheapB__max_second_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, bkj)
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_SECOND_FP32 || GxB_NO_MAX_SECOND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_fp32
 GrB_Info GB_Adot2B__max_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_fp32
 GrB_Info GB_Adot3B__max_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_second_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_fp64.c b/Source/Generated/GB_AxB__max_second_fp64.c
index cb10d0a592..467a8f80b9 100644
--- a/Source/Generated/GB_AxB__max_second_fp64.c
+++ b/Source/Generated/GB_AxB__max_second_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_fp64
 // A'*B function (dot2):     GB_Adot2B__max_second_fp64
 // A'*B function (dot3):     GB_Adot3B__max_second_fp64
-// A*B function (heap):      GB_AheapB__max_second_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, bkj)
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_SECOND_FP64 || GxB_NO_MAX_SECOND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_fp64
 GrB_Info GB_Adot2B__max_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_fp64
 GrB_Info GB_Adot3B__max_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_second_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_int16.c b/Source/Generated/GB_AxB__max_second_int16.c
index ee7d41a2c2..40cb095ddb 100644
--- a/Source/Generated/GB_AxB__max_second_int16.c
+++ b/Source/Generated/GB_AxB__max_second_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_int16
 // A'*B function (dot2):     GB_Adot2B__max_second_int16
 // A'*B function (dot3):     GB_Adot3B__max_second_int16
-// A*B function (heap):      GB_AheapB__max_second_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_SECOND_INT16 || GxB_NO_MAX_SECOND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_int16
 GrB_Info GB_Adot2B__max_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_int16
 GrB_Info GB_Adot3B__max_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_second_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_second_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_int32.c b/Source/Generated/GB_AxB__max_second_int32.c
index 50245f0bb2..6fb55340c0 100644
--- a/Source/Generated/GB_AxB__max_second_int32.c
+++ b/Source/Generated/GB_AxB__max_second_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_int32
 // A'*B function (dot2):     GB_Adot2B__max_second_int32
 // A'*B function (dot3):     GB_Adot3B__max_second_int32
-// A*B function (heap):      GB_AheapB__max_second_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_SECOND_INT32 || GxB_NO_MAX_SECOND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_int32
 GrB_Info GB_Adot2B__max_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_int32
 GrB_Info GB_Adot3B__max_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_second_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_second_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_int64.c b/Source/Generated/GB_AxB__max_second_int64.c
index f31f05c379..d155a9f9cf 100644
--- a/Source/Generated/GB_AxB__max_second_int64.c
+++ b/Source/Generated/GB_AxB__max_second_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_int64
 // A'*B function (dot2):     GB_Adot2B__max_second_int64
 // A'*B function (dot3):     GB_Adot3B__max_second_int64
-// A*B function (heap):      GB_AheapB__max_second_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_SECOND_INT64 || GxB_NO_MAX_SECOND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_int64
 GrB_Info GB_Adot2B__max_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_int64
 GrB_Info GB_Adot3B__max_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_second_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_second_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_int8.c b/Source/Generated/GB_AxB__max_second_int8.c
index 869c991175..c5cb100dcf 100644
--- a/Source/Generated/GB_AxB__max_second_int8.c
+++ b/Source/Generated/GB_AxB__max_second_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_int8
 // A'*B function (dot2):     GB_Adot2B__max_second_int8
 // A'*B function (dot3):     GB_Adot3B__max_second_int8
-// A*B function (heap):      GB_AheapB__max_second_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_SECOND_INT8 || GxB_NO_MAX_SECOND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_int8
 GrB_Info GB_Adot2B__max_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_int8
 GrB_Info GB_Adot3B__max_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_second_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_second_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_uint16.c b/Source/Generated/GB_AxB__max_second_uint16.c
index 652766083d..52898b0b07 100644
--- a/Source/Generated/GB_AxB__max_second_uint16.c
+++ b/Source/Generated/GB_AxB__max_second_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_uint16
 // A'*B function (dot2):     GB_Adot2B__max_second_uint16
 // A'*B function (dot3):     GB_Adot3B__max_second_uint16
-// A*B function (heap):      GB_AheapB__max_second_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_SECOND_UINT16 || GxB_NO_MAX_SECOND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_uint16
 GrB_Info GB_Adot2B__max_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_uint16
 GrB_Info GB_Adot3B__max_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_second_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_second_uint16
+GrB_Info GB_Asaxpy3B__max_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_uint32.c b/Source/Generated/GB_AxB__max_second_uint32.c
index 371efbd299..592357f93e 100644
--- a/Source/Generated/GB_AxB__max_second_uint32.c
+++ b/Source/Generated/GB_AxB__max_second_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_uint32
 // A'*B function (dot2):     GB_Adot2B__max_second_uint32
 // A'*B function (dot3):     GB_Adot3B__max_second_uint32
-// A*B function (heap):      GB_AheapB__max_second_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_SECOND_UINT32 || GxB_NO_MAX_SECOND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_uint32
 GrB_Info GB_Adot2B__max_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_uint32
 GrB_Info GB_Adot3B__max_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_second_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_second_uint32
+GrB_Info GB_Asaxpy3B__max_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_uint64.c b/Source/Generated/GB_AxB__max_second_uint64.c
index c1375336d2..c142cece53 100644
--- a/Source/Generated/GB_AxB__max_second_uint64.c
+++ b/Source/Generated/GB_AxB__max_second_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_uint64
 // A'*B function (dot2):     GB_Adot2B__max_second_uint64
 // A'*B function (dot3):     GB_Adot3B__max_second_uint64
-// A*B function (heap):      GB_AheapB__max_second_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_SECOND_UINT64 || GxB_NO_MAX_SECOND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_uint64
 GrB_Info GB_Adot2B__max_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_uint64
 GrB_Info GB_Adot3B__max_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_second_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_second_uint64
+GrB_Info GB_Asaxpy3B__max_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_second_uint8.c b/Source/Generated/GB_AxB__max_second_uint8.c
index 3e0b5c2bad..401006ff50 100644
--- a/Source/Generated/GB_AxB__max_second_uint8.c
+++ b/Source/Generated/GB_AxB__max_second_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_second_uint8
 // A'*B function (dot2):     GB_Adot2B__max_second_uint8
 // A'*B function (dot3):     GB_Adot3B__max_second_uint8
-// A*B function (heap):      GB_AheapB__max_second_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_second_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_second_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMAX (cij, bkj)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMAX (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMAX (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_SECOND || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_SECOND_UINT8 || GxB_NO_MAX_SECOND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_second_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_second_uint8
 GrB_Info GB_Adot2B__max_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_second_uint8
 GrB_Info GB_Adot3B__max_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_second_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_second_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_second_uint8
+GrB_Info GB_Asaxpy3B__max_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_fp32.c b/Source/Generated/GB_AxB__max_times_fp32.c
index 0e42d1e2da..650a8f5b18 100644
--- a/Source/Generated/GB_AxB__max_times_fp32.c
+++ b/Source/Generated/GB_AxB__max_times_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_fp32
 // A'*B function (dot2):     GB_Adot2B__max_times_fp32
 // A'*B function (dot3):     GB_Adot3B__max_times_fp32
-// A*B function (heap):      GB_AheapB__max_times_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij = fmaxf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmaxf (cij, (aik * bkj))
 // Identity: (-INFINITY)
 // Terminal: if (cij == INFINITY) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmaxf (z, (x * y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmaxf (z, (x * y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     (-INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INFINITY) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmaxf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmaxf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmaxf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_FP32 || GxB_NO_MAX_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_MAX_TIMES_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_times_fp32
 GrB_Info GB_Adot2B__max_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_fp32
 GrB_Info GB_Adot3B__max_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_times_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_times_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_fp64.c b/Source/Generated/GB_AxB__max_times_fp64.c
index 8de36dca49..c8b6da0c23 100644
--- a/Source/Generated/GB_AxB__max_times_fp64.c
+++ b/Source/Generated/GB_AxB__max_times_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_fp64
 // A'*B function (dot2):     GB_Adot2B__max_times_fp64
 // A'*B function (dot3):     GB_Adot3B__max_times_fp64
-// A*B function (heap):      GB_AheapB__max_times_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij = fmax (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmax (cij, (aik * bkj))
 // Identity: ((double) -INFINITY)
 // Terminal: if (cij == ((double) INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmax (z, (x * y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmax (z, (x * y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) -INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmax (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmax (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmax (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_FP64 || GxB_NO_MAX_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_MAX_TIMES_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__max_times_fp64
 GrB_Info GB_Adot2B__max_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_fp64
 GrB_Info GB_Adot3B__max_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__max_times_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_times_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_int16.c b/Source/Generated/GB_AxB__max_times_int16.c
index 54ff7f38f4..7e5f1d36ee 100644
--- a/Source/Generated/GB_AxB__max_times_int16.c
+++ b/Source/Generated/GB_AxB__max_times_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_int16
 // A'*B function (dot2):     GB_Adot2B__max_times_int16
 // A'*B function (dot3):     GB_Adot3B__max_times_int16
-// A*B function (heap):      GB_AheapB__max_times_int16
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT16_MIN
 // Terminal: if (cij == INT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_INT16 || GxB_NO_MAX_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_MAX_TIMES_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_int16
 GrB_Info GB_Adot2B__max_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_int16
 GrB_Info GB_Adot3B__max_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_times_int16
+GrB_Info GB_Asaxpy3B__max_times_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_int32.c b/Source/Generated/GB_AxB__max_times_int32.c
index 8643fadd37..30d617d145 100644
--- a/Source/Generated/GB_AxB__max_times_int32.c
+++ b/Source/Generated/GB_AxB__max_times_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_int32
 // A'*B function (dot2):     GB_Adot2B__max_times_int32
 // A'*B function (dot3):     GB_Adot3B__max_times_int32
-// A*B function (heap):      GB_AheapB__max_times_int32
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT32_MIN
 // Terminal: if (cij == INT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_INT32 || GxB_NO_MAX_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_MAX_TIMES_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_int32
 GrB_Info GB_Adot2B__max_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_int32
 GrB_Info GB_Adot3B__max_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_times_int32
+GrB_Info GB_Asaxpy3B__max_times_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_int64.c b/Source/Generated/GB_AxB__max_times_int64.c
index 27db9c1a77..ccc95723d4 100644
--- a/Source/Generated/GB_AxB__max_times_int64.c
+++ b/Source/Generated/GB_AxB__max_times_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_int64
 // A'*B function (dot2):     GB_Adot2B__max_times_int64
 // A'*B function (dot3):     GB_Adot3B__max_times_int64
-// A*B function (heap):      GB_AheapB__max_times_int64
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT64_MIN
 // Terminal: if (cij == INT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_INT64 || GxB_NO_MAX_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_MAX_TIMES_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_int64
 GrB_Info GB_Adot2B__max_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_int64
 GrB_Info GB_Adot3B__max_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_times_int64
+GrB_Info GB_Asaxpy3B__max_times_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_int8.c b/Source/Generated/GB_AxB__max_times_int8.c
index 5d760e3794..48da7e0864 100644
--- a/Source/Generated/GB_AxB__max_times_int8.c
+++ b/Source/Generated/GB_AxB__max_times_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_int8
 // A'*B function (dot2):     GB_Adot2B__max_times_int8
 // A'*B function (dot3):     GB_Adot3B__max_times_int8
-// A*B function (heap):      GB_AheapB__max_times_int8
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: INT8_MIN
 // Terminal: if (cij == INT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MIN
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_INT8 || GxB_NO_MAX_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_MAX_TIMES_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_int8
 GrB_Info GB_Adot2B__max_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_int8
 GrB_Info GB_Adot3B__max_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_times_int8
+GrB_Info GB_Asaxpy3B__max_times_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_uint16.c b/Source/Generated/GB_AxB__max_times_uint16.c
index 77766b9b94..4539498e67 100644
--- a/Source/Generated/GB_AxB__max_times_uint16.c
+++ b/Source/Generated/GB_AxB__max_times_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_uint16
 // A'*B function (dot2):     GB_Adot2B__max_times_uint16
 // A'*B function (dot3):     GB_Adot3B__max_times_uint16
-// A*B function (heap):      GB_AheapB__max_times_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT16_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT16_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_MAX_TIMES_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_uint16
 GrB_Info GB_Adot2B__max_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_uint16
 GrB_Info GB_Adot3B__max_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_times_uint16
+GrB_Info GB_Asaxpy3B__max_times_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_uint32.c b/Source/Generated/GB_AxB__max_times_uint32.c
index 90c694f5a8..7d79c8c3c9 100644
--- a/Source/Generated/GB_AxB__max_times_uint32.c
+++ b/Source/Generated/GB_AxB__max_times_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_uint32
 // A'*B function (dot2):     GB_Adot2B__max_times_uint32
 // A'*B function (dot3):     GB_Adot3B__max_times_uint32
-// A*B function (heap):      GB_AheapB__max_times_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT32_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT32_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_MAX_TIMES_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_uint32
 GrB_Info GB_Adot2B__max_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_uint32
 GrB_Info GB_Adot3B__max_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_times_uint32
+GrB_Info GB_Asaxpy3B__max_times_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_uint64.c b/Source/Generated/GB_AxB__max_times_uint64.c
index e906a2d302..c87dca0dad 100644
--- a/Source/Generated/GB_AxB__max_times_uint64.c
+++ b/Source/Generated/GB_AxB__max_times_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_uint64
 // A'*B function (dot2):     GB_Adot2B__max_times_uint64
 // A'*B function (dot3):     GB_Adot3B__max_times_uint64
-// A*B function (heap):      GB_AheapB__max_times_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT64_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT64_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_MAX_TIMES_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_uint64
 GrB_Info GB_Adot2B__max_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_uint64
 GrB_Info GB_Adot3B__max_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__max_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
 
-GrB_Info GB_AheapB__max_times_uint64
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__max_times_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__max_times_uint8.c b/Source/Generated/GB_AxB__max_times_uint8.c
index d0147a8ccc..233f0e5fae 100644
--- a/Source/Generated/GB_AxB__max_times_uint8.c
+++ b/Source/Generated/GB_AxB__max_times_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__max_times_uint8
 // A'*B function (dot2):     GB_Adot2B__max_times_uint8
 // A'*B function (dot3):     GB_Adot3B__max_times_uint8
-// A*B function (heap):      GB_AheapB__max_times_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__max_times_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__max_times_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMAX (cij, x_op_y)
+// Add:      cij = GB_IMAX (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (aik * bkj) ; cij = GB_IMAX (cij, x_op_y)
 // Identity: 0
 // Terminal: if (cij == UINT8_MAX) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x * y) ; z = GB_IMAX (z, x_op_y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == UINT8_MAX) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMAX (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMAX (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMAX (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_TIMES || GxB_NO_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_MAX_TIMES_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__max_times_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__max_times_uint8
 GrB_Info GB_Adot2B__max_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__max_times_uint8
 GrB_Info GB_Adot3B__max_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__max_times_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__max_times_uint8     // C+=A'*B dense dot product, MAX_TIMES semiring over uint8_t
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__max_times_uint8
+GrB_Info GB_Asaxpy3B__max_times_uint8   // C=A*B, C<M>=A*B, C<!M>=A*B via saxpy3, MAX_TIMES semiring over uint8_t
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_fp32.c b/Source/Generated/GB_AxB__min_div_fp32.c
index fe29ff5f77..e5a98bff94 100644
--- a/Source/Generated/GB_AxB__min_div_fp32.c
+++ b/Source/Generated/GB_AxB__min_div_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_fp32
 // A'*B function (dot2):     GB_Adot2B__min_div_fp32
 // A'*B function (dot3):     GB_Adot3B__min_div_fp32
-// A*B function (heap):      GB_AheapB__min_div_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik / bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x / y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x / y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_DIV_FP32 || GxB_NO_MIN_DIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_div_fp32
 GrB_Info GB_Adot2B__min_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_fp32
 GrB_Info GB_Adot3B__min_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_div_fp32    // C+=A'*B dense dot product, MIN_DIV semiring over float
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_div_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_div_fp32  // C=A*B, C<M>=A*B, C<!M>=A*B via saxpy3, MIN_DIV semiring over float
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_fp64.c b/Source/Generated/GB_AxB__min_div_fp64.c
index 57d2a4d325..728ff7e513 100644
--- a/Source/Generated/GB_AxB__min_div_fp64.c
+++ b/Source/Generated/GB_AxB__min_div_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_fp64
 // A'*B function (dot2):     GB_Adot2B__min_div_fp64
 // A'*B function (dot3):     GB_Adot3B__min_div_fp64
-// A*B function (heap):      GB_AheapB__min_div_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik / bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x / y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x / y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_DIV_FP64 || GxB_NO_MIN_DIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_div_fp64
 GrB_Info GB_Adot2B__min_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_fp64
 GrB_Info GB_Adot3B__min_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_div_fp64    // C+=A'*B dense dot product, MIN_DIV semiring over double
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_div_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_div_fp64  // C=A*B, C<M>=A*B, C<!M>=A*B via saxpy3, MIN_DIV semiring over double
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_int16.c b/Source/Generated/GB_AxB__min_div_int16.c
index 57515a7cd5..10366bdbc8 100644
--- a/Source/Generated/GB_AxB__min_div_int16.c
+++ b/Source/Generated/GB_AxB__min_div_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_int16
 // A'*B function (dot2):     GB_Adot2B__min_div_int16
 // A'*B function (dot3):     GB_Adot3B__min_div_int16
-// A*B function (heap):      GB_AheapB__min_div_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 16)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 16) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_DIV_INT16 || GxB_NO_MIN_DIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_int16
 GrB_Info GB_Adot2B__min_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_int16
 GrB_Info GB_Adot3B__min_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_int16   // C+=A'*B dense dot product, MIN_DIV semiring over int16_t
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_int16
+GrB_Info GB_Asaxpy3B__min_div_int16 // C=A*B, C<M>=A*B, C<!M>=A*B via saxpy3, MIN_DIV semiring over int16_t
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from this template; presumably driven by the macros above
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_int32.c b/Source/Generated/GB_AxB__min_div_int32.c
index 10341dce84..9f97a780be 100644
--- a/Source/Generated/GB_AxB__min_div_int32.c
+++ b/Source/Generated/GB_AxB__min_div_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_int32
 // A'*B function (dot2):     GB_Adot2B__min_div_int32
 // A'*B function (dot3):     GB_Adot3B__min_div_int32
-// A*B function (heap):      GB_AheapB__min_div_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 32)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 32) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_DIV_INT32 || GxB_NO_MIN_DIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_int32
 GrB_Info GB_Adot2B__min_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_int32
 GrB_Info GB_Adot3B__min_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_int32
+GrB_Info GB_Asaxpy3B__min_div_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_int64.c b/Source/Generated/GB_AxB__min_div_int64.c
index a8580a63d5..3bdf060700 100644
--- a/Source/Generated/GB_AxB__min_div_int64.c
+++ b/Source/Generated/GB_AxB__min_div_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_int64
 // A'*B function (dot2):     GB_Adot2B__min_div_int64
 // A'*B function (dot3):     GB_Adot3B__min_div_int64
-// A*B function (heap):      GB_AheapB__min_div_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 64)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 64) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_DIV_INT64 || GxB_NO_MIN_DIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_int64
 GrB_Info GB_Adot2B__min_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_int64
 GrB_Info GB_Adot3B__min_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_int64
+GrB_Info GB_Asaxpy3B__min_div_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_int8.c b/Source/Generated/GB_AxB__min_div_int8.c
index aaebbeaa26..6ac717397f 100644
--- a/Source/Generated/GB_AxB__min_div_int8.c
+++ b/Source/Generated/GB_AxB__min_div_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_int8
 // A'*B function (dot2):     GB_Adot2B__min_div_int8
 // A'*B function (dot3):     GB_Adot3B__min_div_int8
-// A*B function (heap):      GB_AheapB__min_div_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 8)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 8) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_DIV_INT8 || GxB_NO_MIN_DIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_int8
 GrB_Info GB_Adot2B__min_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_int8
 GrB_Info GB_Adot3B__min_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_int8
+GrB_Info GB_Asaxpy3B__min_div_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_uint16.c b/Source/Generated/GB_AxB__min_div_uint16.c
index 211b9c64a3..7c3427f2b8 100644
--- a/Source/Generated/GB_AxB__min_div_uint16.c
+++ b/Source/Generated/GB_AxB__min_div_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_uint16
 // A'*B function (dot2):     GB_Adot2B__min_div_uint16
 // A'*B function (dot3):     GB_Adot3B__min_div_uint16
-// A*B function (heap):      GB_AheapB__min_div_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 16)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 16) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_DIV_UINT16 || GxB_NO_MIN_DIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_uint16
 GrB_Info GB_Adot2B__min_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_uint16
 GrB_Info GB_Adot3B__min_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_uint16
+GrB_Info GB_Asaxpy3B__min_div_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_uint32.c b/Source/Generated/GB_AxB__min_div_uint32.c
index 43b25892ec..a92a84ea49 100644
--- a/Source/Generated/GB_AxB__min_div_uint32.c
+++ b/Source/Generated/GB_AxB__min_div_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_uint32
 // A'*B function (dot2):     GB_Adot2B__min_div_uint32
 // A'*B function (dot3):     GB_Adot3B__min_div_uint32
-// A*B function (heap):      GB_AheapB__min_div_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 32)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 32) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_DIV_UINT32 || GxB_NO_MIN_DIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_uint32
 GrB_Info GB_Adot2B__min_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_uint32
 GrB_Info GB_Adot3B__min_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_uint32
+GrB_Info GB_Asaxpy3B__min_div_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_uint64.c b/Source/Generated/GB_AxB__min_div_uint64.c
index 23132be91e..51da4b4628 100644
--- a/Source/Generated/GB_AxB__min_div_uint64.c
+++ b/Source/Generated/GB_AxB__min_div_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_uint64
 // A'*B function (dot2):     GB_Adot2B__min_div_uint64
 // A'*B function (dot3):     GB_Adot3B__min_div_uint64
-// A*B function (heap):      GB_AheapB__min_div_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 64)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 64) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_DIV_UINT64 || GxB_NO_MIN_DIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_uint64
 GrB_Info GB_Adot2B__min_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_uint64
 GrB_Info GB_Adot3B__min_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_uint64
+GrB_Info GB_Asaxpy3B__min_div_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_div_uint8.c b/Source/Generated/GB_AxB__min_div_uint8.c
index 2f088e767e..08c6b989e6 100644
--- a/Source/Generated/GB_AxB__min_div_uint8.c
+++ b/Source/Generated/GB_AxB__min_div_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_div_uint8
 // A'*B function (dot2):     GB_Adot2B__min_div_uint8
 // A'*B function (dot3):     GB_Adot3B__min_div_uint8
-// A*B function (heap):      GB_AheapB__min_div_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_div_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_div_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 8)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 8) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_DIV || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_DIV_UINT8 || GxB_NO_MIN_DIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_div_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_div_uint8
 GrB_Info GB_Adot2B__min_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_div_uint8
 GrB_Info GB_Adot3B__min_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_div_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_div_uint8
+GrB_Info GB_Asaxpy3B__min_div_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_fp32.c b/Source/Generated/GB_AxB__min_first_fp32.c
index e34d41ec72..a6a218aa7c 100644
--- a/Source/Generated/GB_AxB__min_first_fp32.c
+++ b/Source/Generated/GB_AxB__min_first_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_fp32
 // A'*B function (dot2):     GB_Adot2B__min_first_fp32
 // A'*B function (dot3):     GB_Adot3B__min_first_fp32
-// A*B function (heap):      GB_AheapB__min_first_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, aik)
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_FIRST_FP32 || GxB_NO_MIN_FIRST_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_fp32
 GrB_Info GB_Adot2B__min_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_fp32
 GrB_Info GB_Adot3B__min_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_fp64.c b/Source/Generated/GB_AxB__min_first_fp64.c
index 7fc07dd712..536e230559 100644
--- a/Source/Generated/GB_AxB__min_first_fp64.c
+++ b/Source/Generated/GB_AxB__min_first_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_fp64
 // A'*B function (dot2):     GB_Adot2B__min_first_fp64
 // A'*B function (dot3):     GB_Adot3B__min_first_fp64
-// A*B function (heap):      GB_AheapB__min_first_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, aik)
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_FIRST_FP64 || GxB_NO_MIN_FIRST_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_fp64
 GrB_Info GB_Adot2B__min_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_fp64
 GrB_Info GB_Adot3B__min_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_int16.c b/Source/Generated/GB_AxB__min_first_int16.c
index e519b867fb..29cd33e065 100644
--- a/Source/Generated/GB_AxB__min_first_int16.c
+++ b/Source/Generated/GB_AxB__min_first_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_int16
 // A'*B function (dot2):     GB_Adot2B__min_first_int16
 // A'*B function (dot3):     GB_Adot3B__min_first_int16
-// A*B function (heap):      GB_AheapB__min_first_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_FIRST_INT16 || GxB_NO_MIN_FIRST_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_int16
 GrB_Info GB_Adot2B__min_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_int16
 GrB_Info GB_Adot3B__min_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_int32.c b/Source/Generated/GB_AxB__min_first_int32.c
index c76c7709c8..de90272664 100644
--- a/Source/Generated/GB_AxB__min_first_int32.c
+++ b/Source/Generated/GB_AxB__min_first_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_int32
 // A'*B function (dot2):     GB_Adot2B__min_first_int32
 // A'*B function (dot3):     GB_Adot3B__min_first_int32
-// A*B function (heap):      GB_AheapB__min_first_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_FIRST_INT32 || GxB_NO_MIN_FIRST_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_int32
 GrB_Info GB_Adot2B__min_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_int32
 GrB_Info GB_Adot3B__min_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_int64.c b/Source/Generated/GB_AxB__min_first_int64.c
index d03890a207..921b05fd97 100644
--- a/Source/Generated/GB_AxB__min_first_int64.c
+++ b/Source/Generated/GB_AxB__min_first_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_int64
 // A'*B function (dot2):     GB_Adot2B__min_first_int64
 // A'*B function (dot3):     GB_Adot3B__min_first_int64
-// A*B function (heap):      GB_AheapB__min_first_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_FIRST_INT64 || GxB_NO_MIN_FIRST_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_int64
 GrB_Info GB_Adot2B__min_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_int64
 GrB_Info GB_Adot3B__min_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_int8.c b/Source/Generated/GB_AxB__min_first_int8.c
index 93fcb92479..83f41fe4f8 100644
--- a/Source/Generated/GB_AxB__min_first_int8.c
+++ b/Source/Generated/GB_AxB__min_first_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_int8
 // A'*B function (dot2):     GB_Adot2B__min_first_int8
 // A'*B function (dot3):     GB_Adot3B__min_first_int8
-// A*B function (heap):      GB_AheapB__min_first_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_FIRST_INT8 || GxB_NO_MIN_FIRST_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_int8
 GrB_Info GB_Adot2B__min_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_int8
 GrB_Info GB_Adot3B__min_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_uint16.c b/Source/Generated/GB_AxB__min_first_uint16.c
index 25a77da8cc..d34d4e8cf8 100644
--- a/Source/Generated/GB_AxB__min_first_uint16.c
+++ b/Source/Generated/GB_AxB__min_first_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_uint16
 // A'*B function (dot2):     GB_Adot2B__min_first_uint16
 // A'*B function (dot3):     GB_Adot3B__min_first_uint16
-// A*B function (heap):      GB_AheapB__min_first_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_FIRST_UINT16 || GxB_NO_MIN_FIRST_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_uint16
 GrB_Info GB_Adot2B__min_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_uint16
 GrB_Info GB_Adot3B__min_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_uint32.c b/Source/Generated/GB_AxB__min_first_uint32.c
index 9edc9e8b28..faba8492b1 100644
--- a/Source/Generated/GB_AxB__min_first_uint32.c
+++ b/Source/Generated/GB_AxB__min_first_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_uint32
 // A'*B function (dot2):     GB_Adot2B__min_first_uint32
 // A'*B function (dot3):     GB_Adot3B__min_first_uint32
-// A*B function (heap):      GB_AheapB__min_first_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_FIRST_UINT32 || GxB_NO_MIN_FIRST_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_uint32
 GrB_Info GB_Adot2B__min_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_uint32
 GrB_Info GB_Adot3B__min_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_uint64.c b/Source/Generated/GB_AxB__min_first_uint64.c
index a4c4cb3081..632b0e08d9 100644
--- a/Source/Generated/GB_AxB__min_first_uint64.c
+++ b/Source/Generated/GB_AxB__min_first_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_uint64
 // A'*B function (dot2):     GB_Adot2B__min_first_uint64
 // A'*B function (dot3):     GB_Adot3B__min_first_uint64
-// A*B function (heap):      GB_AheapB__min_first_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_FIRST_UINT64 || GxB_NO_MIN_FIRST_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_uint64
 GrB_Info GB_Adot2B__min_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_uint64
 GrB_Info GB_Adot3B__min_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_first_uint8.c b/Source/Generated/GB_AxB__min_first_uint8.c
index 2d926faa76..ee86300468 100644
--- a/Source/Generated/GB_AxB__min_first_uint8.c
+++ b/Source/Generated/GB_AxB__min_first_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_first_uint8
 // A'*B function (dot2):     GB_Adot2B__min_first_uint8
 // A'*B function (dot3):     GB_Adot3B__min_first_uint8
-// A*B function (heap):      GB_AheapB__min_first_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_first_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_first_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, aik)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FIRST || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_FIRST_UINT8 || GxB_NO_MIN_FIRST_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_first_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_first_uint8
 GrB_Info GB_Adot2B__min_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_first_uint8
 GrB_Info GB_Adot3B__min_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_first_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_first_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_fp32.c b/Source/Generated/GB_AxB__min_iseq_fp32.c
index f5b9c9de4e..838ccff665 100644
--- a/Source/Generated/GB_AxB__min_iseq_fp32.c
+++ b/Source/Generated/GB_AxB__min_iseq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_fp32
 // A'*B function (dot2):     GB_Adot2B__min_iseq_fp32
 // A'*B function (dot3):     GB_Adot3B__min_iseq_fp32
-// A*B function (heap):      GB_AheapB__min_iseq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik == bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_ISEQ_FP32 || GxB_NO_MIN_ISEQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_fp32
 GrB_Info GB_Adot2B__min_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_fp32
 GrB_Info GB_Adot3B__min_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_fp64.c b/Source/Generated/GB_AxB__min_iseq_fp64.c
index 851f72b688..86e0feebfc 100644
--- a/Source/Generated/GB_AxB__min_iseq_fp64.c
+++ b/Source/Generated/GB_AxB__min_iseq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_fp64
 // A'*B function (dot2):     GB_Adot2B__min_iseq_fp64
 // A'*B function (dot3):     GB_Adot3B__min_iseq_fp64
-// A*B function (heap):      GB_AheapB__min_iseq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik == bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_ISEQ_FP64 || GxB_NO_MIN_ISEQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_fp64
 GrB_Info GB_Adot2B__min_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_fp64
 GrB_Info GB_Adot3B__min_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_int16.c b/Source/Generated/GB_AxB__min_iseq_int16.c
index 1811cdefcc..e49ab7a366 100644
--- a/Source/Generated/GB_AxB__min_iseq_int16.c
+++ b/Source/Generated/GB_AxB__min_iseq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_int16
 // A'*B function (dot2):     GB_Adot2B__min_iseq_int16
 // A'*B function (dot3):     GB_Adot3B__min_iseq_int16
-// A*B function (heap):      GB_AheapB__min_iseq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_ISEQ_INT16 || GxB_NO_MIN_ISEQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_int16
 GrB_Info GB_Adot2B__min_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_int16
 GrB_Info GB_Adot3B__min_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_int32.c b/Source/Generated/GB_AxB__min_iseq_int32.c
index d48b9016b6..2dcc2e7050 100644
--- a/Source/Generated/GB_AxB__min_iseq_int32.c
+++ b/Source/Generated/GB_AxB__min_iseq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_int32
 // A'*B function (dot2):     GB_Adot2B__min_iseq_int32
 // A'*B function (dot3):     GB_Adot3B__min_iseq_int32
-// A*B function (heap):      GB_AheapB__min_iseq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_ISEQ_INT32 || GxB_NO_MIN_ISEQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_int32
 GrB_Info GB_Adot2B__min_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_int32
 GrB_Info GB_Adot3B__min_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_int64.c b/Source/Generated/GB_AxB__min_iseq_int64.c
index b67d15211b..c8e508cf21 100644
--- a/Source/Generated/GB_AxB__min_iseq_int64.c
+++ b/Source/Generated/GB_AxB__min_iseq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_int64
 // A'*B function (dot2):     GB_Adot2B__min_iseq_int64
 // A'*B function (dot3):     GB_Adot3B__min_iseq_int64
-// A*B function (heap):      GB_AheapB__min_iseq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_ISEQ_INT64 || GxB_NO_MIN_ISEQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_int64
 GrB_Info GB_Adot2B__min_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_int64
 GrB_Info GB_Adot3B__min_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_int8.c b/Source/Generated/GB_AxB__min_iseq_int8.c
index e9e77b8f86..aa16ab2458 100644
--- a/Source/Generated/GB_AxB__min_iseq_int8.c
+++ b/Source/Generated/GB_AxB__min_iseq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_int8
 // A'*B function (dot2):     GB_Adot2B__min_iseq_int8
 // A'*B function (dot3):     GB_Adot3B__min_iseq_int8
-// A*B function (heap):      GB_AheapB__min_iseq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_ISEQ_INT8 || GxB_NO_MIN_ISEQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_int8
 GrB_Info GB_Adot2B__min_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_int8
 GrB_Info GB_Adot3B__min_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_uint16.c b/Source/Generated/GB_AxB__min_iseq_uint16.c
index bf5c167793..d64118adcb 100644
--- a/Source/Generated/GB_AxB__min_iseq_uint16.c
+++ b/Source/Generated/GB_AxB__min_iseq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_uint16
 // A'*B function (dot2):     GB_Adot2B__min_iseq_uint16
 // A'*B function (dot3):     GB_Adot3B__min_iseq_uint16
-// A*B function (heap):      GB_AheapB__min_iseq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_ISEQ_UINT16 || GxB_NO_MIN_ISEQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_uint16
 GrB_Info GB_Adot2B__min_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_uint16
 GrB_Info GB_Adot3B__min_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_uint32.c b/Source/Generated/GB_AxB__min_iseq_uint32.c
index 8c5b99af3f..5000a483d1 100644
--- a/Source/Generated/GB_AxB__min_iseq_uint32.c
+++ b/Source/Generated/GB_AxB__min_iseq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_uint32
 // A'*B function (dot2):     GB_Adot2B__min_iseq_uint32
 // A'*B function (dot3):     GB_Adot3B__min_iseq_uint32
-// A*B function (heap):      GB_AheapB__min_iseq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_ISEQ_UINT32 || GxB_NO_MIN_ISEQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_uint32
 GrB_Info GB_Adot2B__min_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_uint32
 GrB_Info GB_Adot3B__min_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_uint64.c b/Source/Generated/GB_AxB__min_iseq_uint64.c
index a32c85ae7f..d6955c1324 100644
--- a/Source/Generated/GB_AxB__min_iseq_uint64.c
+++ b/Source/Generated/GB_AxB__min_iseq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_uint64
 // A'*B function (dot2):     GB_Adot2B__min_iseq_uint64
 // A'*B function (dot3):     GB_Adot3B__min_iseq_uint64
-// A*B function (heap):      GB_AheapB__min_iseq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_ISEQ_UINT64 || GxB_NO_MIN_ISEQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_uint64
 GrB_Info GB_Adot2B__min_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_uint64
 GrB_Info GB_Adot3B__min_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_iseq_uint8.c b/Source/Generated/GB_AxB__min_iseq_uint8.c
index 7696245864..c841cce727 100644
--- a/Source/Generated/GB_AxB__min_iseq_uint8.c
+++ b/Source/Generated/GB_AxB__min_iseq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_iseq_uint8
 // A'*B function (dot2):     GB_Adot2B__min_iseq_uint8
 // A'*B function (dot3):     GB_Adot3B__min_iseq_uint8
-// A*B function (heap):      GB_AheapB__min_iseq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_iseq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_iseq_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik == bkj))
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x == y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x == y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISEQ || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_ISEQ_UINT8 || GxB_NO_MIN_ISEQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_iseq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_iseq_uint8
 GrB_Info GB_Adot2B__min_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_iseq_uint8
 GrB_Info GB_Adot3B__min_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_iseq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_iseq_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_iseq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_fp32.c b/Source/Generated/GB_AxB__min_isge_fp32.c
index b3f7491253..044abf425f 100644
--- a/Source/Generated/GB_AxB__min_isge_fp32.c
+++ b/Source/Generated/GB_AxB__min_isge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_fp32
 // A'*B function (dot2):     GB_Adot2B__min_isge_fp32
 // A'*B function (dot3):     GB_Adot3B__min_isge_fp32
-// A*B function (heap):      GB_AheapB__min_isge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik >= bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_ISGE_FP32 || GxB_NO_MIN_ISGE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_fp32
 GrB_Info GB_Adot2B__min_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_fp32
 GrB_Info GB_Adot3B__min_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_fp64.c b/Source/Generated/GB_AxB__min_isge_fp64.c
index d1d50c4eec..0c373355fb 100644
--- a/Source/Generated/GB_AxB__min_isge_fp64.c
+++ b/Source/Generated/GB_AxB__min_isge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_fp64
 // A'*B function (dot2):     GB_Adot2B__min_isge_fp64
 // A'*B function (dot3):     GB_Adot3B__min_isge_fp64
-// A*B function (heap):      GB_AheapB__min_isge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik >= bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double)) ; len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_ISGE_FP64 || GxB_NO_MIN_ISGE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_fp64
 GrB_Info GB_Adot2B__min_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_fp64
 GrB_Info GB_Adot3B__min_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_int16.c b/Source/Generated/GB_AxB__min_isge_int16.c
index 31b2b553a9..0f20abf136 100644
--- a/Source/Generated/GB_AxB__min_isge_int16.c
+++ b/Source/Generated/GB_AxB__min_isge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_int16
 // A'*B function (dot2):     GB_Adot2B__min_isge_int16
 // A'*B function (dot3):     GB_Adot3B__min_isge_int16
-// A*B function (heap):      GB_AheapB__min_isge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t)) ; len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_ISGE_INT16 || GxB_NO_MIN_ISGE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_int16
 GrB_Info GB_Adot2B__min_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_int16
 GrB_Info GB_Adot3B__min_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_int32.c b/Source/Generated/GB_AxB__min_isge_int32.c
index 44167c976d..6120caeb65 100644
--- a/Source/Generated/GB_AxB__min_isge_int32.c
+++ b/Source/Generated/GB_AxB__min_isge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_int32
 // A'*B function (dot2):     GB_Adot2B__min_isge_int32
 // A'*B function (dot3):     GB_Adot3B__min_isge_int32
-// A*B function (heap):      GB_AheapB__min_isge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)) ; len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_ISGE_INT32 || GxB_NO_MIN_ISGE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_int32
 GrB_Info GB_Adot2B__min_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_int32
 GrB_Info GB_Adot3B__min_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_int64.c b/Source/Generated/GB_AxB__min_isge_int64.c
index fea69b9451..4e446de5f6 100644
--- a/Source/Generated/GB_AxB__min_isge_int64.c
+++ b/Source/Generated/GB_AxB__min_isge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_int64
 // A'*B function (dot2):     GB_Adot2B__min_isge_int64
 // A'*B function (dot3):     GB_Adot3B__min_isge_int64
-// A*B function (heap):      GB_AheapB__min_isge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)) ; len counts entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_ISGE_INT64 || GxB_NO_MIN_ISGE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_int64
 GrB_Info GB_Adot2B__min_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_int64
 GrB_Info GB_Adot3B__min_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_int8.c b/Source/Generated/GB_AxB__min_isge_int8.c
index 96a34060a5..d06797a569 100644
--- a/Source/Generated/GB_AxB__min_isge_int8.c
+++ b/Source/Generated/GB_AxB__min_isge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_int8
 // A'*B function (dot2):     GB_Adot2B__min_isge_int8
 // A'*B function (dot3):     GB_Adot3B__min_isge_int8
-// A*B function (heap):      GB_AheapB__min_isge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_ISGE_INT8 || GxB_NO_MIN_ISGE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_int8
 GrB_Info GB_Adot2B__min_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_int8
 GrB_Info GB_Adot3B__min_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_uint16.c b/Source/Generated/GB_AxB__min_isge_uint16.c
index 6d31446a5c..62f65d3c19 100644
--- a/Source/Generated/GB_AxB__min_isge_uint16.c
+++ b/Source/Generated/GB_AxB__min_isge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_uint16
 // A'*B function (dot2):     GB_Adot2B__min_isge_uint16
 // A'*B function (dot3):     GB_Adot3B__min_isge_uint16
-// A*B function (heap):      GB_AheapB__min_isge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_ISGE_UINT16 || GxB_NO_MIN_ISGE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_uint16
 GrB_Info GB_Adot2B__min_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_uint16
 GrB_Info GB_Adot3B__min_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_uint32.c b/Source/Generated/GB_AxB__min_isge_uint32.c
index 4b36f33639..1238c89e4c 100644
--- a/Source/Generated/GB_AxB__min_isge_uint32.c
+++ b/Source/Generated/GB_AxB__min_isge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_uint32
 // A'*B function (dot2):     GB_Adot2B__min_isge_uint32
 // A'*B function (dot3):     GB_Adot3B__min_isge_uint32
-// A*B function (heap):      GB_AheapB__min_isge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_ISGE_UINT32 || GxB_NO_MIN_ISGE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_uint32
 GrB_Info GB_Adot2B__min_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_uint32
 GrB_Info GB_Adot3B__min_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_uint64.c b/Source/Generated/GB_AxB__min_isge_uint64.c
index 0ba03539b5..e4678b953f 100644
--- a/Source/Generated/GB_AxB__min_isge_uint64.c
+++ b/Source/Generated/GB_AxB__min_isge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_uint64
 // A'*B function (dot2):     GB_Adot2B__min_isge_uint64
 // A'*B function (dot3):     GB_Adot3B__min_isge_uint64
-// A*B function (heap):      GB_AheapB__min_isge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_ISGE_UINT64 || GxB_NO_MIN_ISGE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_uint64
 GrB_Info GB_Adot2B__min_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_uint64
 GrB_Info GB_Adot3B__min_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isge_uint8.c b/Source/Generated/GB_AxB__min_isge_uint8.c
index 1b3429f75a..526dbde51f 100644
--- a/Source/Generated/GB_AxB__min_isge_uint8.c
+++ b/Source/Generated/GB_AxB__min_isge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isge_uint8
 // A'*B function (dot2):     GB_Adot2B__min_isge_uint8
 // A'*B function (dot3):     GB_Adot3B__min_isge_uint8
-// A*B function (heap):      GB_AheapB__min_isge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isge_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik >= bkj))
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x >= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x >= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGE || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_ISGE_UINT8 || GxB_NO_MIN_ISGE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isge_uint8
 GrB_Info GB_Adot2B__min_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isge_uint8
 GrB_Info GB_Adot3B__min_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isge_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_fp32.c b/Source/Generated/GB_AxB__min_isgt_fp32.c
index 87725af3ba..7df03b1624 100644
--- a/Source/Generated/GB_AxB__min_isgt_fp32.c
+++ b/Source/Generated/GB_AxB__min_isgt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_fp32
 // A'*B function (dot2):     GB_Adot2B__min_isgt_fp32
 // A'*B function (dot3):     GB_Adot3B__min_isgt_fp32
-// A*B function (heap):      GB_AheapB__min_isgt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik > bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_ISGT_FP32 || GxB_NO_MIN_ISGT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_fp32
 GrB_Info GB_Adot2B__min_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_fp32
 GrB_Info GB_Adot3B__min_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_fp64.c b/Source/Generated/GB_AxB__min_isgt_fp64.c
index 74b08fbed5..2da908ada4 100644
--- a/Source/Generated/GB_AxB__min_isgt_fp64.c
+++ b/Source/Generated/GB_AxB__min_isgt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_fp64
 // A'*B function (dot2):     GB_Adot2B__min_isgt_fp64
 // A'*B function (dot3):     GB_Adot3B__min_isgt_fp64
-// A*B function (heap):      GB_AheapB__min_isgt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik > bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_ISGT_FP64 || GxB_NO_MIN_ISGT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_fp64
 GrB_Info GB_Adot2B__min_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_fp64
 GrB_Info GB_Adot3B__min_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_int16.c b/Source/Generated/GB_AxB__min_isgt_int16.c
index dbafa503aa..ff5a604747 100644
--- a/Source/Generated/GB_AxB__min_isgt_int16.c
+++ b/Source/Generated/GB_AxB__min_isgt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_int16
 // A'*B function (dot2):     GB_Adot2B__min_isgt_int16
 // A'*B function (dot3):     GB_Adot3B__min_isgt_int16
-// A*B function (heap):      GB_AheapB__min_isgt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_ISGT_INT16 || GxB_NO_MIN_ISGT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_int16
 GrB_Info GB_Adot2B__min_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_int16
 GrB_Info GB_Adot3B__min_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_int32.c b/Source/Generated/GB_AxB__min_isgt_int32.c
index c5e745d6fa..735ba1c0d1 100644
--- a/Source/Generated/GB_AxB__min_isgt_int32.c
+++ b/Source/Generated/GB_AxB__min_isgt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_int32
 // A'*B function (dot2):     GB_Adot2B__min_isgt_int32
 // A'*B function (dot3):     GB_Adot3B__min_isgt_int32
-// A*B function (heap):      GB_AheapB__min_isgt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_ISGT_INT32 || GxB_NO_MIN_ISGT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_int32
 GrB_Info GB_Adot2B__min_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_int32
 GrB_Info GB_Adot3B__min_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_int64.c b/Source/Generated/GB_AxB__min_isgt_int64.c
index c610065769..4826371ba2 100644
--- a/Source/Generated/GB_AxB__min_isgt_int64.c
+++ b/Source/Generated/GB_AxB__min_isgt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_int64
 // A'*B function (dot2):     GB_Adot2B__min_isgt_int64
 // A'*B function (dot3):     GB_Adot3B__min_isgt_int64
-// A*B function (heap):      GB_AheapB__min_isgt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_ISGT_INT64 || GxB_NO_MIN_ISGT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_int64
 GrB_Info GB_Adot2B__min_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_int64
 GrB_Info GB_Adot3B__min_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_int8.c b/Source/Generated/GB_AxB__min_isgt_int8.c
index 0dc047a716..d1b23dde5c 100644
--- a/Source/Generated/GB_AxB__min_isgt_int8.c
+++ b/Source/Generated/GB_AxB__min_isgt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_int8
 // A'*B function (dot2):     GB_Adot2B__min_isgt_int8
 // A'*B function (dot3):     GB_Adot3B__min_isgt_int8
-// A*B function (heap):      GB_AheapB__min_isgt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_ISGT_INT8 || GxB_NO_MIN_ISGT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_int8
 GrB_Info GB_Adot2B__min_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_int8
 GrB_Info GB_Adot3B__min_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_uint16.c b/Source/Generated/GB_AxB__min_isgt_uint16.c
index 8953907f26..1230fd526a 100644
--- a/Source/Generated/GB_AxB__min_isgt_uint16.c
+++ b/Source/Generated/GB_AxB__min_isgt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_uint16
 // A'*B function (dot2):     GB_Adot2B__min_isgt_uint16
 // A'*B function (dot3):     GB_Adot3B__min_isgt_uint16
-// A*B function (heap):      GB_AheapB__min_isgt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_ISGT_UINT16 || GxB_NO_MIN_ISGT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_uint16
 GrB_Info GB_Adot2B__min_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_uint16
 GrB_Info GB_Adot3B__min_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_uint32.c b/Source/Generated/GB_AxB__min_isgt_uint32.c
index 1ad9b9efe9..640916c29c 100644
--- a/Source/Generated/GB_AxB__min_isgt_uint32.c
+++ b/Source/Generated/GB_AxB__min_isgt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_uint32
 // A'*B function (dot2):     GB_Adot2B__min_isgt_uint32
 // A'*B function (dot3):     GB_Adot3B__min_isgt_uint32
-// A*B function (heap):      GB_AheapB__min_isgt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_ISGT_UINT32 || GxB_NO_MIN_ISGT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_uint32
 GrB_Info GB_Adot2B__min_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_uint32
 GrB_Info GB_Adot3B__min_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_uint64.c b/Source/Generated/GB_AxB__min_isgt_uint64.c
index 3dfa3aeaf7..e77f7eb524 100644
--- a/Source/Generated/GB_AxB__min_isgt_uint64.c
+++ b/Source/Generated/GB_AxB__min_isgt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_uint64
 // A'*B function (dot2):     GB_Adot2B__min_isgt_uint64
 // A'*B function (dot3):     GB_Adot3B__min_isgt_uint64
-// A*B function (heap):      GB_AheapB__min_isgt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_ISGT_UINT64 || GxB_NO_MIN_ISGT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_uint64
 GrB_Info GB_Adot2B__min_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_uint64
 GrB_Info GB_Adot3B__min_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isgt_uint8.c b/Source/Generated/GB_AxB__min_isgt_uint8.c
index 469feb5a1a..bd4ceeac14 100644
--- a/Source/Generated/GB_AxB__min_isgt_uint8.c
+++ b/Source/Generated/GB_AxB__min_isgt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isgt_uint8
 // A'*B function (dot2):     GB_Adot2B__min_isgt_uint8
 // A'*B function (dot3):     GB_Adot3B__min_isgt_uint8
-// A*B function (heap):      GB_AheapB__min_isgt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isgt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isgt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik > bkj))
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x > y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x > y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_ISGT_UINT8 || GxB_NO_MIN_ISGT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isgt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isgt_uint8
 GrB_Info GB_Adot2B__min_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isgt_uint8
 GrB_Info GB_Adot3B__min_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isgt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isgt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isgt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_fp32.c b/Source/Generated/GB_AxB__min_isle_fp32.c
index 15822dd9f7..6530a87e27 100644
--- a/Source/Generated/GB_AxB__min_isle_fp32.c
+++ b/Source/Generated/GB_AxB__min_isle_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_fp32
 // A'*B function (dot2):     GB_Adot2B__min_isle_fp32
 // A'*B function (dot3):     GB_Adot3B__min_isle_fp32
-// A*B function (heap):      GB_AheapB__min_isle_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik <= bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_ISLE_FP32 || GxB_NO_MIN_ISLE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_fp32
 GrB_Info GB_Adot2B__min_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_fp32
 GrB_Info GB_Adot3B__min_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_fp64.c b/Source/Generated/GB_AxB__min_isle_fp64.c
index a5b4fc3ea4..c5fe300223 100644
--- a/Source/Generated/GB_AxB__min_isle_fp64.c
+++ b/Source/Generated/GB_AxB__min_isle_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_fp64
 // A'*B function (dot2):     GB_Adot2B__min_isle_fp64
 // A'*B function (dot3):     GB_Adot3B__min_isle_fp64
-// A*B function (heap):      GB_AheapB__min_isle_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik <= bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double)) ; len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_ISLE_FP64 || GxB_NO_MIN_ISLE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_fp64
 GrB_Info GB_Adot2B__min_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_fp64
 GrB_Info GB_Adot3B__min_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_int16.c b/Source/Generated/GB_AxB__min_isle_int16.c
index 891fcdb506..9be51e4335 100644
--- a/Source/Generated/GB_AxB__min_isle_int16.c
+++ b/Source/Generated/GB_AxB__min_isle_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_int16
 // A'*B function (dot2):     GB_Adot2B__min_isle_int16
 // A'*B function (dot3):     GB_Adot3B__min_isle_int16
-// A*B function (heap):      GB_AheapB__min_isle_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t)) ; len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_ISLE_INT16 || GxB_NO_MIN_ISLE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_int16
 GrB_Info GB_Adot2B__min_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_int16
 GrB_Info GB_Adot3B__min_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_int32.c b/Source/Generated/GB_AxB__min_isle_int32.c
index dc9001b416..21a09deb95 100644
--- a/Source/Generated/GB_AxB__min_isle_int32.c
+++ b/Source/Generated/GB_AxB__min_isle_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_int32
 // A'*B function (dot2):     GB_Adot2B__min_isle_int32
 // A'*B function (dot3):     GB_Adot3B__min_isle_int32
-// A*B function (heap):      GB_AheapB__min_isle_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)) ; len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_ISLE_INT32 || GxB_NO_MIN_ISLE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_int32
 GrB_Info GB_Adot2B__min_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_int32
 GrB_Info GB_Adot3B__min_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_int64.c b/Source/Generated/GB_AxB__min_isle_int64.c
index 84886b7097..f48740ed27 100644
--- a/Source/Generated/GB_AxB__min_isle_int64.c
+++ b/Source/Generated/GB_AxB__min_isle_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_int64
 // A'*B function (dot2):     GB_Adot2B__min_isle_int64
 // A'*B function (dot3):     GB_Adot3B__min_isle_int64
-// A*B function (heap):      GB_AheapB__min_isle_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)) ; len is an entry count
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_ISLE_INT64 || GxB_NO_MIN_ISLE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_int64
 GrB_Info GB_Adot2B__min_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_int64
 GrB_Info GB_Adot3B__min_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_int8.c b/Source/Generated/GB_AxB__min_isle_int8.c
index a8de7d17f4..ed4920ec73 100644
--- a/Source/Generated/GB_AxB__min_isle_int8.c
+++ b/Source/Generated/GB_AxB__min_isle_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_int8
 // A'*B function (dot2):     GB_Adot2B__min_isle_int8
 // A'*B function (dot3):     GB_Adot3B__min_isle_int8
-// A*B function (heap):      GB_AheapB__min_isle_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_ISLE_INT8 || GxB_NO_MIN_ISLE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_int8
 GrB_Info GB_Adot2B__min_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_int8
 GrB_Info GB_Adot3B__min_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_uint16.c b/Source/Generated/GB_AxB__min_isle_uint16.c
index 971604abc0..e52482aebe 100644
--- a/Source/Generated/GB_AxB__min_isle_uint16.c
+++ b/Source/Generated/GB_AxB__min_isle_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_uint16
 // A'*B function (dot2):     GB_Adot2B__min_isle_uint16
 // A'*B function (dot3):     GB_Adot3B__min_isle_uint16
-// A*B function (heap):      GB_AheapB__min_isle_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_ISLE_UINT16 || GxB_NO_MIN_ISLE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_uint16
 GrB_Info GB_Adot2B__min_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_uint16
 GrB_Info GB_Adot3B__min_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_uint32.c b/Source/Generated/GB_AxB__min_isle_uint32.c
index 924c42b937..5672332671 100644
--- a/Source/Generated/GB_AxB__min_isle_uint32.c
+++ b/Source/Generated/GB_AxB__min_isle_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_uint32
 // A'*B function (dot2):     GB_Adot2B__min_isle_uint32
 // A'*B function (dot3):     GB_Adot3B__min_isle_uint32
-// A*B function (heap):      GB_AheapB__min_isle_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_ISLE_UINT32 || GxB_NO_MIN_ISLE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_uint32
 GrB_Info GB_Adot2B__min_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_uint32
 GrB_Info GB_Adot3B__min_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_uint64.c b/Source/Generated/GB_AxB__min_isle_uint64.c
index 8d383837ea..47201fb86a 100644
--- a/Source/Generated/GB_AxB__min_isle_uint64.c
+++ b/Source/Generated/GB_AxB__min_isle_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_uint64
 // A'*B function (dot2):     GB_Adot2B__min_isle_uint64
 // A'*B function (dot3):     GB_Adot3B__min_isle_uint64
-// A*B function (heap):      GB_AheapB__min_isle_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_ISLE_UINT64 || GxB_NO_MIN_ISLE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_uint64
 GrB_Info GB_Adot2B__min_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_uint64
 GrB_Info GB_Adot3B__min_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isle_uint8.c b/Source/Generated/GB_AxB__min_isle_uint8.c
index 1393ed1b4e..2d6c638a94 100644
--- a/Source/Generated/GB_AxB__min_isle_uint8.c
+++ b/Source/Generated/GB_AxB__min_isle_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isle_uint8
 // A'*B function (dot2):     GB_Adot2B__min_isle_uint8
 // A'*B function (dot3):     GB_Adot3B__min_isle_uint8
-// A*B function (heap):      GB_AheapB__min_isle_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isle_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isle_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik <= bkj))
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x <= y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x <= y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLE || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_ISLE_UINT8 || GxB_NO_MIN_ISLE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isle_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isle_uint8
 GrB_Info GB_Adot2B__min_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isle_uint8
 GrB_Info GB_Adot3B__min_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isle_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isle_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_fp32.c b/Source/Generated/GB_AxB__min_islt_fp32.c
index 70912003d9..aaec490dd0 100644
--- a/Source/Generated/GB_AxB__min_islt_fp32.c
+++ b/Source/Generated/GB_AxB__min_islt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_fp32
 // A'*B function (dot2):     GB_Adot2B__min_islt_fp32
 // A'*B function (dot3):     GB_Adot3B__min_islt_fp32
-// A*B function (heap):      GB_AheapB__min_islt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik < bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_ISLT_FP32 || GxB_NO_MIN_ISLT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_fp32
 GrB_Info GB_Adot2B__min_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_fp32
 GrB_Info GB_Adot3B__min_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_fp64.c b/Source/Generated/GB_AxB__min_islt_fp64.c
index 5b09eaff04..e5f4b8a075 100644
--- a/Source/Generated/GB_AxB__min_islt_fp64.c
+++ b/Source/Generated/GB_AxB__min_islt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_fp64
 // A'*B function (dot2):     GB_Adot2B__min_islt_fp64
 // A'*B function (dot3):     GB_Adot3B__min_islt_fp64
-// A*B function (heap):      GB_AheapB__min_islt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik < bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_ISLT_FP64 || GxB_NO_MIN_ISLT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_fp64
 GrB_Info GB_Adot2B__min_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_fp64
 GrB_Info GB_Adot3B__min_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_int16.c b/Source/Generated/GB_AxB__min_islt_int16.c
index 38bca95d72..fcfc0d50c2 100644
--- a/Source/Generated/GB_AxB__min_islt_int16.c
+++ b/Source/Generated/GB_AxB__min_islt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_int16
 // A'*B function (dot2):     GB_Adot2B__min_islt_int16
 // A'*B function (dot3):     GB_Adot3B__min_islt_int16
-// A*B function (heap):      GB_AheapB__min_islt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_ISLT_INT16 || GxB_NO_MIN_ISLT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_int16
 GrB_Info GB_Adot2B__min_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_int16
 GrB_Info GB_Adot3B__min_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_int32.c b/Source/Generated/GB_AxB__min_islt_int32.c
index 83482cc0f4..dda87237e2 100644
--- a/Source/Generated/GB_AxB__min_islt_int32.c
+++ b/Source/Generated/GB_AxB__min_islt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_int32
 // A'*B function (dot2):     GB_Adot2B__min_islt_int32
 // A'*B function (dot3):     GB_Adot3B__min_islt_int32
-// A*B function (heap):      GB_AheapB__min_islt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_ISLT_INT32 || GxB_NO_MIN_ISLT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_int32
 GrB_Info GB_Adot2B__min_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_int32
 GrB_Info GB_Adot3B__min_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_int64.c b/Source/Generated/GB_AxB__min_islt_int64.c
index 5e11a28e11..24c39d0dc1 100644
--- a/Source/Generated/GB_AxB__min_islt_int64.c
+++ b/Source/Generated/GB_AxB__min_islt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_int64
 // A'*B function (dot2):     GB_Adot2B__min_islt_int64
 // A'*B function (dot3):     GB_Adot3B__min_islt_int64
-// A*B function (heap):      GB_AheapB__min_islt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_ISLT_INT64 || GxB_NO_MIN_ISLT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_int64
 GrB_Info GB_Adot2B__min_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_int64
 GrB_Info GB_Adot3B__min_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_int8.c b/Source/Generated/GB_AxB__min_islt_int8.c
index 0a9d0a9535..1fc5dcfd6c 100644
--- a/Source/Generated/GB_AxB__min_islt_int8.c
+++ b/Source/Generated/GB_AxB__min_islt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_int8
 // A'*B function (dot2):     GB_Adot2B__min_islt_int8
 // A'*B function (dot3):     GB_Adot3B__min_islt_int8
-// A*B function (heap):      GB_AheapB__min_islt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_ISLT_INT8 || GxB_NO_MIN_ISLT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_int8
 GrB_Info GB_Adot2B__min_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_int8
 GrB_Info GB_Adot3B__min_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_uint16.c b/Source/Generated/GB_AxB__min_islt_uint16.c
index 8538e07075..490e28f2d0 100644
--- a/Source/Generated/GB_AxB__min_islt_uint16.c
+++ b/Source/Generated/GB_AxB__min_islt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_uint16
 // A'*B function (dot2):     GB_Adot2B__min_islt_uint16
 // A'*B function (dot3):     GB_Adot3B__min_islt_uint16
-// A*B function (heap):      GB_AheapB__min_islt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_ISLT_UINT16 || GxB_NO_MIN_ISLT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_uint16
 GrB_Info GB_Adot2B__min_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_uint16
 GrB_Info GB_Adot3B__min_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_uint32.c b/Source/Generated/GB_AxB__min_islt_uint32.c
index 87387a2e42..3e4a2c2fc3 100644
--- a/Source/Generated/GB_AxB__min_islt_uint32.c
+++ b/Source/Generated/GB_AxB__min_islt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_uint32
 // A'*B function (dot2):     GB_Adot2B__min_islt_uint32
 // A'*B function (dot3):     GB_Adot3B__min_islt_uint32
-// A*B function (heap):      GB_AheapB__min_islt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_ISLT_UINT32 || GxB_NO_MIN_ISLT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_uint32
 GrB_Info GB_Adot2B__min_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_uint32
 GrB_Info GB_Adot3B__min_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_uint64.c b/Source/Generated/GB_AxB__min_islt_uint64.c
index 516112338f..ee8542f77a 100644
--- a/Source/Generated/GB_AxB__min_islt_uint64.c
+++ b/Source/Generated/GB_AxB__min_islt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_uint64
 // A'*B function (dot2):     GB_Adot2B__min_islt_uint64
 // A'*B function (dot3):     GB_Adot3B__min_islt_uint64
-// A*B function (heap):      GB_AheapB__min_islt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_ISLT_UINT64 || GxB_NO_MIN_ISLT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_uint64
 GrB_Info GB_Adot2B__min_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_uint64
 GrB_Info GB_Adot3B__min_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_islt_uint8.c b/Source/Generated/GB_AxB__min_islt_uint8.c
index 272feb03c5..ab6a5ab56c 100644
--- a/Source/Generated/GB_AxB__min_islt_uint8.c
+++ b/Source/Generated/GB_AxB__min_islt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_islt_uint8
 // A'*B function (dot2):     GB_Adot2B__min_islt_uint8
 // A'*B function (dot3):     GB_Adot3B__min_islt_uint8
-// A*B function (heap):      GB_AheapB__min_islt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_islt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_islt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik < bkj))
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x < y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x < y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISLT || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_ISLT_UINT8 || GxB_NO_MIN_ISLT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_islt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_islt_uint8
 GrB_Info GB_Adot2B__min_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_islt_uint8
 GrB_Info GB_Adot3B__min_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_islt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_islt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_fp32.c b/Source/Generated/GB_AxB__min_isne_fp32.c
index f94ef968c8..a36fbb782a 100644
--- a/Source/Generated/GB_AxB__min_isne_fp32.c
+++ b/Source/Generated/GB_AxB__min_isne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_fp32
 // A'*B function (dot2):     GB_Adot2B__min_isne_fp32
 // A'*B function (dot3):     GB_Adot3B__min_isne_fp32
-// A*B function (heap):      GB_AheapB__min_isne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik != bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_ISNE_FP32 || GxB_NO_MIN_ISNE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_fp32
 GrB_Info GB_Adot2B__min_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_fp32
 GrB_Info GB_Adot3B__min_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_fp32   // C+=A'*B, dense dot product, MIN_ISNE_FP32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // semiring switched off by GxB_NO_*
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_fp32     // saxpy3 C=A*B kernel, MIN_ISNE_FP32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_fp64.c b/Source/Generated/GB_AxB__min_isne_fp64.c
index b3b9d45a18..1badd66d25 100644
--- a/Source/Generated/GB_AxB__min_isne_fp64.c
+++ b/Source/Generated/GB_AxB__min_isne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_fp64
 // A'*B function (dot2):     GB_Adot2B__min_isne_fp64
 // A'*B function (dot3):     GB_Adot3B__min_isne_fp64
-// A*B function (heap):      GB_AheapB__min_isne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik != bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_ISNE_FP64 || GxB_NO_MIN_ISNE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_fp64
 GrB_Info GB_Adot2B__min_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_fp64
 GrB_Info GB_Adot3B__min_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_fp64   // C+=A'*B, dense dot product, MIN_ISNE_FP64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // semiring switched off by GxB_NO_*
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_fp64     // saxpy3 C=A*B kernel, MIN_ISNE_FP64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_int16.c b/Source/Generated/GB_AxB__min_isne_int16.c
index 7ee9eef735..c0a67c26a6 100644
--- a/Source/Generated/GB_AxB__min_isne_int16.c
+++ b/Source/Generated/GB_AxB__min_isne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_int16
 // A'*B function (dot2):     GB_Adot2B__min_isne_int16
 // A'*B function (dot3):     GB_Adot3B__min_isne_int16
-// A*B function (heap):      GB_AheapB__min_isne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_ISNE_INT16 || GxB_NO_MIN_ISNE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_int16
 GrB_Info GB_Adot2B__min_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_int16
 GrB_Info GB_Adot3B__min_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_int16  // C+=A'*B, dense dot product, MIN_ISNE_INT16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // semiring switched off by GxB_NO_*
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_int16    // saxpy3 C=A*B kernel, MIN_ISNE_INT16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_int32.c b/Source/Generated/GB_AxB__min_isne_int32.c
index 3533549ebe..8008238aa0 100644
--- a/Source/Generated/GB_AxB__min_isne_int32.c
+++ b/Source/Generated/GB_AxB__min_isne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_int32
 // A'*B function (dot2):     GB_Adot2B__min_isne_int32
 // A'*B function (dot3):     GB_Adot3B__min_isne_int32
-// A*B function (heap):      GB_AheapB__min_isne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_ISNE_INT32 || GxB_NO_MIN_ISNE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_int32
 GrB_Info GB_Adot2B__min_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_int32
 GrB_Info GB_Adot3B__min_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_int32  // C+=A'*B, dense dot product, MIN_ISNE_INT32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // semiring switched off by GxB_NO_*
+    return (GrB_NO_VALUE) ;             // no hard-coded kernel is compiled
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_int32    // saxpy3 C=A*B kernel, MIN_ISNE_INT32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // body comes from the shared template
+    return (GrB_SUCCESS) ;              // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_int64.c b/Source/Generated/GB_AxB__min_isne_int64.c
index 4db7556c74..ddc57f8545 100644
--- a/Source/Generated/GB_AxB__min_isne_int64.c
+++ b/Source/Generated/GB_AxB__min_isne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_int64
 // A'*B function (dot2):     GB_Adot2B__min_isne_int64
 // A'*B function (dot3):     GB_Adot3B__min_isne_int64
-// A*B function (heap):      GB_AheapB__min_isne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_ISNE_INT64 || GxB_NO_MIN_ISNE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_int64
 GrB_Info GB_Adot2B__min_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_int64
 GrB_Info GB_Adot3B__min_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_int8.c b/Source/Generated/GB_AxB__min_isne_int8.c
index fabf35f0cb..27c5161323 100644
--- a/Source/Generated/GB_AxB__min_isne_int8.c
+++ b/Source/Generated/GB_AxB__min_isne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_int8
 // A'*B function (dot2):     GB_Adot2B__min_isne_int8
 // A'*B function (dot3):     GB_Adot3B__min_isne_int8
-// A*B function (heap):      GB_AheapB__min_isne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_ISNE_INT8 || GxB_NO_MIN_ISNE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_int8
 GrB_Info GB_Adot2B__min_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_int8
 GrB_Info GB_Adot3B__min_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_uint16.c b/Source/Generated/GB_AxB__min_isne_uint16.c
index 0e37bf64fb..a87e5bdb69 100644
--- a/Source/Generated/GB_AxB__min_isne_uint16.c
+++ b/Source/Generated/GB_AxB__min_isne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_uint16
 // A'*B function (dot2):     GB_Adot2B__min_isne_uint16
 // A'*B function (dot3):     GB_Adot3B__min_isne_uint16
-// A*B function (heap):      GB_AheapB__min_isne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_ISNE_UINT16 || GxB_NO_MIN_ISNE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_uint16
 GrB_Info GB_Adot2B__min_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_uint16
 GrB_Info GB_Adot3B__min_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_uint32.c b/Source/Generated/GB_AxB__min_isne_uint32.c
index 2ff698cd84..990d56d296 100644
--- a/Source/Generated/GB_AxB__min_isne_uint32.c
+++ b/Source/Generated/GB_AxB__min_isne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_uint32
 // A'*B function (dot2):     GB_Adot2B__min_isne_uint32
 // A'*B function (dot3):     GB_Adot3B__min_isne_uint32
-// A*B function (heap):      GB_AheapB__min_isne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_ISNE_UINT32 || GxB_NO_MIN_ISNE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_uint32
 GrB_Info GB_Adot2B__min_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_uint32
 GrB_Info GB_Adot3B__min_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_uint64.c b/Source/Generated/GB_AxB__min_isne_uint64.c
index 611632dc2d..b600f9ac42 100644
--- a/Source/Generated/GB_AxB__min_isne_uint64.c
+++ b/Source/Generated/GB_AxB__min_isne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_uint64
 // A'*B function (dot2):     GB_Adot2B__min_isne_uint64
 // A'*B function (dot3):     GB_Adot3B__min_isne_uint64
-// A*B function (heap):      GB_AheapB__min_isne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_ISNE_UINT64 || GxB_NO_MIN_ISNE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_uint64
 GrB_Info GB_Adot2B__min_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_uint64
 GrB_Info GB_Adot3B__min_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_isne_uint8.c b/Source/Generated/GB_AxB__min_isne_uint8.c
index 8ab9bc9760..95092b91d4 100644
--- a/Source/Generated/GB_AxB__min_isne_uint8.c
+++ b/Source/Generated/GB_AxB__min_isne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_isne_uint8
 // A'*B function (dot2):     GB_Adot2B__min_isne_uint8
 // A'*B function (dot3):     GB_Adot3B__min_isne_uint8
-// A*B function (heap):      GB_AheapB__min_isne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_isne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_isne_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, (aik != bkj))
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, (x != y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, (x != y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_ISNE || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_ISNE_UINT8 || GxB_NO_MIN_ISNE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_isne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_isne_uint8
 GrB_Info GB_Adot2B__min_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_isne_uint8
 GrB_Info GB_Adot3B__min_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_isne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_isne_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_isne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_fp32.c b/Source/Generated/GB_AxB__min_land_fp32.c
index 9e72b4f5fc..883a7c53af 100644
--- a/Source/Generated/GB_AxB__min_land_fp32.c
+++ b/Source/Generated/GB_AxB__min_land_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_fp32
 // A'*B function (dot2):     GB_Adot2B__min_land_fp32
 // A'*B function (dot3):     GB_Adot3B__min_land_fp32
-// A*B function (heap):      GB_AheapB__min_land_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, ((aik != 0) && (bkj != 0)))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, ((x != 0) && (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, ((x != 0) && (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_LAND_FP32 || GxB_NO_MIN_LAND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_land_fp32
 GrB_Info GB_Adot2B__min_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_fp32
 GrB_Info GB_Adot3B__min_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_land_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_fp64.c b/Source/Generated/GB_AxB__min_land_fp64.c
index c8d3f457ac..3798c75666 100644
--- a/Source/Generated/GB_AxB__min_land_fp64.c
+++ b/Source/Generated/GB_AxB__min_land_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_fp64
 // A'*B function (dot2):     GB_Adot2B__min_land_fp64
 // A'*B function (dot3):     GB_Adot3B__min_land_fp64
-// A*B function (heap):      GB_AheapB__min_land_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, ((aik != 0) && (bkj != 0)))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, ((x != 0) && (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, ((x != 0) && (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_LAND_FP64 || GxB_NO_MIN_LAND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_land_fp64
 GrB_Info GB_Adot2B__min_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_fp64
 GrB_Info GB_Adot3B__min_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_land_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_int16.c b/Source/Generated/GB_AxB__min_land_int16.c
index 14d9ea2e6e..3df1a9650f 100644
--- a/Source/Generated/GB_AxB__min_land_int16.c
+++ b/Source/Generated/GB_AxB__min_land_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_int16
 // A'*B function (dot2):     GB_Adot2B__min_land_int16
 // A'*B function (dot3):     GB_Adot3B__min_land_int16
-// A*B function (heap):      GB_AheapB__min_land_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_LAND_INT16 || GxB_NO_MIN_LAND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_int16
 GrB_Info GB_Adot2B__min_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_int16
 GrB_Info GB_Adot3B__min_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_int16
+GrB_Info GB_Asaxpy3B__min_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_int32.c b/Source/Generated/GB_AxB__min_land_int32.c
index 5fb2bea91e..8b439072b5 100644
--- a/Source/Generated/GB_AxB__min_land_int32.c
+++ b/Source/Generated/GB_AxB__min_land_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_int32
 // A'*B function (dot2):     GB_Adot2B__min_land_int32
 // A'*B function (dot3):     GB_Adot3B__min_land_int32
-// A*B function (heap):      GB_AheapB__min_land_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_LAND_INT32 || GxB_NO_MIN_LAND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_int32
 GrB_Info GB_Adot2B__min_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_int32
 GrB_Info GB_Adot3B__min_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_int32
+GrB_Info GB_Asaxpy3B__min_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_int64.c b/Source/Generated/GB_AxB__min_land_int64.c
index e064370467..6c00fa002d 100644
--- a/Source/Generated/GB_AxB__min_land_int64.c
+++ b/Source/Generated/GB_AxB__min_land_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_int64
 // A'*B function (dot2):     GB_Adot2B__min_land_int64
 // A'*B function (dot3):     GB_Adot3B__min_land_int64
-// A*B function (heap):      GB_AheapB__min_land_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_LAND_INT64 || GxB_NO_MIN_LAND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_int64
 GrB_Info GB_Adot2B__min_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_int64
 GrB_Info GB_Adot3B__min_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_int64
+GrB_Info GB_Asaxpy3B__min_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_int8.c b/Source/Generated/GB_AxB__min_land_int8.c
index 74f669e7a6..5594d25803 100644
--- a/Source/Generated/GB_AxB__min_land_int8.c
+++ b/Source/Generated/GB_AxB__min_land_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_int8
 // A'*B function (dot2):     GB_Adot2B__min_land_int8
 // A'*B function (dot3):     GB_Adot3B__min_land_int8
-// A*B function (heap):      GB_AheapB__min_land_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_LAND_INT8 || GxB_NO_MIN_LAND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_int8
 GrB_Info GB_Adot2B__min_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_int8
 GrB_Info GB_Adot3B__min_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_int8
+GrB_Info GB_Asaxpy3B__min_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_uint16.c b/Source/Generated/GB_AxB__min_land_uint16.c
index a5621e32a7..c34919828f 100644
--- a/Source/Generated/GB_AxB__min_land_uint16.c
+++ b/Source/Generated/GB_AxB__min_land_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_uint16
 // A'*B function (dot2):     GB_Adot2B__min_land_uint16
 // A'*B function (dot3):     GB_Adot3B__min_land_uint16
-// A*B function (heap):      GB_AheapB__min_land_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_LAND_UINT16 || GxB_NO_MIN_LAND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_uint16
 GrB_Info GB_Adot2B__min_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_uint16
 GrB_Info GB_Adot3B__min_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_uint16
+GrB_Info GB_Asaxpy3B__min_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_uint32.c b/Source/Generated/GB_AxB__min_land_uint32.c
index 7d09b2bfaa..b0a116121a 100644
--- a/Source/Generated/GB_AxB__min_land_uint32.c
+++ b/Source/Generated/GB_AxB__min_land_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_uint32
 // A'*B function (dot2):     GB_Adot2B__min_land_uint32
 // A'*B function (dot3):     GB_Adot3B__min_land_uint32
-// A*B function (heap):      GB_AheapB__min_land_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_LAND_UINT32 || GxB_NO_MIN_LAND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_uint32
 GrB_Info GB_Adot2B__min_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_uint32
 GrB_Info GB_Adot3B__min_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_uint32
+GrB_Info GB_Asaxpy3B__min_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_uint64.c b/Source/Generated/GB_AxB__min_land_uint64.c
index 8c41647380..3fd58772ba 100644
--- a/Source/Generated/GB_AxB__min_land_uint64.c
+++ b/Source/Generated/GB_AxB__min_land_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_uint64
 // A'*B function (dot2):     GB_Adot2B__min_land_uint64
 // A'*B function (dot3):     GB_Adot3B__min_land_uint64
-// A*B function (heap):      GB_AheapB__min_land_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_LAND_UINT64 || GxB_NO_MIN_LAND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_uint64
 GrB_Info GB_Adot2B__min_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_uint64
 GrB_Info GB_Adot3B__min_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_uint64
+GrB_Info GB_Asaxpy3B__min_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_land_uint8.c b/Source/Generated/GB_AxB__min_land_uint8.c
index d64b978d3f..19471f4b6c 100644
--- a/Source/Generated/GB_AxB__min_land_uint8.c
+++ b/Source/Generated/GB_AxB__min_land_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_land_uint8
 // A'*B function (dot2):     GB_Adot2B__min_land_uint8
 // A'*B function (dot3):     GB_Adot3B__min_land_uint8
-// A*B function (heap):      GB_AheapB__min_land_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_land_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_land_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LAND || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_LAND_UINT8 || GxB_NO_MIN_LAND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_land_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_land_uint8
 GrB_Info GB_Adot2B__min_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_land_uint8
 GrB_Info GB_Adot3B__min_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_land_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_land_uint8
+GrB_Info GB_Asaxpy3B__min_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_fp32.c b/Source/Generated/GB_AxB__min_lor_fp32.c
index 8c1865bbd8..0223366621 100644
--- a/Source/Generated/GB_AxB__min_lor_fp32.c
+++ b/Source/Generated/GB_AxB__min_lor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_fp32
 // A'*B function (dot2):     GB_Adot2B__min_lor_fp32
 // A'*B function (dot3):     GB_Adot3B__min_lor_fp32
-// A*B function (heap):      GB_AheapB__min_lor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, ((aik != 0) || (bkj != 0)))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, ((x != 0) || (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, ((x != 0) || (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_LOR_FP32 || GxB_NO_MIN_LOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_fp32
 GrB_Info GB_Adot2B__min_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_fp32
 GrB_Info GB_Adot3B__min_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_lor_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_fp64.c b/Source/Generated/GB_AxB__min_lor_fp64.c
index 134db4a3eb..531abae581 100644
--- a/Source/Generated/GB_AxB__min_lor_fp64.c
+++ b/Source/Generated/GB_AxB__min_lor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_fp64
 // A'*B function (dot2):     GB_Adot2B__min_lor_fp64
 // A'*B function (dot3):     GB_Adot3B__min_lor_fp64
-// A*B function (heap):      GB_AheapB__min_lor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, ((aik != 0) || (bkj != 0)))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, ((x != 0) || (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, ((x != 0) || (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_LOR_FP64 || GxB_NO_MIN_LOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_fp64
 GrB_Info GB_Adot2B__min_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_fp64
 GrB_Info GB_Adot3B__min_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_lor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_lor_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_int16.c b/Source/Generated/GB_AxB__min_lor_int16.c
index 369f02bb6e..9584d8d2b1 100644
--- a/Source/Generated/GB_AxB__min_lor_int16.c
+++ b/Source/Generated/GB_AxB__min_lor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_int16
 // A'*B function (dot2):     GB_Adot2B__min_lor_int16
 // A'*B function (dot3):     GB_Adot3B__min_lor_int16
-// A*B function (heap):      GB_AheapB__min_lor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_LOR_INT16 || GxB_NO_MIN_LOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_int16
 GrB_Info GB_Adot2B__min_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_int16
 GrB_Info GB_Adot3B__min_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_int16
+GrB_Info GB_Asaxpy3B__min_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_int32.c b/Source/Generated/GB_AxB__min_lor_int32.c
index 2b375e7c68..bfb386f66e 100644
--- a/Source/Generated/GB_AxB__min_lor_int32.c
+++ b/Source/Generated/GB_AxB__min_lor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_int32
 // A'*B function (dot2):     GB_Adot2B__min_lor_int32
 // A'*B function (dot3):     GB_Adot3B__min_lor_int32
-// A*B function (heap):      GB_AheapB__min_lor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_LOR_INT32 || GxB_NO_MIN_LOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_int32
 GrB_Info GB_Adot2B__min_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_int32
 GrB_Info GB_Adot3B__min_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_int32
+GrB_Info GB_Asaxpy3B__min_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_int64.c b/Source/Generated/GB_AxB__min_lor_int64.c
index a8e45e4b50..730bb2b271 100644
--- a/Source/Generated/GB_AxB__min_lor_int64.c
+++ b/Source/Generated/GB_AxB__min_lor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_int64
 // A'*B function (dot2):     GB_Adot2B__min_lor_int64
 // A'*B function (dot3):     GB_Adot3B__min_lor_int64
-// A*B function (heap):      GB_AheapB__min_lor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_LOR_INT64 || GxB_NO_MIN_LOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_int64
 GrB_Info GB_Adot2B__min_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_int64
 GrB_Info GB_Adot3B__min_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_int64
+GrB_Info GB_Asaxpy3B__min_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_int8.c b/Source/Generated/GB_AxB__min_lor_int8.c
index 2bed7fd84e..cad37239cd 100644
--- a/Source/Generated/GB_AxB__min_lor_int8.c
+++ b/Source/Generated/GB_AxB__min_lor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_int8
 // A'*B function (dot2):     GB_Adot2B__min_lor_int8
 // A'*B function (dot3):     GB_Adot3B__min_lor_int8
-// A*B function (heap):      GB_AheapB__min_lor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_LOR_INT8 || GxB_NO_MIN_LOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_int8
 GrB_Info GB_Adot2B__min_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_int8
 GrB_Info GB_Adot3B__min_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_int8
+GrB_Info GB_Asaxpy3B__min_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_uint16.c b/Source/Generated/GB_AxB__min_lor_uint16.c
index 89f77029c9..f25b6a75ed 100644
--- a/Source/Generated/GB_AxB__min_lor_uint16.c
+++ b/Source/Generated/GB_AxB__min_lor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_uint16
 // A'*B function (dot2):     GB_Adot2B__min_lor_uint16
 // A'*B function (dot3):     GB_Adot3B__min_lor_uint16
-// A*B function (heap):      GB_AheapB__min_lor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_LOR_UINT16 || GxB_NO_MIN_LOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_uint16
 GrB_Info GB_Adot2B__min_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_uint16
 GrB_Info GB_Adot3B__min_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_uint16
+GrB_Info GB_Asaxpy3B__min_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_uint32.c b/Source/Generated/GB_AxB__min_lor_uint32.c
index 15e7987761..a614274478 100644
--- a/Source/Generated/GB_AxB__min_lor_uint32.c
+++ b/Source/Generated/GB_AxB__min_lor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_uint32
 // A'*B function (dot2):     GB_Adot2B__min_lor_uint32
 // A'*B function (dot3):     GB_Adot3B__min_lor_uint32
-// A*B function (heap):      GB_AheapB__min_lor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_LOR_UINT32 || GxB_NO_MIN_LOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_uint32
 GrB_Info GB_Adot2B__min_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_uint32
 GrB_Info GB_Adot3B__min_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_uint32
+GrB_Info GB_Asaxpy3B__min_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_uint64.c b/Source/Generated/GB_AxB__min_lor_uint64.c
index 380c5d5925..0f25b38509 100644
--- a/Source/Generated/GB_AxB__min_lor_uint64.c
+++ b/Source/Generated/GB_AxB__min_lor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_uint64
 // A'*B function (dot2):     GB_Adot2B__min_lor_uint64
 // A'*B function (dot3):     GB_Adot3B__min_lor_uint64
-// A*B function (heap):      GB_AheapB__min_lor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_LOR_UINT64 || GxB_NO_MIN_LOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_uint64
 GrB_Info GB_Adot2B__min_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_uint64
 GrB_Info GB_Adot3B__min_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_uint64
+GrB_Info GB_Asaxpy3B__min_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lor_uint8.c b/Source/Generated/GB_AxB__min_lor_uint8.c
index ec25bca8aa..2af70cd003 100644
--- a/Source/Generated/GB_AxB__min_lor_uint8.c
+++ b/Source/Generated/GB_AxB__min_lor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lor_uint8
 // A'*B function (dot2):     GB_Adot2B__min_lor_uint8
 // A'*B function (dot3):     GB_Adot3B__min_lor_uint8
-// A*B function (heap):      GB_AheapB__min_lor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_lor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LOR || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_LOR_UINT8 || GxB_NO_MIN_LOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lor_uint8
 GrB_Info GB_Adot2B__min_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lor_uint8
 GrB_Info GB_Adot3B__min_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lor_uint8
+GrB_Info GB_Asaxpy3B__min_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_fp32.c b/Source/Generated/GB_AxB__min_lxor_fp32.c
index 6860117e24..a529e92d3a 100644
--- a/Source/Generated/GB_AxB__min_lxor_fp32.c
+++ b/Source/Generated/GB_AxB__min_lxor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_fp32
 // A'*B function (dot2):     GB_Adot2B__min_lxor_fp32
 // A'*B function (dot3):     GB_Adot3B__min_lxor_fp32
-// A*B function (heap):      GB_AheapB__min_lxor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, ((aik != 0) != (bkj != 0)))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, ((x != 0) != (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, ((x != 0) != (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_LXOR_FP32 || GxB_NO_MIN_LXOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_fp32
 GrB_Info GB_Adot2B__min_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_fp32
 GrB_Info GB_Adot3B__min_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_lxor_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_lxor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_fp64.c b/Source/Generated/GB_AxB__min_lxor_fp64.c
index ea3c697b6e..4ab74dbef8 100644
--- a/Source/Generated/GB_AxB__min_lxor_fp64.c
+++ b/Source/Generated/GB_AxB__min_lxor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_fp64
 // A'*B function (dot2):     GB_Adot2B__min_lxor_fp64
 // A'*B function (dot3):     GB_Adot3B__min_lxor_fp64
-// A*B function (heap):      GB_AheapB__min_lxor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, ((aik != 0) != (bkj != 0)))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, ((x != 0) != (y != 0))) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, ((x != 0) != (y != 0)))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_LXOR_FP64 || GxB_NO_MIN_LXOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_fp64
 GrB_Info GB_Adot2B__min_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_fp64
 GrB_Info GB_Adot3B__min_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_lxor_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_int16.c b/Source/Generated/GB_AxB__min_lxor_int16.c
index c120213214..829a030724 100644
--- a/Source/Generated/GB_AxB__min_lxor_int16.c
+++ b/Source/Generated/GB_AxB__min_lxor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_int16
 // A'*B function (dot2):     GB_Adot2B__min_lxor_int16
 // A'*B function (dot3):     GB_Adot3B__min_lxor_int16
-// A*B function (heap):      GB_AheapB__min_lxor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_LXOR_INT16 || GxB_NO_MIN_LXOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_int16
 GrB_Info GB_Adot2B__min_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_int16
 GrB_Info GB_Adot3B__min_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_int16
+GrB_Info GB_Asaxpy3B__min_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_int32.c b/Source/Generated/GB_AxB__min_lxor_int32.c
index e7a0764336..4bb6f90759 100644
--- a/Source/Generated/GB_AxB__min_lxor_int32.c
+++ b/Source/Generated/GB_AxB__min_lxor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_int32
 // A'*B function (dot2):     GB_Adot2B__min_lxor_int32
 // A'*B function (dot3):     GB_Adot3B__min_lxor_int32
-// A*B function (heap):      GB_AheapB__min_lxor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_LXOR_INT32 || GxB_NO_MIN_LXOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_int32
 GrB_Info GB_Adot2B__min_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_int32
 GrB_Info GB_Adot3B__min_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_int32
+GrB_Info GB_Asaxpy3B__min_lxor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_int64.c b/Source/Generated/GB_AxB__min_lxor_int64.c
index 5871c0bd48..04671008aa 100644
--- a/Source/Generated/GB_AxB__min_lxor_int64.c
+++ b/Source/Generated/GB_AxB__min_lxor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_int64
 // A'*B function (dot2):     GB_Adot2B__min_lxor_int64
 // A'*B function (dot3):     GB_Adot3B__min_lxor_int64
-// A*B function (heap):      GB_AheapB__min_lxor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_LXOR_INT64 || GxB_NO_MIN_LXOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_int64
 GrB_Info GB_Adot2B__min_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_int64
 GrB_Info GB_Adot3B__min_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_int64
+GrB_Info GB_Asaxpy3B__min_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_int8.c b/Source/Generated/GB_AxB__min_lxor_int8.c
index 34188c21f1..c544303768 100644
--- a/Source/Generated/GB_AxB__min_lxor_int8.c
+++ b/Source/Generated/GB_AxB__min_lxor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_int8
 // A'*B function (dot2):     GB_Adot2B__min_lxor_int8
 // A'*B function (dot3):     GB_Adot3B__min_lxor_int8
-// A*B function (heap):      GB_AheapB__min_lxor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_LXOR_INT8 || GxB_NO_MIN_LXOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_int8
 GrB_Info GB_Adot2B__min_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_int8
 GrB_Info GB_Adot3B__min_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_int8
+GrB_Info GB_Asaxpy3B__min_lxor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_uint16.c b/Source/Generated/GB_AxB__min_lxor_uint16.c
index 8ac66beba4..a3be067796 100644
--- a/Source/Generated/GB_AxB__min_lxor_uint16.c
+++ b/Source/Generated/GB_AxB__min_lxor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_uint16
 // A'*B function (dot2):     GB_Adot2B__min_lxor_uint16
 // A'*B function (dot3):     GB_Adot3B__min_lxor_uint16
-// A*B function (heap):      GB_AheapB__min_lxor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_LXOR_UINT16 || GxB_NO_MIN_LXOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_uint16
 GrB_Info GB_Adot2B__min_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_uint16
 GrB_Info GB_Adot3B__min_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_uint16
+GrB_Info GB_Asaxpy3B__min_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_uint32.c b/Source/Generated/GB_AxB__min_lxor_uint32.c
index 988c387b05..8bb76844dc 100644
--- a/Source/Generated/GB_AxB__min_lxor_uint32.c
+++ b/Source/Generated/GB_AxB__min_lxor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_uint32
 // A'*B function (dot2):     GB_Adot2B__min_lxor_uint32
 // A'*B function (dot3):     GB_Adot3B__min_lxor_uint32
-// A*B function (heap):      GB_AheapB__min_lxor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_LXOR_UINT32 || GxB_NO_MIN_LXOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_uint32
 GrB_Info GB_Adot2B__min_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_uint32
 GrB_Info GB_Adot3B__min_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_uint32
+GrB_Info GB_Asaxpy3B__min_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_uint64.c b/Source/Generated/GB_AxB__min_lxor_uint64.c
index ad395353d2..0cd2cc36f8 100644
--- a/Source/Generated/GB_AxB__min_lxor_uint64.c
+++ b/Source/Generated/GB_AxB__min_lxor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_uint64
 // A'*B function (dot2):     GB_Adot2B__min_lxor_uint64
 // A'*B function (dot3):     GB_Adot3B__min_lxor_uint64
-// A*B function (heap):      GB_AheapB__min_lxor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_LXOR_UINT64 || GxB_NO_MIN_LXOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_uint64
 GrB_Info GB_Adot2B__min_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_uint64
 GrB_Info GB_Adot3B__min_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_uint64
+GrB_Info GB_Asaxpy3B__min_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_lxor_uint8.c b/Source/Generated/GB_AxB__min_lxor_uint8.c
index 3ab7d3dd5f..f78ae4cc5f 100644
--- a/Source/Generated/GB_AxB__min_lxor_uint8.c
+++ b/Source/Generated/GB_AxB__min_lxor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_lxor_uint8
 // A'*B function (dot2):     GB_Adot2B__min_lxor_uint8
 // A'*B function (dot3):     GB_Adot3B__min_lxor_uint8
-// A*B function (heap):      GB_AheapB__min_lxor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_lxor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_lxor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_LXOR_UINT8 || GxB_NO_MIN_LXOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_lxor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_lxor_uint8
 GrB_Info GB_Adot2B__min_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_lxor_uint8
 GrB_Info GB_Adot3B__min_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_lxor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_lxor_uint8
+GrB_Info GB_Asaxpy3B__min_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_fp32.c b/Source/Generated/GB_AxB__min_max_fp32.c
index e871237fb6..8ad8d33cef 100644
--- a/Source/Generated/GB_AxB__min_max_fp32.c
+++ b/Source/Generated/GB_AxB__min_max_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_fp32
 // A'*B function (dot2):     GB_Adot2B__min_max_fp32
 // A'*B function (dot3):     GB_Adot3B__min_max_fp32
-// A*B function (heap):      GB_AheapB__min_max_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmaxf (aik, bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, fmaxf (aik, bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmaxf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmaxf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, fmaxf (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, fmaxf (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_MAX_FP32 || GxB_NO_MIN_MAX_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_max_fp32
 GrB_Info GB_Adot2B__min_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_fp32
 GrB_Info GB_Adot3B__min_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_max_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_fp64.c b/Source/Generated/GB_AxB__min_max_fp64.c
index 670688f360..1196b7ce50 100644
--- a/Source/Generated/GB_AxB__min_max_fp64.c
+++ b/Source/Generated/GB_AxB__min_max_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_fp64
 // A'*B function (dot2):     GB_Adot2B__min_max_fp64
 // A'*B function (dot3):     GB_Adot3B__min_max_fp64
-// A*B function (heap):      GB_AheapB__min_max_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmax (aik, bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, fmax (aik, bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmax (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmax (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, fmax (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, fmax (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_MAX_FP64 || GxB_NO_MIN_MAX_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_max_fp64
 GrB_Info GB_Adot2B__min_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_fp64
 GrB_Info GB_Adot3B__min_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_max_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_int16.c b/Source/Generated/GB_AxB__min_max_int16.c
index 1dacb3842b..a3139cdc79 100644
--- a/Source/Generated/GB_AxB__min_max_int16.c
+++ b/Source/Generated/GB_AxB__min_max_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_int16
 // A'*B function (dot2):     GB_Adot2B__min_max_int16
 // A'*B function (dot3):     GB_Adot3B__min_max_int16
-// A*B function (heap):      GB_AheapB__min_max_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_MAX_INT16 || GxB_NO_MIN_MAX_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_int16
 GrB_Info GB_Adot2B__min_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_int16
 GrB_Info GB_Adot3B__min_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_int16
+GrB_Info GB_Asaxpy3B__min_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_int32.c b/Source/Generated/GB_AxB__min_max_int32.c
index aca3e89db5..1f56b33139 100644
--- a/Source/Generated/GB_AxB__min_max_int32.c
+++ b/Source/Generated/GB_AxB__min_max_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_int32
 // A'*B function (dot2):     GB_Adot2B__min_max_int32
 // A'*B function (dot3):     GB_Adot3B__min_max_int32
-// A*B function (heap):      GB_AheapB__min_max_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_MAX_INT32 || GxB_NO_MIN_MAX_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_int32
 GrB_Info GB_Adot2B__min_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_int32
 GrB_Info GB_Adot3B__min_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_int32
+GrB_Info GB_Asaxpy3B__min_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_int64.c b/Source/Generated/GB_AxB__min_max_int64.c
index 400940022c..401ef70bd9 100644
--- a/Source/Generated/GB_AxB__min_max_int64.c
+++ b/Source/Generated/GB_AxB__min_max_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_int64
 // A'*B function (dot2):     GB_Adot2B__min_max_int64
 // A'*B function (dot3):     GB_Adot3B__min_max_int64
-// A*B function (heap):      GB_AheapB__min_max_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_MAX_INT64 || GxB_NO_MIN_MAX_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_int64
 GrB_Info GB_Adot2B__min_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_int64
 GrB_Info GB_Adot3B__min_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_int64
+GrB_Info GB_Asaxpy3B__min_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_int8.c b/Source/Generated/GB_AxB__min_max_int8.c
index c525de203d..107c53ba1b 100644
--- a/Source/Generated/GB_AxB__min_max_int8.c
+++ b/Source/Generated/GB_AxB__min_max_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_int8
 // A'*B function (dot2):     GB_Adot2B__min_max_int8
 // A'*B function (dot3):     GB_Adot3B__min_max_int8
-// A*B function (heap):      GB_AheapB__min_max_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_MAX_INT8 || GxB_NO_MIN_MAX_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_int8
 GrB_Info GB_Adot2B__min_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_int8
 GrB_Info GB_Adot3B__min_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_int8
+GrB_Info GB_Asaxpy3B__min_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_uint16.c b/Source/Generated/GB_AxB__min_max_uint16.c
index 7609f390f4..b75c39bb5a 100644
--- a/Source/Generated/GB_AxB__min_max_uint16.c
+++ b/Source/Generated/GB_AxB__min_max_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_uint16
 // A'*B function (dot2):     GB_Adot2B__min_max_uint16
 // A'*B function (dot3):     GB_Adot3B__min_max_uint16
-// A*B function (heap):      GB_AheapB__min_max_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) ; len counts uint16_t entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_MIN_MAX_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_uint16
 GrB_Info GB_Adot2B__min_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_uint16
 GrB_Info GB_Adot3B__min_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_uint16
+GrB_Info GB_Asaxpy3B__min_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_uint32.c b/Source/Generated/GB_AxB__min_max_uint32.c
index bbf0dd3d26..03b7c5ee36 100644
--- a/Source/Generated/GB_AxB__min_max_uint32.c
+++ b/Source/Generated/GB_AxB__min_max_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_uint32
 // A'*B function (dot2):     GB_Adot2B__min_max_uint32
 // A'*B function (dot3):     GB_Adot3B__min_max_uint32
-// A*B function (heap):      GB_AheapB__min_max_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) ; len counts uint32_t entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_MIN_MAX_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_uint32
 GrB_Info GB_Adot2B__min_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_uint32
 GrB_Info GB_Adot3B__min_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_uint32
+GrB_Info GB_Asaxpy3B__min_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_uint64.c b/Source/Generated/GB_AxB__min_max_uint64.c
index 6236944c25..2fac68f35e 100644
--- a/Source/Generated/GB_AxB__min_max_uint64.c
+++ b/Source/Generated/GB_AxB__min_max_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_uint64
 // A'*B function (dot2):     GB_Adot2B__min_max_uint64
 // A'*B function (dot3):     GB_Adot3B__min_max_uint64
-// A*B function (heap):      GB_AheapB__min_max_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) ; len counts uint64_t entries, not bytes
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_MIN_MAX_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_uint64
 GrB_Info GB_Adot2B__min_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_uint64
 GrB_Info GB_Adot3B__min_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_uint64
+GrB_Info GB_Asaxpy3B__min_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_max_uint8.c b/Source/Generated/GB_AxB__min_max_uint8.c
index ea5e9cc594..4677cd7b8e 100644
--- a/Source/Generated/GB_AxB__min_max_uint8.c
+++ b/Source/Generated/GB_AxB__min_max_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_max_uint8
 // A'*B function (dot2):     GB_Adot2B__min_max_uint8
 // A'*B function (dot3):     GB_Adot3B__min_max_uint8
-// A*B function (heap):      GB_AheapB__min_max_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_max_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_max_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IMAX (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMAX (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MAX || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_MIN_MAX_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_max_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_max_uint8
 GrB_Info GB_Adot2B__min_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_max_uint8
 GrB_Info GB_Adot3B__min_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_max_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_max_uint8
+GrB_Info GB_Asaxpy3B__min_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_fp32.c b/Source/Generated/GB_AxB__min_min_fp32.c
index 1443bfc34e..bb44094d09 100644
--- a/Source/Generated/GB_AxB__min_min_fp32.c
+++ b/Source/Generated/GB_AxB__min_min_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_fp32
 // A'*B function (dot2):     GB_Adot2B__min_min_fp32
 // A'*B function (dot3):     GB_Adot3B__min_min_fp32
-// A*B function (heap):      GB_AheapB__min_min_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fminf (aik, bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, fminf (aik, bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fminf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fminf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, fminf (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, fminf (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_MIN_MIN_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_min_fp32
 GrB_Info GB_Adot2B__min_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_fp32
 GrB_Info GB_Adot3B__min_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_min_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_fp64.c b/Source/Generated/GB_AxB__min_min_fp64.c
index 4a4d2d936a..6714baab96 100644
--- a/Source/Generated/GB_AxB__min_min_fp64.c
+++ b/Source/Generated/GB_AxB__min_min_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_fp64
 // A'*B function (dot2):     GB_Adot2B__min_min_fp64
 // A'*B function (dot3):     GB_Adot3B__min_min_fp64
-// A*B function (heap):      GB_AheapB__min_min_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmin (aik, bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, fmin (aik, bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmin (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmin (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, fmin (x, y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, fmin (x, y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_MIN_MIN_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_min_fp64
 GrB_Info GB_Adot2B__min_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_fp64
 GrB_Info GB_Adot3B__min_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_min_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_int16.c b/Source/Generated/GB_AxB__min_min_int16.c
index 3ddec15fcf..64570b3a07 100644
--- a/Source/Generated/GB_AxB__min_min_int16.c
+++ b/Source/Generated/GB_AxB__min_min_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_int16
 // A'*B function (dot2):     GB_Adot2B__min_min_int16
 // A'*B function (dot3):     GB_Adot3B__min_min_int16
-// A*B function (heap):      GB_AheapB__min_min_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_MIN_MIN_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_int16
 GrB_Info GB_Adot2B__min_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_int16
 GrB_Info GB_Adot3B__min_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_int16
+GrB_Info GB_Asaxpy3B__min_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_int32.c b/Source/Generated/GB_AxB__min_min_int32.c
index 6a32de3d27..0c2678b75d 100644
--- a/Source/Generated/GB_AxB__min_min_int32.c
+++ b/Source/Generated/GB_AxB__min_min_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_int32
 // A'*B function (dot2):     GB_Adot2B__min_min_int32
 // A'*B function (dot3):     GB_Adot3B__min_min_int32
-// A*B function (heap):      GB_AheapB__min_min_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_MIN_MIN_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_int32
 GrB_Info GB_Adot2B__min_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_int32
 GrB_Info GB_Adot3B__min_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_int32
+GrB_Info GB_Asaxpy3B__min_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_int64.c b/Source/Generated/GB_AxB__min_min_int64.c
index acd8b27707..8188feb7cf 100644
--- a/Source/Generated/GB_AxB__min_min_int64.c
+++ b/Source/Generated/GB_AxB__min_min_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_int64
 // A'*B function (dot2):     GB_Adot2B__min_min_int64
 // A'*B function (dot3):     GB_Adot3B__min_min_int64
-// A*B function (heap):      GB_AheapB__min_min_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_MIN_MIN_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_int64
 GrB_Info GB_Adot2B__min_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_int64
 GrB_Info GB_Adot3B__min_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_int64
+GrB_Info GB_Asaxpy3B__min_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_int8.c b/Source/Generated/GB_AxB__min_min_int8.c
index 300b457b94..4093caf8c8 100644
--- a/Source/Generated/GB_AxB__min_min_int8.c
+++ b/Source/Generated/GB_AxB__min_min_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_int8
 // A'*B function (dot2):     GB_Adot2B__min_min_int8
 // A'*B function (dot3):     GB_Adot3B__min_min_int8
-// A*B function (heap):      GB_AheapB__min_min_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_MIN_MIN_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_int8
 GrB_Info GB_Adot2B__min_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_int8
 GrB_Info GB_Adot3B__min_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_int8
+GrB_Info GB_Asaxpy3B__min_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_uint16.c b/Source/Generated/GB_AxB__min_min_uint16.c
index f5f2139b81..5d70c31716 100644
--- a/Source/Generated/GB_AxB__min_min_uint16.c
+++ b/Source/Generated/GB_AxB__min_min_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_uint16
 // A'*B function (dot2):     GB_Adot2B__min_min_uint16
 // A'*B function (dot3):     GB_Adot3B__min_min_uint16
-// A*B function (heap):      GB_AheapB__min_min_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_MIN_MIN_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_uint16
 GrB_Info GB_Adot2B__min_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_uint16
 GrB_Info GB_Adot3B__min_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_uint16     // dot4 kernel: C+=A'*B, MIN_MIN semiring on uint16_t
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the slicing of A into naslice parts; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the slicing of B into nbslice parts; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel is compiled out when GB_DISABLE is true
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this template
+    return (GrB_SUCCESS) ;      // the template fell through to here: report success
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_uint16
+GrB_Info GB_Asaxpy3B__min_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_uint32.c b/Source/Generated/GB_AxB__min_min_uint32.c
index d75873d16b..1fb5a92b10 100644
--- a/Source/Generated/GB_AxB__min_min_uint32.c
+++ b/Source/Generated/GB_AxB__min_min_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_uint32
 // A'*B function (dot2):     GB_Adot2B__min_min_uint32
 // A'*B function (dot3):     GB_Adot3B__min_min_uint32
-// A*B function (heap):      GB_AheapB__min_min_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_MIN_MIN_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_uint32
 GrB_Info GB_Adot2B__min_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_uint32
 GrB_Info GB_Adot3B__min_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_uint32     // dot4 kernel: C+=A'*B, MIN_MIN semiring on uint32_t
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the slicing of A into naslice parts; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the slicing of B into nbslice parts; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel is compiled out when GB_DISABLE is true
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this template
+    return (GrB_SUCCESS) ;      // the template fell through to here: report success
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_uint32
+GrB_Info GB_Asaxpy3B__min_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_uint64.c b/Source/Generated/GB_AxB__min_min_uint64.c
index b34b350090..23428207bd 100644
--- a/Source/Generated/GB_AxB__min_min_uint64.c
+++ b/Source/Generated/GB_AxB__min_min_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_uint64
 // A'*B function (dot2):     GB_Adot2B__min_min_uint64
 // A'*B function (dot3):     GB_Adot3B__min_min_uint64
-// A*B function (heap):      GB_AheapB__min_min_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_MIN_MIN_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_uint64
 GrB_Info GB_Adot2B__min_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_uint64
 GrB_Info GB_Adot3B__min_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_uint64     // dot4 kernel: C+=A'*B, MIN_MIN semiring on uint64_t
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the slicing of A into naslice parts; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the slicing of B into nbslice parts; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel is compiled out when GB_DISABLE is true
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this template
+    return (GrB_SUCCESS) ;      // the template fell through to here: report success
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_uint64
+GrB_Info GB_Asaxpy3B__min_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_min_uint8.c b/Source/Generated/GB_AxB__min_min_uint8.c
index a6502f487b..43c8bbe8ca 100644
--- a/Source/Generated/GB_AxB__min_min_uint8.c
+++ b/Source/Generated/GB_AxB__min_min_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_min_uint8
 // A'*B function (dot2):     GB_Adot2B__min_min_uint8
 // A'*B function (dot3):     GB_Adot3B__min_min_uint8
-// A*B function (heap):      GB_AheapB__min_min_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_min_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_min_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IMIN (aik, bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMIN (x, y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_MIN_MIN_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_min_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_min_uint8
 GrB_Info GB_Adot2B__min_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_min_uint8
 GrB_Info GB_Adot3B__min_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_min_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_min_uint8      // dot4 kernel: C+=A'*B, MIN_MIN semiring on uint8_t
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably the slicing of A into naslice parts; confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably the slicing of B into nbslice parts; confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel is compiled out when GB_DISABLE is true
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is supplied by this template
+    return (GrB_SUCCESS) ;      // the template fell through to here: report success
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_min_uint8
+GrB_Info GB_Asaxpy3B__min_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_fp32.c b/Source/Generated/GB_AxB__min_minus_fp32.c
index f78aaf31ca..be25ff7435 100644
--- a/Source/Generated/GB_AxB__min_minus_fp32.c
+++ b/Source/Generated/GB_AxB__min_minus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_fp32
 // A'*B function (dot2):     GB_Adot2B__min_minus_fp32
 // A'*B function (dot3):     GB_Adot3B__min_minus_fp32
-// A*B function (heap):      GB_AheapB__min_minus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik - bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x - y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x - y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_MINUS_FP32 || GxB_NO_MIN_MINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_fp32
 GrB_Info GB_Adot2B__min_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_fp32
 GrB_Info GB_Adot3B__min_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_minus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_minus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_fp64.c b/Source/Generated/GB_AxB__min_minus_fp64.c
index 5008fae981..0e93190dc1 100644
--- a/Source/Generated/GB_AxB__min_minus_fp64.c
+++ b/Source/Generated/GB_AxB__min_minus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_fp64
 // A'*B function (dot2):     GB_Adot2B__min_minus_fp64
 // A'*B function (dot3):     GB_Adot3B__min_minus_fp64
-// A*B function (heap):      GB_AheapB__min_minus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik - bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x - y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x - y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_MINUS_FP64 || GxB_NO_MIN_MINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_fp64
 GrB_Info GB_Adot2B__min_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_fp64
 GrB_Info GB_Adot3B__min_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_minus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_minus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_int16.c b/Source/Generated/GB_AxB__min_minus_int16.c
index 49bed0158e..500e7902ec 100644
--- a/Source/Generated/GB_AxB__min_minus_int16.c
+++ b/Source/Generated/GB_AxB__min_minus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_int16
 // A'*B function (dot2):     GB_Adot2B__min_minus_int16
 // A'*B function (dot3):     GB_Adot3B__min_minus_int16
-// A*B function (heap):      GB_AheapB__min_minus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_MINUS_INT16 || GxB_NO_MIN_MINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_int16
 GrB_Info GB_Adot2B__min_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_int16
 GrB_Info GB_Adot3B__min_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_int16
+GrB_Info GB_Asaxpy3B__min_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_int32.c b/Source/Generated/GB_AxB__min_minus_int32.c
index e67450b186..ca4781a92e 100644
--- a/Source/Generated/GB_AxB__min_minus_int32.c
+++ b/Source/Generated/GB_AxB__min_minus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_int32
 // A'*B function (dot2):     GB_Adot2B__min_minus_int32
 // A'*B function (dot3):     GB_Adot3B__min_minus_int32
-// A*B function (heap):      GB_AheapB__min_minus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_MINUS_INT32 || GxB_NO_MIN_MINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_int32
 GrB_Info GB_Adot2B__min_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_int32
 GrB_Info GB_Adot3B__min_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_int32
+GrB_Info GB_Asaxpy3B__min_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_int64.c b/Source/Generated/GB_AxB__min_minus_int64.c
index 92d0aba824..e5bae17bcc 100644
--- a/Source/Generated/GB_AxB__min_minus_int64.c
+++ b/Source/Generated/GB_AxB__min_minus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_int64
 // A'*B function (dot2):     GB_Adot2B__min_minus_int64
 // A'*B function (dot3):     GB_Adot3B__min_minus_int64
-// A*B function (heap):      GB_AheapB__min_minus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_MINUS_INT64 || GxB_NO_MIN_MINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_int64
 GrB_Info GB_Adot2B__min_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_int64
 GrB_Info GB_Adot3B__min_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_int64
+GrB_Info GB_Asaxpy3B__min_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_int8.c b/Source/Generated/GB_AxB__min_minus_int8.c
index 058d1b5735..cd0bcc3aaf 100644
--- a/Source/Generated/GB_AxB__min_minus_int8.c
+++ b/Source/Generated/GB_AxB__min_minus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_int8
 // A'*B function (dot2):     GB_Adot2B__min_minus_int8
 // A'*B function (dot3):     GB_Adot3B__min_minus_int8
-// A*B function (heap):      GB_AheapB__min_minus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_MINUS_INT8 || GxB_NO_MIN_MINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_int8
 GrB_Info GB_Adot2B__min_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_int8
 GrB_Info GB_Adot3B__min_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_int8
+GrB_Info GB_Asaxpy3B__min_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_uint16.c b/Source/Generated/GB_AxB__min_minus_uint16.c
index 9db24dc4b6..5c94cad430 100644
--- a/Source/Generated/GB_AxB__min_minus_uint16.c
+++ b/Source/Generated/GB_AxB__min_minus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_uint16
 // A'*B function (dot2):     GB_Adot2B__min_minus_uint16
 // A'*B function (dot3):     GB_Adot3B__min_minus_uint16
-// A*B function (heap):      GB_AheapB__min_minus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_MINUS_UINT16 || GxB_NO_MIN_MINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_uint16
 GrB_Info GB_Adot2B__min_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_uint16
 GrB_Info GB_Adot3B__min_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_uint16
+GrB_Info GB_Asaxpy3B__min_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_uint32.c b/Source/Generated/GB_AxB__min_minus_uint32.c
index c2a8e30f03..eea70cf7e6 100644
--- a/Source/Generated/GB_AxB__min_minus_uint32.c
+++ b/Source/Generated/GB_AxB__min_minus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_uint32
 // A'*B function (dot2):     GB_Adot2B__min_minus_uint32
 // A'*B function (dot3):     GB_Adot3B__min_minus_uint32
-// A*B function (heap):      GB_AheapB__min_minus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_MINUS_UINT32 || GxB_NO_MIN_MINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_uint32
 GrB_Info GB_Adot2B__min_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_uint32
 GrB_Info GB_Adot3B__min_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_uint32
+GrB_Info GB_Asaxpy3B__min_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_uint64.c b/Source/Generated/GB_AxB__min_minus_uint64.c
index 8ffe4ed3b5..e044d5f562 100644
--- a/Source/Generated/GB_AxB__min_minus_uint64.c
+++ b/Source/Generated/GB_AxB__min_minus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_uint64
 // A'*B function (dot2):     GB_Adot2B__min_minus_uint64
 // A'*B function (dot3):     GB_Adot3B__min_minus_uint64
-// A*B function (heap):      GB_AheapB__min_minus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_MINUS_UINT64 || GxB_NO_MIN_MINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_uint64
 GrB_Info GB_Adot2B__min_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_uint64
 GrB_Info GB_Adot3B__min_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_uint64
+GrB_Info GB_Asaxpy3B__min_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_minus_uint8.c b/Source/Generated/GB_AxB__min_minus_uint8.c
index 472dde6410..a5f4482d69 100644
--- a/Source/Generated/GB_AxB__min_minus_uint8.c
+++ b/Source/Generated/GB_AxB__min_minus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_minus_uint8
 // A'*B function (dot2):     GB_Adot2B__min_minus_uint8
 // A'*B function (dot3):     GB_Adot3B__min_minus_uint8
-// A*B function (heap):      GB_AheapB__min_minus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_minus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_minus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (aik - bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x - y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_MINUS || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_MINUS_UINT8 || GxB_NO_MIN_MINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_minus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_minus_uint8
 GrB_Info GB_Adot2B__min_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_minus_uint8
 GrB_Info GB_Adot3B__min_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_minus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_minus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_minus_uint8
+GrB_Info GB_Asaxpy3B__min_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_fp32.c b/Source/Generated/GB_AxB__min_plus_fp32.c
index a3213a9889..ed70a6e46f 100644
--- a/Source/Generated/GB_AxB__min_plus_fp32.c
+++ b/Source/Generated/GB_AxB__min_plus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_fp32
 // A'*B function (dot2):     GB_Adot2B__min_plus_fp32
 // A'*B function (dot3):     GB_Adot3B__min_plus_fp32
-// A*B function (heap):      GB_AheapB__min_plus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik + bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x + y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x + y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_MIN_PLUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_fp32
 GrB_Info GB_Adot2B__min_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_fp32
 GrB_Info GB_Adot3B__min_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_plus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_plus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_fp64.c b/Source/Generated/GB_AxB__min_plus_fp64.c
index 2b401b13ba..f217d6f26e 100644
--- a/Source/Generated/GB_AxB__min_plus_fp64.c
+++ b/Source/Generated/GB_AxB__min_plus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_fp64
 // A'*B function (dot2):     GB_Adot2B__min_plus_fp64
 // A'*B function (dot3):     GB_Adot3B__min_plus_fp64
-// A*B function (heap):      GB_AheapB__min_plus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik + bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x + y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x + y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_MIN_PLUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_fp64
 GrB_Info GB_Adot2B__min_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_fp64
 GrB_Info GB_Adot3B__min_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_plus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_int16.c b/Source/Generated/GB_AxB__min_plus_int16.c
index 95b650cb42..cc40d34156 100644
--- a/Source/Generated/GB_AxB__min_plus_int16.c
+++ b/Source/Generated/GB_AxB__min_plus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_int16
 // A'*B function (dot2):     GB_Adot2B__min_plus_int16
 // A'*B function (dot3):     GB_Adot3B__min_plus_int16
-// A*B function (heap):      GB_AheapB__min_plus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// monoid "add" of x and y (GB_IMIN for this semiring, not arithmetic x + y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_MIN_PLUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_int16
 GrB_Info GB_Adot2B__min_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_int16
 GrB_Info GB_Adot3B__min_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_int16
+GrB_Info GB_Asaxpy3B__min_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_int32.c b/Source/Generated/GB_AxB__min_plus_int32.c
index 535f4e1c59..89e8b4aa36 100644
--- a/Source/Generated/GB_AxB__min_plus_int32.c
+++ b/Source/Generated/GB_AxB__min_plus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_int32
 // A'*B function (dot2):     GB_Adot2B__min_plus_int32
 // A'*B function (dot3):     GB_Adot3B__min_plus_int32
-// A*B function (heap):      GB_AheapB__min_plus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// monoid "add" of x and y (GB_IMIN for this semiring, not arithmetic x + y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_MIN_PLUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_int32
 GrB_Info GB_Adot2B__min_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_int32
 GrB_Info GB_Adot3B__min_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_int32
+GrB_Info GB_Asaxpy3B__min_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_int64.c b/Source/Generated/GB_AxB__min_plus_int64.c
index 482b0cf582..5cac9cdcf6 100644
--- a/Source/Generated/GB_AxB__min_plus_int64.c
+++ b/Source/Generated/GB_AxB__min_plus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_int64
 // A'*B function (dot2):     GB_Adot2B__min_plus_int64
 // A'*B function (dot3):     GB_Adot3B__min_plus_int64
-// A*B function (heap):      GB_AheapB__min_plus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// monoid "add" of x and y (GB_IMIN for this semiring, not arithmetic x + y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_MIN_PLUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_int64
 GrB_Info GB_Adot2B__min_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_int64
 GrB_Info GB_Adot3B__min_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_int64
+GrB_Info GB_Asaxpy3B__min_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_int8.c b/Source/Generated/GB_AxB__min_plus_int8.c
index 64d2432ac7..bb5658b326 100644
--- a/Source/Generated/GB_AxB__min_plus_int8.c
+++ b/Source/Generated/GB_AxB__min_plus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_int8
 // A'*B function (dot2):     GB_Adot2B__min_plus_int8
 // A'*B function (dot3):     GB_Adot3B__min_plus_int8
-// A*B function (heap):      GB_AheapB__min_plus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// monoid "add" of x and y (GB_IMIN for this semiring, not arithmetic x + y)
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_MIN_PLUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_int8
 GrB_Info GB_Adot2B__min_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_int8
 GrB_Info GB_Adot3B__min_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_int8
+GrB_Info GB_Asaxpy3B__min_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_uint16.c b/Source/Generated/GB_AxB__min_plus_uint16.c
index 061da564fc..62b14a6775 100644
--- a/Source/Generated/GB_AxB__min_plus_uint16.c
+++ b/Source/Generated/GB_AxB__min_plus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_uint16
 // A'*B function (dot2):     GB_Adot2B__min_plus_uint16
 // A'*B function (dot3):     GB_Adot3B__min_plus_uint16
-// A*B function (heap):      GB_AheapB__min_plus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_MIN_PLUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_uint16
 GrB_Info GB_Adot2B__min_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_uint16
 GrB_Info GB_Adot3B__min_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_uint16
+GrB_Info GB_Asaxpy3B__min_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_uint32.c b/Source/Generated/GB_AxB__min_plus_uint32.c
index f029f97a8e..ee6860a066 100644
--- a/Source/Generated/GB_AxB__min_plus_uint32.c
+++ b/Source/Generated/GB_AxB__min_plus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_uint32
 // A'*B function (dot2):     GB_Adot2B__min_plus_uint32
 // A'*B function (dot3):     GB_Adot3B__min_plus_uint32
-// A*B function (heap):      GB_AheapB__min_plus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_MIN_PLUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_uint32
 GrB_Info GB_Adot2B__min_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_uint32
 GrB_Info GB_Adot3B__min_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_uint32
+GrB_Info GB_Asaxpy3B__min_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_uint64.c b/Source/Generated/GB_AxB__min_plus_uint64.c
index 0dcc5863ec..48a698222b 100644
--- a/Source/Generated/GB_AxB__min_plus_uint64.c
+++ b/Source/Generated/GB_AxB__min_plus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_uint64
 // A'*B function (dot2):     GB_Adot2B__min_plus_uint64
 // A'*B function (dot3):     GB_Adot3B__min_plus_uint64
-// A*B function (heap):      GB_AheapB__min_plus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_MIN_PLUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_uint64
 GrB_Info GB_Adot2B__min_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_uint64
 GrB_Info GB_Adot3B__min_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_uint64
+GrB_Info GB_Asaxpy3B__min_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_plus_uint8.c b/Source/Generated/GB_AxB__min_plus_uint8.c
index 5534b674b9..0b55f6062c 100644
--- a/Source/Generated/GB_AxB__min_plus_uint8.c
+++ b/Source/Generated/GB_AxB__min_plus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_plus_uint8
 // A'*B function (dot2):     GB_Adot2B__min_plus_uint8
 // A'*B function (dot3):     GB_Adot3B__min_plus_uint8
-// A*B function (heap):      GB_AheapB__min_plus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_plus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_plus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (aik + bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x + y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_PLUS || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_MIN_PLUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_plus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_plus_uint8
 GrB_Info GB_Adot2B__min_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_plus_uint8
 GrB_Info GB_Adot3B__min_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_plus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_plus_uint8
+GrB_Info GB_Asaxpy3B__min_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_fp32.c b/Source/Generated/GB_AxB__min_rdiv_fp32.c
index e4277441b1..39743d55f2 100644
--- a/Source/Generated/GB_AxB__min_rdiv_fp32.c
+++ b/Source/Generated/GB_AxB__min_rdiv_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_fp32
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_fp32
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_fp32
-// A*B function (heap):      GB_AheapB__min_rdiv_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (bkj / aik))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (y / x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (y / x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_RDIV_FP32 || GxB_NO_MIN_RDIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_fp32
 GrB_Info GB_Adot2B__min_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_fp32
 GrB_Info GB_Adot3B__min_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_rdiv_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_rdiv_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_rdiv_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_fp64.c b/Source/Generated/GB_AxB__min_rdiv_fp64.c
index d7f05d1cca..ab3865b7c0 100644
--- a/Source/Generated/GB_AxB__min_rdiv_fp64.c
+++ b/Source/Generated/GB_AxB__min_rdiv_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_fp64
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_fp64
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_fp64
-// A*B function (heap):      GB_AheapB__min_rdiv_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (bkj / aik))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (y / x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (y / x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_RDIV_FP64 || GxB_NO_MIN_RDIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_fp64
 GrB_Info GB_Adot2B__min_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_fp64
 GrB_Info GB_Adot3B__min_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_rdiv_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_rdiv_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_rdiv_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_int16.c b/Source/Generated/GB_AxB__min_rdiv_int16.c
index 3d90d7fd47..276dd631cd 100644
--- a/Source/Generated/GB_AxB__min_rdiv_int16.c
+++ b/Source/Generated/GB_AxB__min_rdiv_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_int16
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_int16
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_int16
-// A*B function (heap):      GB_AheapB__min_rdiv_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 16)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 16) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_RDIV_INT16 || GxB_NO_MIN_RDIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_int16
 GrB_Info GB_Adot2B__min_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_int16
 GrB_Info GB_Adot3B__min_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_int16
+GrB_Info GB_Asaxpy3B__min_rdiv_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_int32.c b/Source/Generated/GB_AxB__min_rdiv_int32.c
index 1cd7f43251..609f2d34bc 100644
--- a/Source/Generated/GB_AxB__min_rdiv_int32.c
+++ b/Source/Generated/GB_AxB__min_rdiv_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_int32
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_int32
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_int32
-// A*B function (heap):      GB_AheapB__min_rdiv_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 32)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 32) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_RDIV_INT32 || GxB_NO_MIN_RDIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_int32
 GrB_Info GB_Adot2B__min_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_int32
 GrB_Info GB_Adot3B__min_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_int32
+GrB_Info GB_Asaxpy3B__min_rdiv_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_int64.c b/Source/Generated/GB_AxB__min_rdiv_int64.c
index 9119188f7c..c037611d08 100644
--- a/Source/Generated/GB_AxB__min_rdiv_int64.c
+++ b/Source/Generated/GB_AxB__min_rdiv_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_int64
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_int64
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_int64
-// A*B function (heap):      GB_AheapB__min_rdiv_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 64)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 64) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_RDIV_INT64 || GxB_NO_MIN_RDIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_int64
 GrB_Info GB_Adot2B__min_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_int64
 GrB_Info GB_Adot3B__min_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_int64
+GrB_Info GB_Asaxpy3B__min_rdiv_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_int8.c b/Source/Generated/GB_AxB__min_rdiv_int8.c
index ecfc684d9f..e01db6a46c 100644
--- a/Source/Generated/GB_AxB__min_rdiv_int8.c
+++ b/Source/Generated/GB_AxB__min_rdiv_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_int8
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_int8
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_int8
-// A*B function (heap):      GB_AheapB__min_rdiv_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 8)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 8) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_RDIV_INT8 || GxB_NO_MIN_RDIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_int8
 GrB_Info GB_Adot2B__min_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_int8
 GrB_Info GB_Adot3B__min_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_int8
+GrB_Info GB_Asaxpy3B__min_rdiv_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_uint16.c b/Source/Generated/GB_AxB__min_rdiv_uint16.c
index 2b271f7427..751323805e 100644
--- a/Source/Generated/GB_AxB__min_rdiv_uint16.c
+++ b/Source/Generated/GB_AxB__min_rdiv_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_uint16
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_uint16
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_uint16
-// A*B function (heap):      GB_AheapB__min_rdiv_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 16)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 16) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_RDIV_UINT16 || GxB_NO_MIN_RDIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_uint16
 GrB_Info GB_Adot2B__min_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_uint16
 GrB_Info GB_Adot3B__min_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_uint16
+GrB_Info GB_Asaxpy3B__min_rdiv_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_uint32.c b/Source/Generated/GB_AxB__min_rdiv_uint32.c
index c872ae1b6d..cac80b7030 100644
--- a/Source/Generated/GB_AxB__min_rdiv_uint32.c
+++ b/Source/Generated/GB_AxB__min_rdiv_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_uint32
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_uint32
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_uint32
-// A*B function (heap):      GB_AheapB__min_rdiv_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 32)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 32) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_RDIV_UINT32 || GxB_NO_MIN_RDIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_uint32
 GrB_Info GB_Adot2B__min_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_uint32
 GrB_Info GB_Adot3B__min_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_uint32
+GrB_Info GB_Asaxpy3B__min_rdiv_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_uint64.c b/Source/Generated/GB_AxB__min_rdiv_uint64.c
index 4af7819e20..07077f80a9 100644
--- a/Source/Generated/GB_AxB__min_rdiv_uint64.c
+++ b/Source/Generated/GB_AxB__min_rdiv_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_uint64
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_uint64
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_uint64
-// A*B function (heap):      GB_AheapB__min_rdiv_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 64)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 64) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_RDIV_UINT64 || GxB_NO_MIN_RDIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_uint64
 GrB_Info GB_Adot2B__min_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_uint64
 GrB_Info GB_Adot3B__min_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_uint64
+GrB_Info GB_Asaxpy3B__min_rdiv_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rdiv_uint8.c b/Source/Generated/GB_AxB__min_rdiv_uint8.c
index b139cf717e..7e35a65fdf 100644
--- a/Source/Generated/GB_AxB__min_rdiv_uint8.c
+++ b/Source/Generated/GB_AxB__min_rdiv_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rdiv_uint8
 // A'*B function (dot2):     GB_Adot2B__min_rdiv_uint8
 // A'*B function (dot3):     GB_Adot3B__min_rdiv_uint8
-// A*B function (heap):      GB_AheapB__min_rdiv_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_rdiv_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rdiv_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 8)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 8) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RDIV || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_RDIV_UINT8 || GxB_NO_MIN_RDIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rdiv_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rdiv_uint8
 GrB_Info GB_Adot2B__min_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rdiv_uint8
 GrB_Info GB_Adot3B__min_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rdiv_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rdiv_uint8
+GrB_Info GB_Asaxpy3B__min_rdiv_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_fp32.c b/Source/Generated/GB_AxB__min_rminus_fp32.c
index fe8d8bafda..384b191f8d 100644
--- a/Source/Generated/GB_AxB__min_rminus_fp32.c
+++ b/Source/Generated/GB_AxB__min_rminus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_fp32
 // A'*B function (dot2):     GB_Adot2B__min_rminus_fp32
 // A'*B function (dot3):     GB_Adot3B__min_rminus_fp32
-// A*B function (heap):      GB_AheapB__min_rminus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (bkj - aik))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (y - x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (y - x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_RMINUS_FP32 || GxB_NO_MIN_RMINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_fp32
 GrB_Info GB_Adot2B__min_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_fp32
 GrB_Info GB_Adot3B__min_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_rminus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_rminus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_fp64.c b/Source/Generated/GB_AxB__min_rminus_fp64.c
index 121837499f..e5e8927bec 100644
--- a/Source/Generated/GB_AxB__min_rminus_fp64.c
+++ b/Source/Generated/GB_AxB__min_rminus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_fp64
 // A'*B function (dot2):     GB_Adot2B__min_rminus_fp64
 // A'*B function (dot3):     GB_Adot3B__min_rminus_fp64
-// A*B function (heap):      GB_AheapB__min_rminus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (bkj - aik))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (y - x)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (y - x))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_RMINUS_FP64 || GxB_NO_MIN_RMINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_fp64
 GrB_Info GB_Adot2B__min_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_fp64
 GrB_Info GB_Adot3B__min_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_rminus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_rminus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_int16.c b/Source/Generated/GB_AxB__min_rminus_int16.c
index 86378a69eb..caa5ff0d65 100644
--- a/Source/Generated/GB_AxB__min_rminus_int16.c
+++ b/Source/Generated/GB_AxB__min_rminus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_int16
 // A'*B function (dot2):     GB_Adot2B__min_rminus_int16
 // A'*B function (dot3):     GB_Adot3B__min_rminus_int16
-// A*B function (heap):      GB_AheapB__min_rminus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_RMINUS_INT16 || GxB_NO_MIN_RMINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_int16
 GrB_Info GB_Adot2B__min_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_int16
 GrB_Info GB_Adot3B__min_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_int16
+GrB_Info GB_Asaxpy3B__min_rminus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_int32.c b/Source/Generated/GB_AxB__min_rminus_int32.c
index d306421cc1..a0f86e4d82 100644
--- a/Source/Generated/GB_AxB__min_rminus_int32.c
+++ b/Source/Generated/GB_AxB__min_rminus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_int32
 // A'*B function (dot2):     GB_Adot2B__min_rminus_int32
 // A'*B function (dot3):     GB_Adot3B__min_rminus_int32
-// A*B function (heap):      GB_AheapB__min_rminus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_RMINUS_INT32 || GxB_NO_MIN_RMINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_int32
 GrB_Info GB_Adot2B__min_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_int32
 GrB_Info GB_Adot3B__min_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_int32
+GrB_Info GB_Asaxpy3B__min_rminus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_int64.c b/Source/Generated/GB_AxB__min_rminus_int64.c
index d222da0c1d..2f6868f94d 100644
--- a/Source/Generated/GB_AxB__min_rminus_int64.c
+++ b/Source/Generated/GB_AxB__min_rminus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_int64
 // A'*B function (dot2):     GB_Adot2B__min_rminus_int64
 // A'*B function (dot3):     GB_Adot3B__min_rminus_int64
-// A*B function (heap):      GB_AheapB__min_rminus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_RMINUS_INT64 || GxB_NO_MIN_RMINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_int64
 GrB_Info GB_Adot2B__min_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_int64
 GrB_Info GB_Adot3B__min_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_int64
+GrB_Info GB_Asaxpy3B__min_rminus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_int8.c b/Source/Generated/GB_AxB__min_rminus_int8.c
index fec3dfce1a..ceb286ce7e 100644
--- a/Source/Generated/GB_AxB__min_rminus_int8.c
+++ b/Source/Generated/GB_AxB__min_rminus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_int8
 // A'*B function (dot2):     GB_Adot2B__min_rminus_int8
 // A'*B function (dot3):     GB_Adot3B__min_rminus_int8
-// A*B function (heap):      GB_AheapB__min_rminus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_RMINUS_INT8 || GxB_NO_MIN_RMINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_int8
 GrB_Info GB_Adot2B__min_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_int8
 GrB_Info GB_Adot3B__min_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_int8
+GrB_Info GB_Asaxpy3B__min_rminus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_uint16.c b/Source/Generated/GB_AxB__min_rminus_uint16.c
index 21649b2c79..536ea20cf3 100644
--- a/Source/Generated/GB_AxB__min_rminus_uint16.c
+++ b/Source/Generated/GB_AxB__min_rminus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_uint16
 // A'*B function (dot2):     GB_Adot2B__min_rminus_uint16
 // A'*B function (dot3):     GB_Adot3B__min_rminus_uint16
-// A*B function (heap):      GB_AheapB__min_rminus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_RMINUS_UINT16 || GxB_NO_MIN_RMINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_uint16
 GrB_Info GB_Adot2B__min_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_uint16
 GrB_Info GB_Adot3B__min_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_uint16
+GrB_Info GB_Asaxpy3B__min_rminus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_uint32.c b/Source/Generated/GB_AxB__min_rminus_uint32.c
index e356ae61f1..d377e050be 100644
--- a/Source/Generated/GB_AxB__min_rminus_uint32.c
+++ b/Source/Generated/GB_AxB__min_rminus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_uint32
 // A'*B function (dot2):     GB_Adot2B__min_rminus_uint32
 // A'*B function (dot3):     GB_Adot3B__min_rminus_uint32
-// A*B function (heap):      GB_AheapB__min_rminus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_RMINUS_UINT32 || GxB_NO_MIN_RMINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_uint32
 GrB_Info GB_Adot2B__min_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_uint32
 GrB_Info GB_Adot3B__min_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_uint32
+GrB_Info GB_Asaxpy3B__min_rminus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_uint64.c b/Source/Generated/GB_AxB__min_rminus_uint64.c
index 1c816600c7..5d49cc600a 100644
--- a/Source/Generated/GB_AxB__min_rminus_uint64.c
+++ b/Source/Generated/GB_AxB__min_rminus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_uint64
 // A'*B function (dot2):     GB_Adot2B__min_rminus_uint64
 // A'*B function (dot3):     GB_Adot3B__min_rminus_uint64
-// A*B function (heap):      GB_AheapB__min_rminus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_RMINUS_UINT64 || GxB_NO_MIN_RMINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_uint64
 GrB_Info GB_Adot2B__min_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_uint64
 GrB_Info GB_Adot3B__min_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_uint64
+GrB_Info GB_Asaxpy3B__min_rminus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_rminus_uint8.c b/Source/Generated/GB_AxB__min_rminus_uint8.c
index 1003a88371..aeec47b502 100644
--- a/Source/Generated/GB_AxB__min_rminus_uint8.c
+++ b/Source/Generated/GB_AxB__min_rminus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_rminus_uint8
 // A'*B function (dot2):     GB_Adot2B__min_rminus_uint8
 // A'*B function (dot3):     GB_Adot3B__min_rminus_uint8
-// A*B function (heap):      GB_AheapB__min_rminus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_rminus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_rminus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (bkj - aik) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (y - x) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_RMINUS || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_RMINUS_UINT8 || GxB_NO_MIN_RMINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_rminus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_rminus_uint8
 GrB_Info GB_Adot2B__min_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_rminus_uint8
 GrB_Info GB_Adot3B__min_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_rminus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_rminus_uint8
+GrB_Info GB_Asaxpy3B__min_rminus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_fp32.c b/Source/Generated/GB_AxB__min_second_fp32.c
index c51f1e572b..2759221e23 100644
--- a/Source/Generated/GB_AxB__min_second_fp32.c
+++ b/Source/Generated/GB_AxB__min_second_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_fp32
 // A'*B function (dot2):     GB_Adot2B__min_second_fp32
 // A'*B function (dot3):     GB_Adot3B__min_second_fp32
-// A*B function (heap):      GB_AheapB__min_second_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, bkj)
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_SECOND_FP32 || GxB_NO_MIN_SECOND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_fp32
 GrB_Info GB_Adot2B__min_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_fp32
 GrB_Info GB_Adot3B__min_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_fp64.c b/Source/Generated/GB_AxB__min_second_fp64.c
index b46d6f7d85..6d4c09407e 100644
--- a/Source/Generated/GB_AxB__min_second_fp64.c
+++ b/Source/Generated/GB_AxB__min_second_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_fp64
 // A'*B function (dot2):     GB_Adot2B__min_second_fp64
 // A'*B function (dot3):     GB_Adot3B__min_second_fp64
-// A*B function (heap):      GB_AheapB__min_second_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, bkj)
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_SECOND_FP64 || GxB_NO_MIN_SECOND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_fp64
 GrB_Info GB_Adot2B__min_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_fp64
 GrB_Info GB_Adot3B__min_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_fp64
+(
+    GrB_Matrix C,                               // matrix updated by the product (section header: C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A described by A_slice -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B described by B_slice -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the body of this function comes from the shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                               // result matrix of the multiply (section header: C=A*B)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // NOTE(review): presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                            // NOTE(review): presumably the number of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the body of this function comes from the shared saxpy3 template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_int16.c b/Source/Generated/GB_AxB__min_second_int16.c
index bbaab703ef..3b49e8397c 100644
--- a/Source/Generated/GB_AxB__min_second_int16.c
+++ b/Source/Generated/GB_AxB__min_second_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_int16
 // A'*B function (dot2):     GB_Adot2B__min_second_int16
 // A'*B function (dot3):     GB_Adot3B__min_second_int16
-// A*B function (heap):      GB_AheapB__min_second_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_SECOND_INT16 || GxB_NO_MIN_SECOND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_int16
 GrB_Info GB_Adot2B__min_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_int16
 GrB_Info GB_Adot3B__min_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_int16
+(
+    GrB_Matrix C,                               // matrix updated by the product (section header: C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A described by A_slice -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B described by B_slice -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the body of this function comes from the shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                               // result matrix of the multiply (section header: C=A*B)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // NOTE(review): presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                            // NOTE(review): presumably the number of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the body of this function comes from the shared saxpy3 template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_int32.c b/Source/Generated/GB_AxB__min_second_int32.c
index 991a0433ab..de687d6a06 100644
--- a/Source/Generated/GB_AxB__min_second_int32.c
+++ b/Source/Generated/GB_AxB__min_second_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_int32
 // A'*B function (dot2):     GB_Adot2B__min_second_int32
 // A'*B function (dot3):     GB_Adot3B__min_second_int32
-// A*B function (heap):      GB_AheapB__min_second_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_SECOND_INT32 || GxB_NO_MIN_SECOND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_int32
 GrB_Info GB_Adot2B__min_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_int32
 GrB_Info GB_Adot3B__min_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_int32
+(
+    GrB_Matrix C,                               // matrix updated by the product (section header: C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A described by A_slice -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B described by B_slice -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the body of this function comes from the shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                               // result matrix of the multiply (section header: C=A*B)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // NOTE(review): presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                            // NOTE(review): presumably the number of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the body of this function comes from the shared saxpy3 template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_int64.c b/Source/Generated/GB_AxB__min_second_int64.c
index f379e9284a..74865eb9d8 100644
--- a/Source/Generated/GB_AxB__min_second_int64.c
+++ b/Source/Generated/GB_AxB__min_second_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_int64
 // A'*B function (dot2):     GB_Adot2B__min_second_int64
 // A'*B function (dot3):     GB_Adot3B__min_second_int64
-// A*B function (heap):      GB_AheapB__min_second_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_SECOND_INT64 || GxB_NO_MIN_SECOND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_int64
 GrB_Info GB_Adot2B__min_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_int64
 GrB_Info GB_Adot3B__min_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_int64
+(
+    GrB_Matrix C,                               // matrix updated by the product (section header: C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // NOTE(review): A_is_pattern presumably means A's values are unused -- confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably naslice slices of A described by A_slice -- confirm
+    const GrB_Matrix B, bool B_is_pattern,      // NOTE(review): B_is_pattern presumably means B's values are unused -- confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably nbslice slices of B described by B_slice -- confirm
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled via GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the body of this function comes from the shared dot4 template
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                               // result matrix of the multiply (section header: C=A*B)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // NOTE(review): Mask_comp presumably selects C<!M>, Mask_struct a structural mask -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // NOTE(review): presumably ntasks task descriptors -- confirm
+    const int ntasks,
+    const int nfine,                            // NOTE(review): presumably the number of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the body of this function comes from the shared saxpy3 template
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_int8.c b/Source/Generated/GB_AxB__min_second_int8.c
index 381e2ed1ff..aa1a922f30 100644
--- a/Source/Generated/GB_AxB__min_second_int8.c
+++ b/Source/Generated/GB_AxB__min_second_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_int8
 // A'*B function (dot2):     GB_Adot2B__min_second_int8
 // A'*B function (dot3):     GB_Adot3B__min_second_int8
-// A*B function (heap):      GB_AheapB__min_second_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_SECOND_INT8 || GxB_NO_MIN_SECOND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_int8
 GrB_Info GB_Adot2B__min_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_int8
 GrB_Info GB_Adot3B__min_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_uint16.c b/Source/Generated/GB_AxB__min_second_uint16.c
index 2d687988a6..35ce713953 100644
--- a/Source/Generated/GB_AxB__min_second_uint16.c
+++ b/Source/Generated/GB_AxB__min_second_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_uint16
 // A'*B function (dot2):     GB_Adot2B__min_second_uint16
 // A'*B function (dot3):     GB_Adot3B__min_second_uint16
-// A*B function (heap):      GB_AheapB__min_second_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_SECOND_UINT16 || GxB_NO_MIN_SECOND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_uint16
 GrB_Info GB_Adot2B__min_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_uint16
 GrB_Info GB_Adot3B__min_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_uint32.c b/Source/Generated/GB_AxB__min_second_uint32.c
index 97b5c37b03..29a357916c 100644
--- a/Source/Generated/GB_AxB__min_second_uint32.c
+++ b/Source/Generated/GB_AxB__min_second_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_uint32
 // A'*B function (dot2):     GB_Adot2B__min_second_uint32
 // A'*B function (dot3):     GB_Adot3B__min_second_uint32
-// A*B function (heap):      GB_AheapB__min_second_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_SECOND_UINT32 || GxB_NO_MIN_SECOND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_uint32
 GrB_Info GB_Adot2B__min_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_uint32
 GrB_Info GB_Adot3B__min_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_uint64.c b/Source/Generated/GB_AxB__min_second_uint64.c
index 4e9aaaca1e..860925c966 100644
--- a/Source/Generated/GB_AxB__min_second_uint64.c
+++ b/Source/Generated/GB_AxB__min_second_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_uint64
 // A'*B function (dot2):     GB_Adot2B__min_second_uint64
 // A'*B function (dot3):     GB_Adot3B__min_second_uint64
-// A*B function (heap):      GB_AheapB__min_second_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_SECOND_UINT64 || GxB_NO_MIN_SECOND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_uint64
 GrB_Info GB_Adot2B__min_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_uint64
 GrB_Info GB_Adot3B__min_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_second_uint8.c b/Source/Generated/GB_AxB__min_second_uint8.c
index edf21e8229..ec40a26bbb 100644
--- a/Source/Generated/GB_AxB__min_second_uint8.c
+++ b/Source/Generated/GB_AxB__min_second_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_second_uint8
 // A'*B function (dot2):     GB_Adot2B__min_second_uint8
 // A'*B function (dot3):     GB_Adot3B__min_second_uint8
-// A*B function (heap):      GB_AheapB__min_second_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_second_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_second_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = GB_IMIN (cij, bkj)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = GB_IMIN (z, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = GB_IMIN (z, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_SECOND || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_SECOND_UINT8 || GxB_NO_MIN_SECOND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_second_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_second_uint8
 GrB_Info GB_Adot2B__min_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_second_uint8
 GrB_Info GB_Adot3B__min_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_second_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_second_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_second_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_fp32.c b/Source/Generated/GB_AxB__min_times_fp32.c
index cd7b510eda..75ab49ee51 100644
--- a/Source/Generated/GB_AxB__min_times_fp32.c
+++ b/Source/Generated/GB_AxB__min_times_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_fp32
 // A'*B function (dot2):     GB_Adot2B__min_times_fp32
 // A'*B function (dot3):     GB_Adot3B__min_times_fp32
-// A*B function (heap):      GB_AheapB__min_times_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij = fminf (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fminf (cij, (aik * bkj))
 // Identity: INFINITY
 // Terminal: if (cij == (-INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fminf (z, (x * y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fminf (z, (x * y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INFINITY
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == (-INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fminf (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fminf (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fminf (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_FP32 || GxB_NO_MIN_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_MIN_TIMES_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_times_fp32
 GrB_Info GB_Adot2B__min_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_fp32
 GrB_Info GB_Adot3B__min_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_times_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_times_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_fp64.c b/Source/Generated/GB_AxB__min_times_fp64.c
index 61786b4260..b6dbfbda87 100644
--- a/Source/Generated/GB_AxB__min_times_fp64.c
+++ b/Source/Generated/GB_AxB__min_times_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_fp64
 // A'*B function (dot2):     GB_Adot2B__min_times_fp64
 // A'*B function (dot3):     GB_Adot3B__min_times_fp64
-// A*B function (heap):      GB_AheapB__min_times_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij = fmin (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  cij = fmin (cij, (aik * bkj))
 // Identity: ((double) INFINITY)
 // Terminal: if (cij == ((double) -INFINITY)) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z = fmin (z, (x * y)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z = fmin (z, (x * y))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     ((double) INFINITY)
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == ((double) -INFINITY)) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = fmin (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    fmin (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = fmin (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_FP64 || GxB_NO_MIN_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_MIN_TIMES_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__min_times_fp64
 GrB_Info GB_Adot2B__min_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_fp64
 GrB_Info GB_Adot3B__min_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__min_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__min_times_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__min_times_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_int16.c b/Source/Generated/GB_AxB__min_times_int16.c
index 3c4b4a936c..b1c7284d57 100644
--- a/Source/Generated/GB_AxB__min_times_int16.c
+++ b/Source/Generated/GB_AxB__min_times_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_int16
 // A'*B function (dot2):     GB_Adot2B__min_times_int16
 // A'*B function (dot3):     GB_Adot3B__min_times_int16
-// A*B function (heap):      GB_AheapB__min_times_int16
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int16_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT16_MAX
 // Terminal: if (cij == INT16_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT16_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_INT16 || GxB_NO_MIN_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_MIN_TIMES_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_int16
 GrB_Info GB_Adot2B__min_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_int16
 GrB_Info GB_Adot3B__min_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_int16
+GrB_Info GB_Asaxpy3B__min_times_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_int32.c b/Source/Generated/GB_AxB__min_times_int32.c
index b03517d273..97fcdcf406 100644
--- a/Source/Generated/GB_AxB__min_times_int32.c
+++ b/Source/Generated/GB_AxB__min_times_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_int32
 // A'*B function (dot2):     GB_Adot2B__min_times_int32
 // A'*B function (dot3):     GB_Adot3B__min_times_int32
-// A*B function (heap):      GB_AheapB__min_times_int32
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int32_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT32_MAX
 // Terminal: if (cij == INT32_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT32_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_INT32 || GxB_NO_MIN_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_MIN_TIMES_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_int32
 GrB_Info GB_Adot2B__min_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_int32
 GrB_Info GB_Adot3B__min_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_int32
+GrB_Info GB_Asaxpy3B__min_times_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_int64.c b/Source/Generated/GB_AxB__min_times_int64.c
index 95c44d859f..058a3a9b2e 100644
--- a/Source/Generated/GB_AxB__min_times_int64.c
+++ b/Source/Generated/GB_AxB__min_times_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_int64
 // A'*B function (dot2):     GB_Adot2B__min_times_int64
 // A'*B function (dot3):     GB_Adot3B__min_times_int64
-// A*B function (heap):      GB_AheapB__min_times_int64
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int64_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT64_MAX
 // Terminal: if (cij == INT64_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT64_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_INT64 || GxB_NO_MIN_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_MIN_TIMES_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_int64
 GrB_Info GB_Adot2B__min_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_int64
 GrB_Info GB_Adot3B__min_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_int64
+GrB_Info GB_Asaxpy3B__min_times_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_int8.c b/Source/Generated/GB_AxB__min_times_int8.c
index 02e43d04cd..c1bfb58bdf 100644
--- a/Source/Generated/GB_AxB__min_times_int8.c
+++ b/Source/Generated/GB_AxB__min_times_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_int8
 // A'*B function (dot2):     GB_Adot2B__min_times_int8
 // A'*B function (dot3):     GB_Adot3B__min_times_int8
-// A*B function (heap):      GB_AheapB__min_times_int8
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  int8_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: INT8_MAX
 // Terminal: if (cij == INT8_MIN) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     INT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == INT8_MIN) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_INT8 || GxB_NO_MIN_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_MIN_TIMES_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_int8
 GrB_Info GB_Adot2B__min_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_int8
 GrB_Info GB_Adot3B__min_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_int8
+GrB_Info GB_Asaxpy3B__min_times_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_uint16.c b/Source/Generated/GB_AxB__min_times_uint16.c
index 4b16159fc2..4292156ed3 100644
--- a/Source/Generated/GB_AxB__min_times_uint16.c
+++ b/Source/Generated/GB_AxB__min_times_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_uint16
 // A'*B function (dot2):     GB_Adot2B__min_times_uint16
 // A'*B function (dot3):     GB_Adot3B__min_times_uint16
-// A*B function (heap):      GB_AheapB__min_times_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint16_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT16_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT16_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_MIN_TIMES_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_uint16
 GrB_Info GB_Adot2B__min_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_uint16
 GrB_Info GB_Adot3B__min_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_uint16
+GrB_Info GB_Asaxpy3B__min_times_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_uint32.c b/Source/Generated/GB_AxB__min_times_uint32.c
index 40c63758bb..e8cf383b93 100644
--- a/Source/Generated/GB_AxB__min_times_uint32.c
+++ b/Source/Generated/GB_AxB__min_times_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_uint32
 // A'*B function (dot2):     GB_Adot2B__min_times_uint32
 // A'*B function (dot3):     GB_Adot3B__min_times_uint32
-// A*B function (heap):      GB_AheapB__min_times_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint32_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT32_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT32_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_MIN_TIMES_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_uint32
 GrB_Info GB_Adot2B__min_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_uint32
 GrB_Info GB_Adot3B__min_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_uint32
+GrB_Info GB_Asaxpy3B__min_times_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_uint64.c b/Source/Generated/GB_AxB__min_times_uint64.c
index 6afda383ea..a2bb550507 100644
--- a/Source/Generated/GB_AxB__min_times_uint64.c
+++ b/Source/Generated/GB_AxB__min_times_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_uint64
 // A'*B function (dot2):     GB_Adot2B__min_times_uint64
 // A'*B function (dot3):     GB_Adot3B__min_times_uint64
-// A*B function (heap):      GB_AheapB__min_times_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint64_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT64_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT64_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_MIN_TIMES_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_uint64
 GrB_Info GB_Adot2B__min_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_uint64
 GrB_Info GB_Adot3B__min_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_uint64
+GrB_Info GB_Asaxpy3B__min_times_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__min_times_uint8.c b/Source/Generated/GB_AxB__min_times_uint8.c
index 61e9719b7b..22dec585fb 100644
--- a/Source/Generated/GB_AxB__min_times_uint8.c
+++ b/Source/Generated/GB_AxB__min_times_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__min_times_uint8
 // A'*B function (dot2):     GB_Adot2B__min_times_uint8
 // A'*B function (dot3):     GB_Adot3B__min_times_uint8
-// A*B function (heap):      GB_AheapB__min_times_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__min_times_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__min_times_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij = GB_IMIN (cij, x_op_y)
+// Add:      cij = GB_IMIN (cij, z)
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 0
 // MultAdd:  uint8_t x_op_y = (aik * bkj) ; cij = GB_IMIN (cij, x_op_y)
 // Identity: UINT8_MAX
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y) ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x * y) ; z = GB_IMIN (z, x_op_y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     UINT8_MAX
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] = GB_IMIN (Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_IMIN (x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    0
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] = GB_IMIN (Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_TIMES || GxB_NO_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_MIN_TIMES_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__min_times_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__min_times_uint8
 GrB_Info GB_Adot2B__min_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__min_times_uint8
 GrB_Info GB_Adot3B__min_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__min_times_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__min_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__min_times_uint8
+GrB_Info GB_Asaxpy3B__min_times_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_fp32.c b/Source/Generated/GB_AxB__plus_div_fp32.c
index 55dd1d60f9..3e3d368474 100644
--- a/Source/Generated/GB_AxB__plus_div_fp32.c
+++ b/Source/Generated/GB_AxB__plus_div_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_div_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_div_fp32
-// A*B function (heap):      GB_AheapB__plus_div_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik / bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x / y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x / y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_DIV_FP32 || GxB_NO_PLUS_DIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_fp32
 GrB_Info GB_Adot2B__plus_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_fp32
 GrB_Info GB_Adot3B__plus_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_div_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_div_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_fp64.c b/Source/Generated/GB_AxB__plus_div_fp64.c
index e0e7046412..8e4ea2bc1c 100644
--- a/Source/Generated/GB_AxB__plus_div_fp64.c
+++ b/Source/Generated/GB_AxB__plus_div_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_div_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_div_fp64
-// A*B function (heap):      GB_AheapB__plus_div_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik / bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x / y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x / y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_DIV_FP64 || GxB_NO_PLUS_DIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_fp64
 GrB_Info GB_Adot2B__plus_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_fp64
 GrB_Info GB_Adot3B__plus_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_div_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_div_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_int16.c b/Source/Generated/GB_AxB__plus_div_int16.c
index 3fe72b1da5..1660efec8a 100644
--- a/Source/Generated/GB_AxB__plus_div_int16.c
+++ b/Source/Generated/GB_AxB__plus_div_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_int16
 // A'*B function (dot2):     GB_Adot2B__plus_div_int16
 // A'*B function (dot3):     GB_Adot3B__plus_div_int16
-// A*B function (heap):      GB_AheapB__plus_div_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 16)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 16) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_DIV_INT16 || GxB_NO_PLUS_DIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_int16
 GrB_Info GB_Adot2B__plus_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_int16
 GrB_Info GB_Adot3B__plus_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_int16
+GrB_Info GB_Asaxpy3B__plus_div_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_int32.c b/Source/Generated/GB_AxB__plus_div_int32.c
index 13da61aab9..91515a6311 100644
--- a/Source/Generated/GB_AxB__plus_div_int32.c
+++ b/Source/Generated/GB_AxB__plus_div_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_int32
 // A'*B function (dot2):     GB_Adot2B__plus_div_int32
 // A'*B function (dot3):     GB_Adot3B__plus_div_int32
-// A*B function (heap):      GB_AheapB__plus_div_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 32)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 32) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_DIV_INT32 || GxB_NO_PLUS_DIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_int32
 GrB_Info GB_Adot2B__plus_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_int32
 GrB_Info GB_Adot3B__plus_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_int32
+GrB_Info GB_Asaxpy3B__plus_div_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_int64.c b/Source/Generated/GB_AxB__plus_div_int64.c
index 04212ae2ab..6af6979539 100644
--- a/Source/Generated/GB_AxB__plus_div_int64.c
+++ b/Source/Generated/GB_AxB__plus_div_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_int64
 // A'*B function (dot2):     GB_Adot2B__plus_div_int64
 // A'*B function (dot3):     GB_Adot3B__plus_div_int64
-// A*B function (heap):      GB_AheapB__plus_div_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 64)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 64) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_DIV_INT64 || GxB_NO_PLUS_DIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_int64
 GrB_Info GB_Adot2B__plus_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_int64
 GrB_Info GB_Adot3B__plus_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_int64
+GrB_Info GB_Asaxpy3B__plus_div_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_int8.c b/Source/Generated/GB_AxB__plus_div_int8.c
index 8473bb5f5f..47f5b6c362 100644
--- a/Source/Generated/GB_AxB__plus_div_int8.c
+++ b/Source/Generated/GB_AxB__plus_div_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_int8
 // A'*B function (dot2):     GB_Adot2B__plus_div_int8
 // A'*B function (dot3):     GB_Adot3B__plus_div_int8
-// A*B function (heap):      GB_AheapB__plus_div_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 8)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 8) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_DIV_INT8 || GxB_NO_PLUS_DIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_int8
 GrB_Info GB_Adot2B__plus_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_int8
 GrB_Info GB_Adot3B__plus_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_int8
+GrB_Info GB_Asaxpy3B__plus_div_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_uint16.c b/Source/Generated/GB_AxB__plus_div_uint16.c
index d5547c5e3f..26d5372faf 100644
--- a/Source/Generated/GB_AxB__plus_div_uint16.c
+++ b/Source/Generated/GB_AxB__plus_div_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_div_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_div_uint16
-// A*B function (heap):      GB_AheapB__plus_div_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 16)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 16) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_DIV_UINT16 || GxB_NO_PLUS_DIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_uint16
 GrB_Info GB_Adot2B__plus_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_uint16
 GrB_Info GB_Adot3B__plus_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_uint16
+GrB_Info GB_Asaxpy3B__plus_div_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_uint32.c b/Source/Generated/GB_AxB__plus_div_uint32.c
index 2b253425c4..1945d94df7 100644
--- a/Source/Generated/GB_AxB__plus_div_uint32.c
+++ b/Source/Generated/GB_AxB__plus_div_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_div_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_div_uint32
-// A*B function (heap):      GB_AheapB__plus_div_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 32)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 32) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_DIV_UINT32 || GxB_NO_PLUS_DIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_uint32
 GrB_Info GB_Adot2B__plus_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_uint32
 GrB_Info GB_Adot3B__plus_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_uint32
+GrB_Info GB_Asaxpy3B__plus_div_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_uint64.c b/Source/Generated/GB_AxB__plus_div_uint64.c
index c63da79b4e..5a33802824 100644
--- a/Source/Generated/GB_AxB__plus_div_uint64.c
+++ b/Source/Generated/GB_AxB__plus_div_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_div_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_div_uint64
-// A*B function (heap):      GB_AheapB__plus_div_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 64)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 64) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_DIV_UINT64 || GxB_NO_PLUS_DIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_uint64
 GrB_Info GB_Adot2B__plus_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_uint64
 GrB_Info GB_Adot3B__plus_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_uint64
+GrB_Info GB_Asaxpy3B__plus_div_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_div_uint8.c b/Source/Generated/GB_AxB__plus_div_uint8.c
index 954ed0ee78..18983f823e 100644
--- a/Source/Generated/GB_AxB__plus_div_uint8.c
+++ b/Source/Generated/GB_AxB__plus_div_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_div_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_div_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_div_uint8
-// A*B function (heap):      GB_AheapB__plus_div_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_div_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_div_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 8)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 8) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_DIV || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_DIV_UINT8 || GxB_NO_PLUS_DIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_div_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_div_uint8
 GrB_Info GB_Adot2B__plus_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_div_uint8
 GrB_Info GB_Adot3B__plus_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_div_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_div_uint8
+GrB_Info GB_Asaxpy3B__plus_div_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_fp32.c b/Source/Generated/GB_AxB__plus_first_fp32.c
index b17649405c..cb8320bda8 100644
--- a/Source/Generated/GB_AxB__plus_first_fp32.c
+++ b/Source/Generated/GB_AxB__plus_first_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_first_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_first_fp32
-// A*B function (heap):      GB_AheapB__plus_first_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_FIRST_FP32 || GxB_NO_PLUS_FIRST_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_fp32
 GrB_Info GB_Adot2B__plus_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_fp32
 GrB_Info GB_Adot3B__plus_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_fp64.c b/Source/Generated/GB_AxB__plus_first_fp64.c
index 13df790e0a..47e46c2a39 100644
--- a/Source/Generated/GB_AxB__plus_first_fp64.c
+++ b/Source/Generated/GB_AxB__plus_first_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_first_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_first_fp64
-// A*B function (heap):      GB_AheapB__plus_first_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_FIRST_FP64 || GxB_NO_PLUS_FIRST_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_fp64
 GrB_Info GB_Adot2B__plus_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_fp64
 GrB_Info GB_Adot3B__plus_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_int16.c b/Source/Generated/GB_AxB__plus_first_int16.c
index 8031a192b1..f3f4b306d2 100644
--- a/Source/Generated/GB_AxB__plus_first_int16.c
+++ b/Source/Generated/GB_AxB__plus_first_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_int16
 // A'*B function (dot2):     GB_Adot2B__plus_first_int16
 // A'*B function (dot3):     GB_Adot3B__plus_first_int16
-// A*B function (heap):      GB_AheapB__plus_first_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_FIRST_INT16 || GxB_NO_PLUS_FIRST_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_int16
 GrB_Info GB_Adot2B__plus_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_int16
 GrB_Info GB_Adot3B__plus_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_int32.c b/Source/Generated/GB_AxB__plus_first_int32.c
index 2762cdc42b..e27243ec3c 100644
--- a/Source/Generated/GB_AxB__plus_first_int32.c
+++ b/Source/Generated/GB_AxB__plus_first_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_int32
 // A'*B function (dot2):     GB_Adot2B__plus_first_int32
 // A'*B function (dot3):     GB_Adot3B__plus_first_int32
-// A*B function (heap):      GB_AheapB__plus_first_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_FIRST_INT32 || GxB_NO_PLUS_FIRST_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_int32
 GrB_Info GB_Adot2B__plus_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_int32
 GrB_Info GB_Adot3B__plus_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_int64.c b/Source/Generated/GB_AxB__plus_first_int64.c
index 389d28a9b8..53f29d3cc2 100644
--- a/Source/Generated/GB_AxB__plus_first_int64.c
+++ b/Source/Generated/GB_AxB__plus_first_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_int64
 // A'*B function (dot2):     GB_Adot2B__plus_first_int64
 // A'*B function (dot3):     GB_Adot3B__plus_first_int64
-// A*B function (heap):      GB_AheapB__plus_first_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_FIRST_INT64 || GxB_NO_PLUS_FIRST_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_int64
 GrB_Info GB_Adot2B__plus_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_int64
 GrB_Info GB_Adot3B__plus_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_first_int64
+GrB_Info GB_Asaxpy3B__plus_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_int8.c b/Source/Generated/GB_AxB__plus_first_int8.c
index 9828d70c63..51ab3f5cc5 100644
--- a/Source/Generated/GB_AxB__plus_first_int8.c
+++ b/Source/Generated/GB_AxB__plus_first_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_int8
 // A'*B function (dot2):     GB_Adot2B__plus_first_int8
 // A'*B function (dot3):     GB_Adot3B__plus_first_int8
-// A*B function (heap):      GB_AheapB__plus_first_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_FIRST_INT8 || GxB_NO_PLUS_FIRST_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_int8
 GrB_Info GB_Adot2B__plus_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_int8
 GrB_Info GB_Adot3B__plus_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_uint16.c b/Source/Generated/GB_AxB__plus_first_uint16.c
index 84559cc62f..b96c64a1dd 100644
--- a/Source/Generated/GB_AxB__plus_first_uint16.c
+++ b/Source/Generated/GB_AxB__plus_first_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_first_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_first_uint16
-// A*B function (heap):      GB_AheapB__plus_first_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_FIRST_UINT16 || GxB_NO_PLUS_FIRST_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_uint16
 GrB_Info GB_Adot2B__plus_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_uint16
 GrB_Info GB_Adot3B__plus_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_uint32.c b/Source/Generated/GB_AxB__plus_first_uint32.c
index ba131bfd6a..13d8cbe941 100644
--- a/Source/Generated/GB_AxB__plus_first_uint32.c
+++ b/Source/Generated/GB_AxB__plus_first_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_first_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_first_uint32
-// A*B function (heap):      GB_AheapB__plus_first_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_FIRST_UINT32 || GxB_NO_PLUS_FIRST_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_uint32
 GrB_Info GB_Adot2B__plus_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_uint32
 GrB_Info GB_Adot3B__plus_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_uint64.c b/Source/Generated/GB_AxB__plus_first_uint64.c
index 3c536afbba..54c265025c 100644
--- a/Source/Generated/GB_AxB__plus_first_uint64.c
+++ b/Source/Generated/GB_AxB__plus_first_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_first_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_first_uint64
-// A*B function (heap):      GB_AheapB__plus_first_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: uint64_t is none of these)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_FIRST_UINT64 || GxB_NO_PLUS_FIRST_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_uint64
 GrB_Info GB_Adot2B__plus_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_uint64
 GrB_Info GB_Adot3B__plus_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_first_uint64
+GrB_Info GB_Asaxpy3B__plus_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_first_uint8.c b/Source/Generated/GB_AxB__plus_first_uint8.c
index 817c01dbf9..6422ba27f6 100644
--- a/Source/Generated/GB_AxB__plus_first_uint8.c
+++ b/Source/Generated/GB_AxB__plus_first_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_first_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_first_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_first_uint8
-// A*B function (heap):      GB_AheapB__plus_first_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_first_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_first_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += aik
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FIRST || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_FIRST_UINT8 || GxB_NO_PLUS_FIRST_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_first_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_first_uint8
 GrB_Info GB_Adot2B__plus_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_first_uint8
 GrB_Info GB_Adot3B__plus_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_first_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_first_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_fp32.c b/Source/Generated/GB_AxB__plus_iseq_fp32.c
index ce0509aca4..30613f0fac 100644
--- a/Source/Generated/GB_AxB__plus_iseq_fp32.c
+++ b/Source/Generated/GB_AxB__plus_iseq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_fp32
-// A*B function (heap):      GB_AheapB__plus_iseq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_ISEQ_FP32 || GxB_NO_PLUS_ISEQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_fp32
 GrB_Info GB_Adot2B__plus_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_fp32
 GrB_Info GB_Adot3B__plus_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_fp64.c b/Source/Generated/GB_AxB__plus_iseq_fp64.c
index 1af53e59c8..1063a0d97e 100644
--- a/Source/Generated/GB_AxB__plus_iseq_fp64.c
+++ b/Source/Generated/GB_AxB__plus_iseq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_fp64
-// A*B function (heap):      GB_AheapB__plus_iseq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_ISEQ_FP64 || GxB_NO_PLUS_ISEQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_fp64
 GrB_Info GB_Adot2B__plus_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_fp64
 GrB_Info GB_Adot3B__plus_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_int16.c b/Source/Generated/GB_AxB__plus_iseq_int16.c
index a95409a179..80bc986410 100644
--- a/Source/Generated/GB_AxB__plus_iseq_int16.c
+++ b/Source/Generated/GB_AxB__plus_iseq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_int16
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_int16
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_int16
-// A*B function (heap):      GB_AheapB__plus_iseq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_ISEQ_INT16 || GxB_NO_PLUS_ISEQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_int16
 GrB_Info GB_Adot2B__plus_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_int16
 GrB_Info GB_Adot3B__plus_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_int32.c b/Source/Generated/GB_AxB__plus_iseq_int32.c
index cc2c56b8c0..6debed15b9 100644
--- a/Source/Generated/GB_AxB__plus_iseq_int32.c
+++ b/Source/Generated/GB_AxB__plus_iseq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_int32
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_int32
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_int32
-// A*B function (heap):      GB_AheapB__plus_iseq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_ISEQ_INT32 || GxB_NO_PLUS_ISEQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_int32
 GrB_Info GB_Adot2B__plus_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_int32
 GrB_Info GB_Adot3B__plus_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_int64.c b/Source/Generated/GB_AxB__plus_iseq_int64.c
index fca38f05b1..5f8c3f32b0 100644
--- a/Source/Generated/GB_AxB__plus_iseq_int64.c
+++ b/Source/Generated/GB_AxB__plus_iseq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_int64
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_int64
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_int64
-// A*B function (heap):      GB_AheapB__plus_iseq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_ISEQ_INT64 || GxB_NO_PLUS_ISEQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_int64
 GrB_Info GB_Adot2B__plus_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_int64
 GrB_Info GB_Adot3B__plus_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_iseq_int64
+GrB_Info GB_Asaxpy3B__plus_iseq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_int8.c b/Source/Generated/GB_AxB__plus_iseq_int8.c
index 30463d573f..a7843f2dbf 100644
--- a/Source/Generated/GB_AxB__plus_iseq_int8.c
+++ b/Source/Generated/GB_AxB__plus_iseq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_int8
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_int8
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_int8
-// A*B function (heap):      GB_AheapB__plus_iseq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_ISEQ_INT8 || GxB_NO_PLUS_ISEQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_int8
 GrB_Info GB_Adot2B__plus_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_int8
 GrB_Info GB_Adot3B__plus_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_uint16.c b/Source/Generated/GB_AxB__plus_iseq_uint16.c
index f69d499437..202e7b56d2 100644
--- a/Source/Generated/GB_AxB__plus_iseq_uint16.c
+++ b/Source/Generated/GB_AxB__plus_iseq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_uint16
-// A*B function (heap):      GB_AheapB__plus_iseq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_ISEQ_UINT16 || GxB_NO_PLUS_ISEQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_uint16
 GrB_Info GB_Adot2B__plus_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_uint16
 GrB_Info GB_Adot3B__plus_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_uint32.c b/Source/Generated/GB_AxB__plus_iseq_uint32.c
index a311ee80b3..2ddca83cda 100644
--- a/Source/Generated/GB_AxB__plus_iseq_uint32.c
+++ b/Source/Generated/GB_AxB__plus_iseq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_uint32
-// A*B function (heap):      GB_AheapB__plus_iseq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_ISEQ_UINT32 || GxB_NO_PLUS_ISEQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_uint32
 GrB_Info GB_Adot2B__plus_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_uint32
 GrB_Info GB_Adot3B__plus_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_uint64.c b/Source/Generated/GB_AxB__plus_iseq_uint64.c
index de40cdf3fb..4155c123f7 100644
--- a/Source/Generated/GB_AxB__plus_iseq_uint64.c
+++ b/Source/Generated/GB_AxB__plus_iseq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_uint64
-// A*B function (heap):      GB_AheapB__plus_iseq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_ISEQ_UINT64 || GxB_NO_PLUS_ISEQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_uint64
 GrB_Info GB_Adot2B__plus_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_uint64
 GrB_Info GB_Adot3B__plus_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_iseq_uint64
+GrB_Info GB_Asaxpy3B__plus_iseq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_iseq_uint8.c b/Source/Generated/GB_AxB__plus_iseq_uint8.c
index b2513c65a6..bc68504858 100644
--- a/Source/Generated/GB_AxB__plus_iseq_uint8.c
+++ b/Source/Generated/GB_AxB__plus_iseq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_iseq_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_iseq_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_iseq_uint8
-// A*B function (heap):      GB_AheapB__plus_iseq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_iseq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_iseq_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik == bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISEQ || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_ISEQ_UINT8 || GxB_NO_PLUS_ISEQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_iseq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_iseq_uint8
 GrB_Info GB_Adot2B__plus_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_iseq_uint8
 GrB_Info GB_Adot3B__plus_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_iseq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_iseq_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_iseq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_fp32.c b/Source/Generated/GB_AxB__plus_isge_fp32.c
index f7d15e89ed..312d9ff3e4 100644
--- a/Source/Generated/GB_AxB__plus_isge_fp32.c
+++ b/Source/Generated/GB_AxB__plus_isge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_isge_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_isge_fp32
-// A*B function (heap):      GB_AheapB__plus_isge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_ISGE_FP32 || GxB_NO_PLUS_ISGE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_fp32
 GrB_Info GB_Adot2B__plus_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_fp32
 GrB_Info GB_Adot3B__plus_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_fp64.c b/Source/Generated/GB_AxB__plus_isge_fp64.c
index 047ca8a57e..149973fd19 100644
--- a/Source/Generated/GB_AxB__plus_isge_fp64.c
+++ b/Source/Generated/GB_AxB__plus_isge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_isge_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_isge_fp64
-// A*B function (heap):      GB_AheapB__plus_isge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_ISGE_FP64 || GxB_NO_PLUS_ISGE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_fp64
 GrB_Info GB_Adot2B__plus_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_fp64
 GrB_Info GB_Adot3B__plus_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_int16.c b/Source/Generated/GB_AxB__plus_isge_int16.c
index cefcbdecdf..1715797b32 100644
--- a/Source/Generated/GB_AxB__plus_isge_int16.c
+++ b/Source/Generated/GB_AxB__plus_isge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_int16
 // A'*B function (dot2):     GB_Adot2B__plus_isge_int16
 // A'*B function (dot3):     GB_Adot3B__plus_isge_int16
-// A*B function (heap):      GB_AheapB__plus_isge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_ISGE_INT16 || GxB_NO_PLUS_ISGE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_int16
 GrB_Info GB_Adot2B__plus_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_int16
 GrB_Info GB_Adot3B__plus_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_int32.c b/Source/Generated/GB_AxB__plus_isge_int32.c
index 6c10c540f2..0cc40e38f4 100644
--- a/Source/Generated/GB_AxB__plus_isge_int32.c
+++ b/Source/Generated/GB_AxB__plus_isge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_int32
 // A'*B function (dot2):     GB_Adot2B__plus_isge_int32
 // A'*B function (dot3):     GB_Adot3B__plus_isge_int32
-// A*B function (heap):      GB_AheapB__plus_isge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_ISGE_INT32 || GxB_NO_PLUS_ISGE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_int32
 GrB_Info GB_Adot2B__plus_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_int32
 GrB_Info GB_Adot3B__plus_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_int64.c b/Source/Generated/GB_AxB__plus_isge_int64.c
index 297268f01c..23b4db666e 100644
--- a/Source/Generated/GB_AxB__plus_isge_int64.c
+++ b/Source/Generated/GB_AxB__plus_isge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_int64
 // A'*B function (dot2):     GB_Adot2B__plus_isge_int64
 // A'*B function (dot3):     GB_Adot3B__plus_isge_int64
-// A*B function (heap):      GB_AheapB__plus_isge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here, for int64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_ISGE_INT64 || GxB_NO_PLUS_ISGE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_int64
 GrB_Info GB_Adot2B__plus_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_int64
 GrB_Info GB_Adot3B__plus_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isge_int64
+GrB_Info GB_Asaxpy3B__plus_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_int8.c b/Source/Generated/GB_AxB__plus_isge_int8.c
index b9bb28c3cc..d9b1fdcd78 100644
--- a/Source/Generated/GB_AxB__plus_isge_int8.c
+++ b/Source/Generated/GB_AxB__plus_isge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_int8
 // A'*B function (dot2):     GB_Adot2B__plus_isge_int8
 // A'*B function (dot3):     GB_Adot3B__plus_isge_int8
-// A*B function (heap):      GB_AheapB__plus_isge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_ISGE_INT8 || GxB_NO_PLUS_ISGE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_int8
 GrB_Info GB_Adot2B__plus_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_int8
 GrB_Info GB_Adot3B__plus_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_uint16.c b/Source/Generated/GB_AxB__plus_isge_uint16.c
index fd4248300d..0d5c2073ee 100644
--- a/Source/Generated/GB_AxB__plus_isge_uint16.c
+++ b/Source/Generated/GB_AxB__plus_isge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_isge_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_isge_uint16
-// A*B function (heap):      GB_AheapB__plus_isge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_ISGE_UINT16 || GxB_NO_PLUS_ISGE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_uint16
 GrB_Info GB_Adot2B__plus_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_uint16
 GrB_Info GB_Adot3B__plus_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_uint32.c b/Source/Generated/GB_AxB__plus_isge_uint32.c
index 023f24e19b..9558ac595f 100644
--- a/Source/Generated/GB_AxB__plus_isge_uint32.c
+++ b/Source/Generated/GB_AxB__plus_isge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_isge_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_isge_uint32
-// A*B function (heap):      GB_AheapB__plus_isge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_ISGE_UINT32 || GxB_NO_PLUS_ISGE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_uint32
 GrB_Info GB_Adot2B__plus_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_uint32
 GrB_Info GB_Adot3B__plus_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_uint64.c b/Source/Generated/GB_AxB__plus_isge_uint64.c
index 13d417367b..460eb23cca 100644
--- a/Source/Generated/GB_AxB__plus_isge_uint64.c
+++ b/Source/Generated/GB_AxB__plus_isge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_isge_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_isge_uint64
-// A*B function (heap):      GB_AheapB__plus_isge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_ISGE_UINT64 || GxB_NO_PLUS_ISGE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_uint64
 GrB_Info GB_Adot2B__plus_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_uint64
 GrB_Info GB_Adot3B__plus_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isge_uint64
+GrB_Info GB_Asaxpy3B__plus_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isge_uint8.c b/Source/Generated/GB_AxB__plus_isge_uint8.c
index 9121d4164f..fae2a37eb7 100644
--- a/Source/Generated/GB_AxB__plus_isge_uint8.c
+++ b/Source/Generated/GB_AxB__plus_isge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isge_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_isge_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_isge_uint8
-// A*B function (heap):      GB_AheapB__plus_isge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isge_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik >= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGE || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_ISGE_UINT8 || GxB_NO_PLUS_ISGE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isge_uint8
 GrB_Info GB_Adot2B__plus_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isge_uint8
 GrB_Info GB_Adot3B__plus_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isge_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_fp32.c b/Source/Generated/GB_AxB__plus_isgt_fp32.c
index dbd2e19780..6d3ade0ecc 100644
--- a/Source/Generated/GB_AxB__plus_isgt_fp32.c
+++ b/Source/Generated/GB_AxB__plus_isgt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_fp32
-// A*B function (heap):      GB_AheapB__plus_isgt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_ISGT_FP32 || GxB_NO_PLUS_ISGT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_fp32
 GrB_Info GB_Adot2B__plus_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_fp32
 GrB_Info GB_Adot3B__plus_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_fp64.c b/Source/Generated/GB_AxB__plus_isgt_fp64.c
index 9a87d92243..adf8ccfc09 100644
--- a/Source/Generated/GB_AxB__plus_isgt_fp64.c
+++ b/Source/Generated/GB_AxB__plus_isgt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_fp64
-// A*B function (heap):      GB_AheapB__plus_isgt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_ISGT_FP64 || GxB_NO_PLUS_ISGT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_fp64
 GrB_Info GB_Adot2B__plus_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_fp64
 GrB_Info GB_Adot3B__plus_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_int16.c b/Source/Generated/GB_AxB__plus_isgt_int16.c
index 4120dd52fe..1927eb6a42 100644
--- a/Source/Generated/GB_AxB__plus_isgt_int16.c
+++ b/Source/Generated/GB_AxB__plus_isgt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_int16
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_int16
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_int16
-// A*B function (heap):      GB_AheapB__plus_isgt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_ISGT_INT16 || GxB_NO_PLUS_ISGT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_int16
 GrB_Info GB_Adot2B__plus_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_int16
 GrB_Info GB_Adot3B__plus_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_int32.c b/Source/Generated/GB_AxB__plus_isgt_int32.c
index c95c04a6f1..86dc599b17 100644
--- a/Source/Generated/GB_AxB__plus_isgt_int32.c
+++ b/Source/Generated/GB_AxB__plus_isgt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_int32
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_int32
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_int32
-// A*B function (heap):      GB_AheapB__plus_isgt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_ISGT_INT32 || GxB_NO_PLUS_ISGT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_int32
 GrB_Info GB_Adot2B__plus_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_int32
 GrB_Info GB_Adot3B__plus_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_int32    // C+=A'*B (dense dot product), hard-coded PLUS_ISGT kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably how A is sliced into naslice parts -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably how B is sliced into nbslice parts -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out and the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by the included dot4 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_int32  // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded PLUS_ISGT kernel
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // function body is supplied by the included saxpy3 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_int64.c b/Source/Generated/GB_AxB__plus_isgt_int64.c
index 5fc1d97073..ab693b8f90 100644
--- a/Source/Generated/GB_AxB__plus_isgt_int64.c
+++ b/Source/Generated/GB_AxB__plus_isgt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_int64
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_int64
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_int64
-// A*B function (heap):      GB_AheapB__plus_isgt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: int64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_ISGT_INT64 || GxB_NO_PLUS_ISGT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_int64
 GrB_Info GB_Adot2B__plus_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_int64
 GrB_Info GB_Adot3B__plus_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isgt_int64    // C+=A'*B (dense dot product), hard-coded PLUS_ISGT kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably how A is sliced into naslice parts -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably how B is sliced into nbslice parts -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out and the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by the included dot4 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isgt_int64
+GrB_Info GB_Asaxpy3B__plus_isgt_int64  // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded PLUS_ISGT kernel
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // function body is supplied by the included saxpy3 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_int8.c b/Source/Generated/GB_AxB__plus_isgt_int8.c
index 0de528a85a..5c666a3c66 100644
--- a/Source/Generated/GB_AxB__plus_isgt_int8.c
+++ b/Source/Generated/GB_AxB__plus_isgt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_int8
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_int8
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_int8
-// A*B function (heap):      GB_AheapB__plus_isgt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_ISGT_INT8 || GxB_NO_PLUS_ISGT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_int8
 GrB_Info GB_Adot2B__plus_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_int8
 GrB_Info GB_Adot3B__plus_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_int8     // C+=A'*B (dense dot product), hard-coded PLUS_ISGT kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably how A is sliced into naslice parts -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably how B is sliced into nbslice parts -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out and the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by the included dot4 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_int8   // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded PLUS_ISGT kernel
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // function body is supplied by the included saxpy3 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_uint16.c b/Source/Generated/GB_AxB__plus_isgt_uint16.c
index 433bff6d7f..f1cf5d807c 100644
--- a/Source/Generated/GB_AxB__plus_isgt_uint16.c
+++ b/Source/Generated/GB_AxB__plus_isgt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_uint16
-// A*B function (heap):      GB_AheapB__plus_isgt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_ISGT_UINT16 || GxB_NO_PLUS_ISGT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_uint16
 GrB_Info GB_Adot2B__plus_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_uint16
 GrB_Info GB_Adot3B__plus_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_uint16   // C+=A'*B (dense dot product), hard-coded PLUS_ISGT kernel
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): presumably how A is sliced into naslice parts -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): presumably how B is sliced into nbslice parts -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out and the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is supplied by the included dot4 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_uint16 // C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3, hard-coded PLUS_ISGT kernel
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // NOTE(review): Mask_comp presumably selects C<!M> -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // function body is supplied by the included saxpy3 template
+    return (GrB_SUCCESS) ;      // reached when the included template falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_uint32.c b/Source/Generated/GB_AxB__plus_isgt_uint32.c
index a9df282a06..d73dcf89a9 100644
--- a/Source/Generated/GB_AxB__plus_isgt_uint32.c
+++ b/Source/Generated/GB_AxB__plus_isgt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_uint32
-// A*B function (heap):      GB_AheapB__plus_isgt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_ISGT_UINT32 || GxB_NO_PLUS_ISGT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_uint32
 GrB_Info GB_Adot2B__plus_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_uint32
 GrB_Info GB_Adot3B__plus_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_uint64.c b/Source/Generated/GB_AxB__plus_isgt_uint64.c
index adf777effa..7203804989 100644
--- a/Source/Generated/GB_AxB__plus_isgt_uint64.c
+++ b/Source/Generated/GB_AxB__plus_isgt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_uint64
-// A*B function (heap):      GB_AheapB__plus_isgt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_ISGT_UINT64 || GxB_NO_PLUS_ISGT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_uint64
 GrB_Info GB_Adot2B__plus_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_uint64
 GrB_Info GB_Adot3B__plus_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isgt_uint64
+GrB_Info GB_Asaxpy3B__plus_isgt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isgt_uint8.c b/Source/Generated/GB_AxB__plus_isgt_uint8.c
index b34cbdcccd..84fb32dc8c 100644
--- a/Source/Generated/GB_AxB__plus_isgt_uint8.c
+++ b/Source/Generated/GB_AxB__plus_isgt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isgt_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_isgt_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_isgt_uint8
-// A*B function (heap):      GB_AheapB__plus_isgt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isgt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isgt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik > bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_ISGT_UINT8 || GxB_NO_PLUS_ISGT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isgt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isgt_uint8
 GrB_Info GB_Adot2B__plus_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isgt_uint8
 GrB_Info GB_Adot3B__plus_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isgt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isgt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isgt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_fp32.c b/Source/Generated/GB_AxB__plus_isle_fp32.c
index 595b1bc741..42b4923d02 100644
--- a/Source/Generated/GB_AxB__plus_isle_fp32.c
+++ b/Source/Generated/GB_AxB__plus_isle_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_isle_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_isle_fp32
-// A*B function (heap):      GB_AheapB__plus_isle_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_ISLE_FP32 || GxB_NO_PLUS_ISLE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_fp32
 GrB_Info GB_Adot2B__plus_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_fp32
 GrB_Info GB_Adot3B__plus_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_fp64.c b/Source/Generated/GB_AxB__plus_isle_fp64.c
index 5333b01148..a372c280d2 100644
--- a/Source/Generated/GB_AxB__plus_isle_fp64.c
+++ b/Source/Generated/GB_AxB__plus_isle_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_isle_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_isle_fp64
-// A*B function (heap):      GB_AheapB__plus_isle_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_ISLE_FP64 || GxB_NO_PLUS_ISLE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_fp64
 GrB_Info GB_Adot2B__plus_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_fp64
 GrB_Info GB_Adot3B__plus_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_int16.c b/Source/Generated/GB_AxB__plus_isle_int16.c
index 7f867f244f..a69a48c7cf 100644
--- a/Source/Generated/GB_AxB__plus_isle_int16.c
+++ b/Source/Generated/GB_AxB__plus_isle_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_int16
 // A'*B function (dot2):     GB_Adot2B__plus_isle_int16
 // A'*B function (dot3):     GB_Adot3B__plus_isle_int16
-// A*B function (heap):      GB_AheapB__plus_isle_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_ISLE_INT16 || GxB_NO_PLUS_ISLE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_int16
 GrB_Info GB_Adot2B__plus_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_int16
 GrB_Info GB_Adot3B__plus_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_int32.c b/Source/Generated/GB_AxB__plus_isle_int32.c
index e397c2b224..03b1d9df2a 100644
--- a/Source/Generated/GB_AxB__plus_isle_int32.c
+++ b/Source/Generated/GB_AxB__plus_isle_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_int32
 // A'*B function (dot2):     GB_Adot2B__plus_isle_int32
 // A'*B function (dot3):     GB_Adot3B__plus_isle_int32
-// A*B function (heap):      GB_AheapB__plus_isle_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_ISLE_INT32 || GxB_NO_PLUS_ISLE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_int32
 GrB_Info GB_Adot2B__plus_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_int32
 GrB_Info GB_Adot3B__plus_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_int64.c b/Source/Generated/GB_AxB__plus_isle_int64.c
index 55d64b6dda..514fd9c4af 100644
--- a/Source/Generated/GB_AxB__plus_isle_int64.c
+++ b/Source/Generated/GB_AxB__plus_isle_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_int64
 // A'*B function (dot2):     GB_Adot2B__plus_isle_int64
 // A'*B function (dot3):     GB_Adot3B__plus_isle_int64
-// A*B function (heap):      GB_AheapB__plus_isle_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_ISLE_INT64 || GxB_NO_PLUS_ISLE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_int64
 GrB_Info GB_Adot2B__plus_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_int64
 GrB_Info GB_Adot3B__plus_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isle_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isle_int64
+GrB_Info GB_Asaxpy3B__plus_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_int8.c b/Source/Generated/GB_AxB__plus_isle_int8.c
index 38ff9be98f..0f4446bf8c 100644
--- a/Source/Generated/GB_AxB__plus_isle_int8.c
+++ b/Source/Generated/GB_AxB__plus_isle_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_int8
 // A'*B function (dot2):     GB_Adot2B__plus_isle_int8
 // A'*B function (dot3):     GB_Adot3B__plus_isle_int8
-// A*B function (heap):      GB_AheapB__plus_isle_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_ISLE_INT8 || GxB_NO_PLUS_ISLE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_int8
 GrB_Info GB_Adot2B__plus_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_int8
 GrB_Info GB_Adot3B__plus_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_int8  // dense dot-product kernel, PLUS_ISLE, int8_t
+(
+    GrB_Matrix C,  // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a slicing of A; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a slicing of B; confirm
+    const int nthreads  // NOTE(review): presumably the number of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_int8  // saxpy3 kernel, PLUS_ISLE semiring, int8_t
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // output matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and its flags
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries; confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): presumably the count of fine tasks; confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_uint16.c b/Source/Generated/GB_AxB__plus_isle_uint16.c
index cef095698b..d0aedb2156 100644
--- a/Source/Generated/GB_AxB__plus_isle_uint16.c
+++ b/Source/Generated/GB_AxB__plus_isle_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_isle_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_isle_uint16
-// A*B function (heap):      GB_AheapB__plus_isle_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_ISLE_UINT16 || GxB_NO_PLUS_ISLE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_uint16
 GrB_Info GB_Adot2B__plus_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_uint16
 GrB_Info GB_Adot3B__plus_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_uint16  // dense dot-product kernel, PLUS_ISLE, uint16_t
+(
+    GrB_Matrix C,  // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a slicing of A; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a slicing of B; confirm
+    const int nthreads  // NOTE(review): presumably the number of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_uint16  // saxpy3 kernel, PLUS_ISLE semiring, uint16_t
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // output matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and its flags
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries; confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): presumably the count of fine tasks; confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_uint32.c b/Source/Generated/GB_AxB__plus_isle_uint32.c
index 43bb323353..61189d8392 100644
--- a/Source/Generated/GB_AxB__plus_isle_uint32.c
+++ b/Source/Generated/GB_AxB__plus_isle_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_isle_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_isle_uint32
-// A*B function (heap):      GB_AheapB__plus_isle_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_ISLE_UINT32 || GxB_NO_PLUS_ISLE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_uint32
 GrB_Info GB_Adot2B__plus_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_uint32
 GrB_Info GB_Adot3B__plus_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_uint32  // dense dot-product kernel, PLUS_ISLE, uint32_t
+(
+    GrB_Matrix C,  // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a slicing of A; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a slicing of B; confirm
+    const int nthreads  // NOTE(review): presumably the number of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_uint32  // saxpy3 kernel, PLUS_ISLE semiring, uint32_t
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // output matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and its flags
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries; confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): presumably the count of fine tasks; confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_uint64.c b/Source/Generated/GB_AxB__plus_isle_uint64.c
index a997c0813f..96dd0864b8 100644
--- a/Source/Generated/GB_AxB__plus_isle_uint64.c
+++ b/Source/Generated/GB_AxB__plus_isle_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_isle_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_isle_uint64
-// A*B function (heap):      GB_AheapB__plus_isle_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_ISLE_UINT64 || GxB_NO_PLUS_ISLE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_uint64
 GrB_Info GB_Adot2B__plus_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_uint64
 GrB_Info GB_Adot3B__plus_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isle_uint64  // dense dot-product kernel, PLUS_ISLE, uint64_t
+(
+    GrB_Matrix C,  // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably a slicing of A; confirm
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably a slicing of B; confirm
+    const int nthreads  // NOTE(review): presumably the number of threads to use
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;  // kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isle_uint64
+GrB_Info GB_Asaxpy3B__plus_isle_uint64  // saxpy3 kernel, PLUS_ISLE semiring, uint64_t
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,  // output matrix of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask and its flags
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries; confirm
+    const int ntasks,
+    const int nfine,  // NOTE(review): presumably the count of fine tasks; confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;  // this wrapper itself reports no failure
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isle_uint8.c b/Source/Generated/GB_AxB__plus_isle_uint8.c
index b29c42416e..6f89f2a419 100644
--- a/Source/Generated/GB_AxB__plus_isle_uint8.c
+++ b/Source/Generated/GB_AxB__plus_isle_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isle_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_isle_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_isle_uint8
-// A*B function (heap):      GB_AheapB__plus_isle_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isle_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isle_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik <= bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLE || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_ISLE_UINT8 || GxB_NO_PLUS_ISLE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isle_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isle_uint8
 GrB_Info GB_Adot2B__plus_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isle_uint8
 GrB_Info GB_Adot3B__plus_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isle_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isle_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_fp32.c b/Source/Generated/GB_AxB__plus_islt_fp32.c
index 7b59d80ade..6bcb33456f 100644
--- a/Source/Generated/GB_AxB__plus_islt_fp32.c
+++ b/Source/Generated/GB_AxB__plus_islt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_islt_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_islt_fp32
-// A*B function (heap):      GB_AheapB__plus_islt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_ISLT_FP32 || GxB_NO_PLUS_ISLT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_fp32
 GrB_Info GB_Adot2B__plus_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_fp32
 GrB_Info GB_Adot3B__plus_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_fp64.c b/Source/Generated/GB_AxB__plus_islt_fp64.c
index 952143e6a0..63c9fc0dda 100644
--- a/Source/Generated/GB_AxB__plus_islt_fp64.c
+++ b/Source/Generated/GB_AxB__plus_islt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_islt_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_islt_fp64
-// A*B function (heap):      GB_AheapB__plus_islt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_ISLT_FP64 || GxB_NO_PLUS_ISLT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_fp64
 GrB_Info GB_Adot2B__plus_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_fp64
 GrB_Info GB_Adot3B__plus_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_int16.c b/Source/Generated/GB_AxB__plus_islt_int16.c
index e72d825f38..cec789efe5 100644
--- a/Source/Generated/GB_AxB__plus_islt_int16.c
+++ b/Source/Generated/GB_AxB__plus_islt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_int16
 // A'*B function (dot2):     GB_Adot2B__plus_islt_int16
 // A'*B function (dot3):     GB_Adot3B__plus_islt_int16
-// A*B function (heap):      GB_AheapB__plus_islt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_ISLT_INT16 || GxB_NO_PLUS_ISLT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_int16
 GrB_Info GB_Adot2B__plus_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_int16
 GrB_Info GB_Adot3B__plus_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_int32.c b/Source/Generated/GB_AxB__plus_islt_int32.c
index 9e1dd3d3af..c88e00b23e 100644
--- a/Source/Generated/GB_AxB__plus_islt_int32.c
+++ b/Source/Generated/GB_AxB__plus_islt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_int32
 // A'*B function (dot2):     GB_Adot2B__plus_islt_int32
 // A'*B function (dot3):     GB_Adot3B__plus_islt_int32
-// A*B function (heap):      GB_AheapB__plus_islt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_ISLT_INT32 || GxB_NO_PLUS_ISLT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_int32
 GrB_Info GB_Adot2B__plus_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_int32
 GrB_Info GB_Adot3B__plus_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_int64.c b/Source/Generated/GB_AxB__plus_islt_int64.c
index 6c0def8863..ca0b1b3fff 100644
--- a/Source/Generated/GB_AxB__plus_islt_int64.c
+++ b/Source/Generated/GB_AxB__plus_islt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_int64
 // A'*B function (dot2):     GB_Adot2B__plus_islt_int64
 // A'*B function (dot3):     GB_Adot3B__plus_islt_int64
-// A*B function (heap):      GB_AheapB__plus_islt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_ISLT_INT64 || GxB_NO_PLUS_ISLT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_int64
 GrB_Info GB_Adot2B__plus_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_int64
 GrB_Info GB_Adot3B__plus_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_islt_int64
+GrB_Info GB_Asaxpy3B__plus_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_int8.c b/Source/Generated/GB_AxB__plus_islt_int8.c
index dab0f05a0b..9f7b50e6a2 100644
--- a/Source/Generated/GB_AxB__plus_islt_int8.c
+++ b/Source/Generated/GB_AxB__plus_islt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_int8
 // A'*B function (dot2):     GB_Adot2B__plus_islt_int8
 // A'*B function (dot3):     GB_Adot3B__plus_islt_int8
-// A*B function (heap):      GB_AheapB__plus_islt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_ISLT_INT8 || GxB_NO_PLUS_ISLT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_int8
 GrB_Info GB_Adot2B__plus_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_int8
 GrB_Info GB_Adot3B__plus_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_uint16.c b/Source/Generated/GB_AxB__plus_islt_uint16.c
index 10929d60ab..5e921c60c9 100644
--- a/Source/Generated/GB_AxB__plus_islt_uint16.c
+++ b/Source/Generated/GB_AxB__plus_islt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_islt_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_islt_uint16
-// A*B function (heap):      GB_AheapB__plus_islt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_ISLT_UINT16 || GxB_NO_PLUS_ISLT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_uint16
 GrB_Info GB_Adot2B__plus_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_uint16
 GrB_Info GB_Adot3B__plus_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_uint32.c b/Source/Generated/GB_AxB__plus_islt_uint32.c
index 9186861134..acddd06e78 100644
--- a/Source/Generated/GB_AxB__plus_islt_uint32.c
+++ b/Source/Generated/GB_AxB__plus_islt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_islt_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_islt_uint32
-// A*B function (heap):      GB_AheapB__plus_islt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_ISLT_UINT32 || GxB_NO_PLUS_ISLT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_uint32
 GrB_Info GB_Adot2B__plus_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_uint32
 GrB_Info GB_Adot3B__plus_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_uint64.c b/Source/Generated/GB_AxB__plus_islt_uint64.c
index 30ea37d47a..2b04d1b65d 100644
--- a/Source/Generated/GB_AxB__plus_islt_uint64.c
+++ b/Source/Generated/GB_AxB__plus_islt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_islt_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_islt_uint64
-// A*B function (heap):      GB_AheapB__plus_islt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_ISLT_UINT64 || GxB_NO_PLUS_ISLT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_uint64
 GrB_Info GB_Adot2B__plus_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_uint64
 GrB_Info GB_Adot3B__plus_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_islt_uint64
+GrB_Info GB_Asaxpy3B__plus_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_islt_uint8.c b/Source/Generated/GB_AxB__plus_islt_uint8.c
index ed26bc42cf..ffcb35b949 100644
--- a/Source/Generated/GB_AxB__plus_islt_uint8.c
+++ b/Source/Generated/GB_AxB__plus_islt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_islt_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_islt_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_islt_uint8
-// A*B function (heap):      GB_AheapB__plus_islt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_islt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_islt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik < bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISLT || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_ISLT_UINT8 || GxB_NO_PLUS_ISLT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_islt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_islt_uint8
 GrB_Info GB_Adot2B__plus_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_islt_uint8
 GrB_Info GB_Adot3B__plus_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_islt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_islt_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_fp32.c b/Source/Generated/GB_AxB__plus_isne_fp32.c
index 10991a8fc0..26982da59f 100644
--- a/Source/Generated/GB_AxB__plus_isne_fp32.c
+++ b/Source/Generated/GB_AxB__plus_isne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_isne_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_isne_fp32
-// A*B function (heap):      GB_AheapB__plus_isne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_ISNE_FP32 || GxB_NO_PLUS_ISNE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_fp32
 GrB_Info GB_Adot2B__plus_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_fp32
 GrB_Info GB_Adot3B__plus_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_fp64.c b/Source/Generated/GB_AxB__plus_isne_fp64.c
index 085c84a450..04cf0c5427 100644
--- a/Source/Generated/GB_AxB__plus_isne_fp64.c
+++ b/Source/Generated/GB_AxB__plus_isne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_isne_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_isne_fp64
-// A*B function (heap):      GB_AheapB__plus_isne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_ISNE_FP64 || GxB_NO_PLUS_ISNE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_fp64
 GrB_Info GB_Adot2B__plus_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_fp64
 GrB_Info GB_Adot3B__plus_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_int16.c b/Source/Generated/GB_AxB__plus_isne_int16.c
index 2bcc2c8d59..75b4b624e3 100644
--- a/Source/Generated/GB_AxB__plus_isne_int16.c
+++ b/Source/Generated/GB_AxB__plus_isne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_int16
 // A'*B function (dot2):     GB_Adot2B__plus_isne_int16
 // A'*B function (dot3):     GB_Adot3B__plus_isne_int16
-// A*B function (heap):      GB_AheapB__plus_isne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_ISNE_INT16 || GxB_NO_PLUS_ISNE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_int16
 GrB_Info GB_Adot2B__plus_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_int16
 GrB_Info GB_Adot3B__plus_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_int32.c b/Source/Generated/GB_AxB__plus_isne_int32.c
index 4311540084..e4c6347ba2 100644
--- a/Source/Generated/GB_AxB__plus_isne_int32.c
+++ b/Source/Generated/GB_AxB__plus_isne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_int32
 // A'*B function (dot2):     GB_Adot2B__plus_isne_int32
 // A'*B function (dot3):     GB_Adot3B__plus_isne_int32
-// A*B function (heap):      GB_AheapB__plus_isne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_ISNE_INT32 || GxB_NO_PLUS_ISNE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_int32
 GrB_Info GB_Adot2B__plus_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_int32
 GrB_Info GB_Adot3B__plus_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_int64.c b/Source/Generated/GB_AxB__plus_isne_int64.c
index 18f7ceca27..0c0d12f034 100644
--- a/Source/Generated/GB_AxB__plus_isne_int64.c
+++ b/Source/Generated/GB_AxB__plus_isne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_int64
 // A'*B function (dot2):     GB_Adot2B__plus_isne_int64
 // A'*B function (dot3):     GB_Adot3B__plus_isne_int64
-// A*B function (heap):      GB_AheapB__plus_isne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_ISNE_INT64 || GxB_NO_PLUS_ISNE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_int64
 GrB_Info GB_Adot2B__plus_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_int64
 GrB_Info GB_Adot3B__plus_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isne_int64
+GrB_Info GB_Asaxpy3B__plus_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_int8.c b/Source/Generated/GB_AxB__plus_isne_int8.c
index 757bb3dbed..7520a94afe 100644
--- a/Source/Generated/GB_AxB__plus_isne_int8.c
+++ b/Source/Generated/GB_AxB__plus_isne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_int8
 // A'*B function (dot2):     GB_Adot2B__plus_isne_int8
 // A'*B function (dot3):     GB_Adot3B__plus_isne_int8
-// A*B function (heap):      GB_AheapB__plus_isne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_ISNE_INT8 || GxB_NO_PLUS_ISNE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_int8
 GrB_Info GB_Adot2B__plus_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_int8
 GrB_Info GB_Adot3B__plus_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_uint16.c b/Source/Generated/GB_AxB__plus_isne_uint16.c
index 7d6dfdeaa6..0871ba5a9b 100644
--- a/Source/Generated/GB_AxB__plus_isne_uint16.c
+++ b/Source/Generated/GB_AxB__plus_isne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_isne_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_isne_uint16
-// A*B function (heap):      GB_AheapB__plus_isne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_ISNE_UINT16 || GxB_NO_PLUS_ISNE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_uint16
 GrB_Info GB_Adot2B__plus_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_uint16
 GrB_Info GB_Adot3B__plus_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_uint32.c b/Source/Generated/GB_AxB__plus_isne_uint32.c
index 2a36b3f0ba..2feebbc01a 100644
--- a/Source/Generated/GB_AxB__plus_isne_uint32.c
+++ b/Source/Generated/GB_AxB__plus_isne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_isne_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_isne_uint32
-// A*B function (heap):      GB_AheapB__plus_isne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_ISNE_UINT32 || GxB_NO_PLUS_ISNE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_uint32
 GrB_Info GB_Adot2B__plus_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_uint32
 GrB_Info GB_Adot3B__plus_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_uint64.c b/Source/Generated/GB_AxB__plus_isne_uint64.c
index 02e1e2fdf8..b5a6f9fd08 100644
--- a/Source/Generated/GB_AxB__plus_isne_uint64.c
+++ b/Source/Generated/GB_AxB__plus_isne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_isne_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_isne_uint64
-// A*B function (heap):      GB_AheapB__plus_isne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_ISNE_UINT64 || GxB_NO_PLUS_ISNE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_uint64
 GrB_Info GB_Adot2B__plus_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_uint64
 GrB_Info GB_Adot3B__plus_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_isne_uint64
+GrB_Info GB_Asaxpy3B__plus_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_isne_uint8.c b/Source/Generated/GB_AxB__plus_isne_uint8.c
index d6d0569090..b29a21eb0c 100644
--- a/Source/Generated/GB_AxB__plus_isne_uint8.c
+++ b/Source/Generated/GB_AxB__plus_isne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_isne_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_isne_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_isne_uint8
-// A*B function (heap):      GB_AheapB__plus_isne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_isne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_isne_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik != bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes): Cx [p..p+len-1] = Hx [i..i+len-1]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_ISNE || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_ISNE_UINT8 || GxB_NO_PLUS_ISNE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_isne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_isne_uint8
 GrB_Info GB_Adot2B__plus_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_isne_uint8
 GrB_Info GB_Adot3B__plus_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_isne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_isne_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_isne_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_fp32.c b/Source/Generated/GB_AxB__plus_land_fp32.c
index 9477205d6b..8472a7e7ce 100644
--- a/Source/Generated/GB_AxB__plus_land_fp32.c
+++ b/Source/Generated/GB_AxB__plus_land_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_land_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_land_fp32
-// A*B function (heap):      GB_AheapB__plus_land_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += ((aik != 0) && (bkj != 0))
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += ((x != 0) && (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += ((x != 0) && (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_LAND_FP32 || GxB_NO_PLUS_LAND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_fp32
 GrB_Info GB_Adot2B__plus_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_fp32
 GrB_Info GB_Adot3B__plus_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_land_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_land_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_fp64.c b/Source/Generated/GB_AxB__plus_land_fp64.c
index 8a9fbf8087..f809de74e4 100644
--- a/Source/Generated/GB_AxB__plus_land_fp64.c
+++ b/Source/Generated/GB_AxB__plus_land_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_land_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_land_fp64
-// A*B function (heap):      GB_AheapB__plus_land_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += ((aik != 0) && (bkj != 0))
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += ((x != 0) && (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += ((x != 0) && (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_LAND_FP64 || GxB_NO_PLUS_LAND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_fp64
 GrB_Info GB_Adot2B__plus_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_fp64
 GrB_Info GB_Adot3B__plus_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_land_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_land_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_int16.c b/Source/Generated/GB_AxB__plus_land_int16.c
index 591523fe9c..8f728c5be0 100644
--- a/Source/Generated/GB_AxB__plus_land_int16.c
+++ b/Source/Generated/GB_AxB__plus_land_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_int16
 // A'*B function (dot2):     GB_Adot2B__plus_land_int16
 // A'*B function (dot3):     GB_Adot3B__plus_land_int16
-// A*B function (heap):      GB_AheapB__plus_land_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_LAND_INT16 || GxB_NO_PLUS_LAND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_int16
 GrB_Info GB_Adot2B__plus_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_int16
 GrB_Info GB_Adot3B__plus_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_int16
+GrB_Info GB_Asaxpy3B__plus_land_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_int32.c b/Source/Generated/GB_AxB__plus_land_int32.c
index 949636f3e7..41fdfb51f2 100644
--- a/Source/Generated/GB_AxB__plus_land_int32.c
+++ b/Source/Generated/GB_AxB__plus_land_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_int32
 // A'*B function (dot2):     GB_Adot2B__plus_land_int32
 // A'*B function (dot3):     GB_Adot3B__plus_land_int32
-// A*B function (heap):      GB_AheapB__plus_land_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_LAND_INT32 || GxB_NO_PLUS_LAND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_int32
 GrB_Info GB_Adot2B__plus_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_int32
 GrB_Info GB_Adot3B__plus_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_int32
+GrB_Info GB_Asaxpy3B__plus_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_int64.c b/Source/Generated/GB_AxB__plus_land_int64.c
index deb53f044f..976809ae57 100644
--- a/Source/Generated/GB_AxB__plus_land_int64.c
+++ b/Source/Generated/GB_AxB__plus_land_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_int64
 // A'*B function (dot2):     GB_Adot2B__plus_land_int64
 // A'*B function (dot3):     GB_Adot3B__plus_land_int64
-// A*B function (heap):      GB_AheapB__plus_land_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p:p+len-1] = Hx [i:i+len-1], copying len entries (not len bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_LAND_INT64 || GxB_NO_PLUS_LAND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_int64
 GrB_Info GB_Adot2B__plus_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_int64
 GrB_Info GB_Adot3B__plus_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_int64
+GrB_Info GB_Asaxpy3B__plus_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_int8.c b/Source/Generated/GB_AxB__plus_land_int8.c
index 7281f2adb6..941e9a68a6 100644
--- a/Source/Generated/GB_AxB__plus_land_int8.c
+++ b/Source/Generated/GB_AxB__plus_land_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_int8
 // A'*B function (dot2):     GB_Adot2B__plus_land_int8
 // A'*B function (dot3):     GB_Adot3B__plus_land_int8
-// A*B function (heap):      GB_AheapB__plus_land_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p:p+len-1] = Hx [i:i+len-1], copying len entries (not len bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_LAND_INT8 || GxB_NO_PLUS_LAND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_int8
 GrB_Info GB_Adot2B__plus_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_int8
 GrB_Info GB_Adot3B__plus_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_int8
+GrB_Info GB_Asaxpy3B__plus_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_uint16.c b/Source/Generated/GB_AxB__plus_land_uint16.c
index b4bffdbc00..c0c4e06ed6 100644
--- a/Source/Generated/GB_AxB__plus_land_uint16.c
+++ b/Source/Generated/GB_AxB__plus_land_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_land_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_land_uint16
-// A*B function (heap):      GB_AheapB__plus_land_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p:p+len-1] = Hx [i:i+len-1], copying len entries (not len bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_LAND_UINT16 || GxB_NO_PLUS_LAND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_uint16
 GrB_Info GB_Adot2B__plus_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_uint16
 GrB_Info GB_Adot3B__plus_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_uint16
+GrB_Info GB_Asaxpy3B__plus_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_uint32.c b/Source/Generated/GB_AxB__plus_land_uint32.c
index e49330f69d..d3c4964174 100644
--- a/Source/Generated/GB_AxB__plus_land_uint32.c
+++ b/Source/Generated/GB_AxB__plus_land_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_land_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_land_uint32
-// A*B function (heap):      GB_AheapB__plus_land_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_LAND_UINT32 || GxB_NO_PLUS_LAND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_uint32
 GrB_Info GB_Adot2B__plus_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_uint32
 GrB_Info GB_Adot3B__plus_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_uint32
+GrB_Info GB_Asaxpy3B__plus_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_uint64.c b/Source/Generated/GB_AxB__plus_land_uint64.c
index 9af689f5af..c3ea558177 100644
--- a/Source/Generated/GB_AxB__plus_land_uint64.c
+++ b/Source/Generated/GB_AxB__plus_land_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_land_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_land_uint64
-// A*B function (heap):      GB_AheapB__plus_land_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_LAND_UINT64 || GxB_NO_PLUS_LAND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_uint64
 GrB_Info GB_Adot2B__plus_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_uint64
 GrB_Info GB_Adot3B__plus_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_uint64
+GrB_Info GB_Asaxpy3B__plus_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_land_uint8.c b/Source/Generated/GB_AxB__plus_land_uint8.c
index e4ebb21f14..19884e3498 100644
--- a/Source/Generated/GB_AxB__plus_land_uint8.c
+++ b/Source/Generated/GB_AxB__plus_land_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_land_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_land_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_land_uint8
-// A*B function (heap):      GB_AheapB__plus_land_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_land_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_land_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LAND || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_LAND_UINT8 || GxB_NO_PLUS_LAND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_land_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_land_uint8
 GrB_Info GB_Adot2B__plus_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_land_uint8
 GrB_Info GB_Adot3B__plus_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_land_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_land_uint8
+GrB_Info GB_Asaxpy3B__plus_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_fp32.c b/Source/Generated/GB_AxB__plus_lor_fp32.c
index 5bc0df08bc..d6c1dc57ee 100644
--- a/Source/Generated/GB_AxB__plus_lor_fp32.c
+++ b/Source/Generated/GB_AxB__plus_lor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_lor_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_lor_fp32
-// A*B function (heap):      GB_AheapB__plus_lor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += ((aik != 0) || (bkj != 0))
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += ((x != 0) || (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += ((x != 0) || (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_LOR_FP32 || GxB_NO_PLUS_LOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_fp32
 GrB_Info GB_Adot2B__plus_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_fp32
 GrB_Info GB_Adot3B__plus_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_lor_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_fp64.c b/Source/Generated/GB_AxB__plus_lor_fp64.c
index 51952bd660..8c5dc6fb7b 100644
--- a/Source/Generated/GB_AxB__plus_lor_fp64.c
+++ b/Source/Generated/GB_AxB__plus_lor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_lor_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_lor_fp64
-// A*B function (heap):      GB_AheapB__plus_lor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += ((aik != 0) || (bkj != 0))
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += ((x != 0) || (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += ((x != 0) || (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_LOR_FP64 || GxB_NO_PLUS_LOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_fp64
 GrB_Info GB_Adot2B__plus_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_fp64
 GrB_Info GB_Adot3B__plus_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_lor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_lor_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_int16.c b/Source/Generated/GB_AxB__plus_lor_int16.c
index 9164756cb2..8279a7defb 100644
--- a/Source/Generated/GB_AxB__plus_lor_int16.c
+++ b/Source/Generated/GB_AxB__plus_lor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_int16
 // A'*B function (dot2):     GB_Adot2B__plus_lor_int16
 // A'*B function (dot3):     GB_Adot3B__plus_lor_int16
-// A*B function (heap):      GB_AheapB__plus_lor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_LOR_INT16 || GxB_NO_PLUS_LOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_int16
 GrB_Info GB_Adot2B__plus_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_int16
 GrB_Info GB_Adot3B__plus_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_int16
+GrB_Info GB_Asaxpy3B__plus_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_int32.c b/Source/Generated/GB_AxB__plus_lor_int32.c
index bd19efd138..396a25484f 100644
--- a/Source/Generated/GB_AxB__plus_lor_int32.c
+++ b/Source/Generated/GB_AxB__plus_lor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_int32
 // A'*B function (dot2):     GB_Adot2B__plus_lor_int32
 // A'*B function (dot3):     GB_Adot3B__plus_lor_int32
-// A*B function (heap):      GB_AheapB__plus_lor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_LOR_INT32 || GxB_NO_PLUS_LOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_int32
 GrB_Info GB_Adot2B__plus_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_int32
 GrB_Info GB_Adot3B__plus_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_int32
+GrB_Info GB_Asaxpy3B__plus_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_int64.c b/Source/Generated/GB_AxB__plus_lor_int64.c
index 4136a42167..c333da407e 100644
--- a/Source/Generated/GB_AxB__plus_lor_int64.c
+++ b/Source/Generated/GB_AxB__plus_lor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_int64
 // A'*B function (dot2):     GB_Adot2B__plus_lor_int64
 // A'*B function (dot3):     GB_Adot3B__plus_lor_int64
-// A*B function (heap):      GB_AheapB__plus_lor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_LOR_INT64 || GxB_NO_PLUS_LOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_int64
 GrB_Info GB_Adot2B__plus_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_int64
 GrB_Info GB_Adot3B__plus_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_int64
+GrB_Info GB_Asaxpy3B__plus_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_int8.c b/Source/Generated/GB_AxB__plus_lor_int8.c
index b3a88534e2..1d6c256e96 100644
--- a/Source/Generated/GB_AxB__plus_lor_int8.c
+++ b/Source/Generated/GB_AxB__plus_lor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_int8
 // A'*B function (dot2):     GB_Adot2B__plus_lor_int8
 // A'*B function (dot3):     GB_Adot3B__plus_lor_int8
-// A*B function (heap):      GB_AheapB__plus_lor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_LOR_INT8 || GxB_NO_PLUS_LOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_int8
 GrB_Info GB_Adot2B__plus_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_int8
 GrB_Info GB_Adot3B__plus_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_int8
+GrB_Info GB_Asaxpy3B__plus_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_uint16.c b/Source/Generated/GB_AxB__plus_lor_uint16.c
index 81ae1dbaa5..5c3954f01a 100644
--- a/Source/Generated/GB_AxB__plus_lor_uint16.c
+++ b/Source/Generated/GB_AxB__plus_lor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_lor_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_lor_uint16
-// A*B function (heap):      GB_AheapB__plus_lor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_LOR_UINT16 || GxB_NO_PLUS_LOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_uint16
 GrB_Info GB_Adot2B__plus_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_uint16
 GrB_Info GB_Adot3B__plus_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_uint16
+GrB_Info GB_Asaxpy3B__plus_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_uint32.c b/Source/Generated/GB_AxB__plus_lor_uint32.c
index 217db585ad..32093d5912 100644
--- a/Source/Generated/GB_AxB__plus_lor_uint32.c
+++ b/Source/Generated/GB_AxB__plus_lor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_lor_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_lor_uint32
-// A*B function (heap):      GB_AheapB__plus_lor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_LOR_UINT32 || GxB_NO_PLUS_LOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_uint32
 GrB_Info GB_Adot2B__plus_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_uint32
 GrB_Info GB_Adot3B__plus_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_uint32
+GrB_Info GB_Asaxpy3B__plus_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_uint64.c b/Source/Generated/GB_AxB__plus_lor_uint64.c
index eca3c00352..0fa1996552 100644
--- a/Source/Generated/GB_AxB__plus_lor_uint64.c
+++ b/Source/Generated/GB_AxB__plus_lor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_lor_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_lor_uint64
-// A*B function (heap):      GB_AheapB__plus_lor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_LOR_UINT64 || GxB_NO_PLUS_LOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_uint64
 GrB_Info GB_Adot2B__plus_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_uint64
 GrB_Info GB_Adot3B__plus_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_uint64
+GrB_Info GB_Asaxpy3B__plus_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lor_uint8.c b/Source/Generated/GB_AxB__plus_lor_uint8.c
index ebe375bbc1..96e36c12d2 100644
--- a/Source/Generated/GB_AxB__plus_lor_uint8.c
+++ b/Source/Generated/GB_AxB__plus_lor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lor_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_lor_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_lor_uint8
-// A*B function (heap):      GB_AheapB__plus_lor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type of the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LOR || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_LOR_UINT8 || GxB_NO_PLUS_LOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lor_uint8
 GrB_Info GB_Adot2B__plus_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lor_uint8
 GrB_Info GB_Adot3B__plus_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lor_uint8
+GrB_Info GB_Asaxpy3B__plus_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_fp32.c b/Source/Generated/GB_AxB__plus_lxor_fp32.c
index d49a04e433..a3a8e39037 100644
--- a/Source/Generated/GB_AxB__plus_lxor_fp32.c
+++ b/Source/Generated/GB_AxB__plus_lxor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_fp32
-// A*B function (heap):      GB_AheapB__plus_lxor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += ((aik != 0) != (bkj != 0))
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += ((x != 0) != (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += ((x != 0) != (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type of the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_LXOR_FP32 || GxB_NO_PLUS_LXOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_fp32
 GrB_Info GB_Adot2B__plus_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_fp32
 GrB_Info GB_Adot3B__plus_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_lxor_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_lxor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_fp64.c b/Source/Generated/GB_AxB__plus_lxor_fp64.c
index 07537e9f00..13ca9f3131 100644
--- a/Source/Generated/GB_AxB__plus_lxor_fp64.c
+++ b/Source/Generated/GB_AxB__plus_lxor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_fp64
-// A*B function (heap):      GB_AheapB__plus_lxor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += ((aik != 0) != (bkj != 0))
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += ((x != 0) != (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += ((x != 0) != (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type of the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_LXOR_FP64 || GxB_NO_PLUS_LXOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_fp64
 GrB_Info GB_Adot2B__plus_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_fp64
 GrB_Info GB_Adot3B__plus_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_lxor_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_int16.c b/Source/Generated/GB_AxB__plus_lxor_int16.c
index 4c08f68c66..5590f5e158 100644
--- a/Source/Generated/GB_AxB__plus_lxor_int16.c
+++ b/Source/Generated/GB_AxB__plus_lxor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_int16
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_int16
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_int16
-// A*B function (heap):      GB_AheapB__plus_lxor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type of the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_LXOR_INT16 || GxB_NO_PLUS_LXOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_int16
 GrB_Info GB_Adot2B__plus_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_int16
 GrB_Info GB_Adot3B__plus_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_int16
+GrB_Info GB_Asaxpy3B__plus_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_int32.c b/Source/Generated/GB_AxB__plus_lxor_int32.c
index c8aa3839cb..e30ff1311a 100644
--- a/Source/Generated/GB_AxB__plus_lxor_int32.c
+++ b/Source/Generated/GB_AxB__plus_lxor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_int32
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_int32
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_int32
-// A*B function (heap):      GB_AheapB__plus_lxor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_LXOR_INT32 || GxB_NO_PLUS_LXOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_int32
 GrB_Info GB_Adot2B__plus_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_int32
 GrB_Info GB_Adot3B__plus_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_int32
+GrB_Info GB_Asaxpy3B__plus_lxor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_int64.c b/Source/Generated/GB_AxB__plus_lxor_int64.c
index e887c9f031..af445bc634 100644
--- a/Source/Generated/GB_AxB__plus_lxor_int64.c
+++ b/Source/Generated/GB_AxB__plus_lxor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_int64
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_int64
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_int64
-// A*B function (heap):      GB_AheapB__plus_lxor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_LXOR_INT64 || GxB_NO_PLUS_LXOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_int64
 GrB_Info GB_Adot2B__plus_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_int64
 GrB_Info GB_Adot3B__plus_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_int64
+GrB_Info GB_Asaxpy3B__plus_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_int8.c b/Source/Generated/GB_AxB__plus_lxor_int8.c
index b9acdd0063..6bfb375749 100644
--- a/Source/Generated/GB_AxB__plus_lxor_int8.c
+++ b/Source/Generated/GB_AxB__plus_lxor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_int8
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_int8
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_int8
-// A*B function (heap):      GB_AheapB__plus_lxor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): len counts entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_LXOR_INT8 || GxB_NO_PLUS_LXOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_int8
 GrB_Info GB_Adot2B__plus_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_int8
 GrB_Info GB_Adot3B__plus_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_int8
+GrB_Info GB_Asaxpy3B__plus_lxor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_uint16.c b/Source/Generated/GB_AxB__plus_lxor_uint16.c
index 45405b0a6a..0a0953b0b3 100644
--- a/Source/Generated/GB_AxB__plus_lxor_uint16.c
+++ b/Source/Generated/GB_AxB__plus_lxor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_uint16
-// A*B function (heap):      GB_AheapB__plus_lxor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_LXOR_UINT16 || GxB_NO_PLUS_LXOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_uint16
 GrB_Info GB_Adot2B__plus_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_uint16
 GrB_Info GB_Adot3B__plus_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_uint16
+GrB_Info GB_Asaxpy3B__plus_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_uint32.c b/Source/Generated/GB_AxB__plus_lxor_uint32.c
index 08dea6b6b3..1b173ad619 100644
--- a/Source/Generated/GB_AxB__plus_lxor_uint32.c
+++ b/Source/Generated/GB_AxB__plus_lxor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_uint32
-// A*B function (heap):      GB_AheapB__plus_lxor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_LXOR_UINT32 || GxB_NO_PLUS_LXOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_uint32
 GrB_Info GB_Adot2B__plus_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_uint32
 GrB_Info GB_Adot3B__plus_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_uint32
+GrB_Info GB_Asaxpy3B__plus_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_uint64.c b/Source/Generated/GB_AxB__plus_lxor_uint64.c
index 3ba7cd624d..c6f4393c31 100644
--- a/Source/Generated/GB_AxB__plus_lxor_uint64.c
+++ b/Source/Generated/GB_AxB__plus_lxor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_uint64
-// A*B function (heap):      GB_AheapB__plus_lxor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_LXOR_UINT64 || GxB_NO_PLUS_LXOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_uint64
 GrB_Info GB_Adot2B__plus_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_uint64
 GrB_Info GB_Adot3B__plus_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_uint64
+GrB_Info GB_Asaxpy3B__plus_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_lxor_uint8.c b/Source/Generated/GB_AxB__plus_lxor_uint8.c
index a25e989e3c..0bfef0d94b 100644
--- a/Source/Generated/GB_AxB__plus_lxor_uint8.c
+++ b/Source/Generated/GB_AxB__plus_lxor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_lxor_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_lxor_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_lxor_uint8
-// A*B function (heap):      GB_AheapB__plus_lxor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_lxor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_lxor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_LXOR_UINT8 || GxB_NO_PLUS_LXOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_lxor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_lxor_uint8
 GrB_Info GB_Adot2B__plus_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_lxor_uint8
 GrB_Info GB_Adot3B__plus_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_lxor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_lxor_uint8
+GrB_Info GB_Asaxpy3B__plus_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_fp32.c b/Source/Generated/GB_AxB__plus_max_fp32.c
index 619fc94c40..37d3660215 100644
--- a/Source/Generated/GB_AxB__plus_max_fp32.c
+++ b/Source/Generated/GB_AxB__plus_max_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_max_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_max_fp32
-// A*B function (heap):      GB_AheapB__plus_max_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmaxf (aik, bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += fmaxf (aik, bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmaxf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmaxf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += fmaxf (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += fmaxf (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_MAX_FP32 || GxB_NO_PLUS_MAX_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_fp32
 GrB_Info GB_Adot2B__plus_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_fp32
 GrB_Info GB_Adot3B__plus_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_max_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_fp64.c b/Source/Generated/GB_AxB__plus_max_fp64.c
index 0b71c0e541..c8b4257983 100644
--- a/Source/Generated/GB_AxB__plus_max_fp64.c
+++ b/Source/Generated/GB_AxB__plus_max_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_max_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_max_fp64
-// A*B function (heap):      GB_AheapB__plus_max_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmax (aik, bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += fmax (aik, bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmax (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmax (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += fmax (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += fmax (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_MAX_FP64 || GxB_NO_PLUS_MAX_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_fp64
 GrB_Info GB_Adot2B__plus_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_fp64
 GrB_Info GB_Adot3B__plus_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_max_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_int16.c b/Source/Generated/GB_AxB__plus_max_int16.c
index 08b9af360b..d2f3b23e57 100644
--- a/Source/Generated/GB_AxB__plus_max_int16.c
+++ b/Source/Generated/GB_AxB__plus_max_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_int16
 // A'*B function (dot2):     GB_Adot2B__plus_max_int16
 // A'*B function (dot3):     GB_Adot3B__plus_max_int16
-// A*B function (heap):      GB_AheapB__plus_max_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_MAX_INT16 || GxB_NO_PLUS_MAX_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_int16
 GrB_Info GB_Adot2B__plus_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_int16
 GrB_Info GB_Adot3B__plus_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_int16
+GrB_Info GB_Asaxpy3B__plus_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_int32.c b/Source/Generated/GB_AxB__plus_max_int32.c
index df5e30ea7c..251ad5eeff 100644
--- a/Source/Generated/GB_AxB__plus_max_int32.c
+++ b/Source/Generated/GB_AxB__plus_max_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_int32
 // A'*B function (dot2):     GB_Adot2B__plus_max_int32
 // A'*B function (dot3):     GB_Adot3B__plus_max_int32
-// A*B function (heap):      GB_AheapB__plus_max_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_MAX_INT32 || GxB_NO_PLUS_MAX_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_int32
 GrB_Info GB_Adot2B__plus_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_int32
 GrB_Info GB_Adot3B__plus_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_int32
+GrB_Info GB_Asaxpy3B__plus_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_int64.c b/Source/Generated/GB_AxB__plus_max_int64.c
index 8196f691a8..b00f91836d 100644
--- a/Source/Generated/GB_AxB__plus_max_int64.c
+++ b/Source/Generated/GB_AxB__plus_max_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_int64
 // A'*B function (dot2):     GB_Adot2B__plus_max_int64
 // A'*B function (dot3):     GB_Adot3B__plus_max_int64
-// A*B function (heap):      GB_AheapB__plus_max_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_MAX_INT64 || GxB_NO_PLUS_MAX_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_int64
 GrB_Info GB_Adot2B__plus_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_int64
 GrB_Info GB_Adot3B__plus_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_int64
+GrB_Info GB_Asaxpy3B__plus_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_int8.c b/Source/Generated/GB_AxB__plus_max_int8.c
index ea76df3493..d274aaf49d 100644
--- a/Source/Generated/GB_AxB__plus_max_int8.c
+++ b/Source/Generated/GB_AxB__plus_max_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_int8
 // A'*B function (dot2):     GB_Adot2B__plus_max_int8
 // A'*B function (dot3):     GB_Adot3B__plus_max_int8
-// A*B function (heap):      GB_AheapB__plus_max_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_MAX_INT8 || GxB_NO_PLUS_MAX_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_int8
 GrB_Info GB_Adot2B__plus_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_int8
 GrB_Info GB_Adot3B__plus_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_int8
+GrB_Info GB_Asaxpy3B__plus_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_uint16.c b/Source/Generated/GB_AxB__plus_max_uint16.c
index 23b4ae3662..de4e16b6df 100644
--- a/Source/Generated/GB_AxB__plus_max_uint16.c
+++ b/Source/Generated/GB_AxB__plus_max_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_max_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_max_uint16
-// A*B function (heap):      GB_AheapB__plus_max_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_PLUS_MAX_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_uint16
 GrB_Info GB_Adot2B__plus_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_uint16
 GrB_Info GB_Adot3B__plus_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_uint16
+GrB_Info GB_Asaxpy3B__plus_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_uint32.c b/Source/Generated/GB_AxB__plus_max_uint32.c
index 874ad53207..bcd723239c 100644
--- a/Source/Generated/GB_AxB__plus_max_uint32.c
+++ b/Source/Generated/GB_AxB__plus_max_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_max_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_max_uint32
-// A*B function (heap):      GB_AheapB__plus_max_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_PLUS_MAX_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_uint32
 GrB_Info GB_Adot2B__plus_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_uint32
 GrB_Info GB_Adot3B__plus_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_uint32
+GrB_Info GB_Asaxpy3B__plus_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_uint64.c b/Source/Generated/GB_AxB__plus_max_uint64.c
index 7e06921fcc..77064d4909 100644
--- a/Source/Generated/GB_AxB__plus_max_uint64.c
+++ b/Source/Generated/GB_AxB__plus_max_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_max_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_max_uint64
-// A*B function (heap):      GB_AheapB__plus_max_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_PLUS_MAX_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_uint64
 GrB_Info GB_Adot2B__plus_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_uint64
 GrB_Info GB_Adot3B__plus_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_uint64
+GrB_Info GB_Asaxpy3B__plus_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_max_uint8.c b/Source/Generated/GB_AxB__plus_max_uint8.c
index b24953af1f..9941275574 100644
--- a/Source/Generated/GB_AxB__plus_max_uint8.c
+++ b/Source/Generated/GB_AxB__plus_max_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_max_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_max_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_max_uint8
-// A*B function (heap):      GB_AheapB__plus_max_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_max_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_max_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IMAX (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMAX (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMAX (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MAX || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_PLUS_MAX_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_max_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_max_uint8
 GrB_Info GB_Adot2B__plus_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_max_uint8
 GrB_Info GB_Adot3B__plus_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_max_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_max_uint8
+GrB_Info GB_Asaxpy3B__plus_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_fp32.c b/Source/Generated/GB_AxB__plus_min_fp32.c
index d130602f25..2a516b4d6b 100644
--- a/Source/Generated/GB_AxB__plus_min_fp32.c
+++ b/Source/Generated/GB_AxB__plus_min_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_min_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_min_fp32
-// A*B function (heap):      GB_AheapB__plus_min_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fminf (aik, bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += fminf (aik, bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fminf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fminf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += fminf (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += fminf (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_MIN_FP32 || GxB_NO_PLUS_MIN_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_fp32
 GrB_Info GB_Adot2B__plus_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_fp32
 GrB_Info GB_Adot3B__plus_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_min_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_fp64.c b/Source/Generated/GB_AxB__plus_min_fp64.c
index 031b658684..59b1378fbf 100644
--- a/Source/Generated/GB_AxB__plus_min_fp64.c
+++ b/Source/Generated/GB_AxB__plus_min_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_min_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_min_fp64
-// A*B function (heap):      GB_AheapB__plus_min_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmin (aik, bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += fmin (aik, bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmin (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmin (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += fmin (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += fmin (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_MIN_FP64 || GxB_NO_PLUS_MIN_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_fp64
 GrB_Info GB_Adot2B__plus_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_fp64
 GrB_Info GB_Adot3B__plus_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_min_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_int16.c b/Source/Generated/GB_AxB__plus_min_int16.c
index 23bf59c698..4d963483c1 100644
--- a/Source/Generated/GB_AxB__plus_min_int16.c
+++ b/Source/Generated/GB_AxB__plus_min_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_int16
 // A'*B function (dot2):     GB_Adot2B__plus_min_int16
 // A'*B function (dot3):     GB_Adot3B__plus_min_int16
-// A*B function (heap):      GB_AheapB__plus_min_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_MIN_INT16 || GxB_NO_PLUS_MIN_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_int16
 GrB_Info GB_Adot2B__plus_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_int16
 GrB_Info GB_Adot3B__plus_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_int16
+GrB_Info GB_Asaxpy3B__plus_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_int32.c b/Source/Generated/GB_AxB__plus_min_int32.c
index 9d5c80023b..9d798c9724 100644
--- a/Source/Generated/GB_AxB__plus_min_int32.c
+++ b/Source/Generated/GB_AxB__plus_min_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_int32
 // A'*B function (dot2):     GB_Adot2B__plus_min_int32
 // A'*B function (dot3):     GB_Adot3B__plus_min_int32
-// A*B function (heap):      GB_AheapB__plus_min_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_MIN_INT32 || GxB_NO_PLUS_MIN_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_int32
 GrB_Info GB_Adot2B__plus_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_int32
 GrB_Info GB_Adot3B__plus_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_int32
+GrB_Info GB_Asaxpy3B__plus_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_int64.c b/Source/Generated/GB_AxB__plus_min_int64.c
index 88854574bd..6cd42590d1 100644
--- a/Source/Generated/GB_AxB__plus_min_int64.c
+++ b/Source/Generated/GB_AxB__plus_min_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_int64
 // A'*B function (dot2):     GB_Adot2B__plus_min_int64
 // A'*B function (dot3):     GB_Adot3B__plus_min_int64
-// A*B function (heap):      GB_AheapB__plus_min_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_MIN_INT64 || GxB_NO_PLUS_MIN_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_int64
 GrB_Info GB_Adot2B__plus_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_int64
 GrB_Info GB_Adot3B__plus_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_int64
+GrB_Info GB_Asaxpy3B__plus_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_int8.c b/Source/Generated/GB_AxB__plus_min_int8.c
index d05fed8803..3af9d90329 100644
--- a/Source/Generated/GB_AxB__plus_min_int8.c
+++ b/Source/Generated/GB_AxB__plus_min_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_int8
 // A'*B function (dot2):     GB_Adot2B__plus_min_int8
 // A'*B function (dot3):     GB_Adot3B__plus_min_int8
-// A*B function (heap):      GB_AheapB__plus_min_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_MIN_INT8 || GxB_NO_PLUS_MIN_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_int8
 GrB_Info GB_Adot2B__plus_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_int8
 GrB_Info GB_Adot3B__plus_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_int8
+GrB_Info GB_Asaxpy3B__plus_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_uint16.c b/Source/Generated/GB_AxB__plus_min_uint16.c
index 38f0bdeeab..a6c811a6f2 100644
--- a/Source/Generated/GB_AxB__plus_min_uint16.c
+++ b/Source/Generated/GB_AxB__plus_min_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_min_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_min_uint16
-// A*B function (heap):      GB_AheapB__plus_min_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p:p+len-1] = Hx [i:i+len-1], copying len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_PLUS_MIN_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_uint16
 GrB_Info GB_Adot2B__plus_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_uint16
 GrB_Info GB_Adot3B__plus_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_uint16
+GrB_Info GB_Asaxpy3B__plus_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_uint32.c b/Source/Generated/GB_AxB__plus_min_uint32.c
index e881ce4ebd..e710b51162 100644
--- a/Source/Generated/GB_AxB__plus_min_uint32.c
+++ b/Source/Generated/GB_AxB__plus_min_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_min_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_min_uint32
-// A*B function (heap):      GB_AheapB__plus_min_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p:p+len-1] = Hx [i:i+len-1], copying len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_PLUS_MIN_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_uint32
 GrB_Info GB_Adot2B__plus_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_uint32
 GrB_Info GB_Adot3B__plus_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_uint32
+GrB_Info GB_Asaxpy3B__plus_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_uint64.c b/Source/Generated/GB_AxB__plus_min_uint64.c
index b77e759a74..bef45eee8c 100644
--- a/Source/Generated/GB_AxB__plus_min_uint64.c
+++ b/Source/Generated/GB_AxB__plus_min_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_min_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_min_uint64
-// A*B function (heap):      GB_AheapB__plus_min_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p:p+len-1] = Hx [i:i+len-1], copying len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_PLUS_MIN_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_uint64
 GrB_Info GB_Adot2B__plus_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_uint64
 GrB_Info GB_Adot3B__plus_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_uint64
+GrB_Info GB_Asaxpy3B__plus_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_min_uint8.c b/Source/Generated/GB_AxB__plus_min_uint8.c
index 2c39c5669e..51c3b7fd1b 100644
--- a/Source/Generated/GB_AxB__plus_min_uint8.c
+++ b/Source/Generated/GB_AxB__plus_min_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_min_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_min_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_min_uint8
-// A*B function (heap):      GB_AheapB__plus_min_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_min_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_min_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IMIN (aik, bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMIN (x, y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMIN (x, y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MIN || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_PLUS_MIN_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_min_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_min_uint8
 GrB_Info GB_Adot2B__plus_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_min_uint8
 GrB_Info GB_Adot3B__plus_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_min_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_min_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_min_uint8
+GrB_Info GB_Asaxpy3B__plus_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_fp32.c b/Source/Generated/GB_AxB__plus_minus_fp32.c
index 88baab5c2b..1b072920bd 100644
--- a/Source/Generated/GB_AxB__plus_minus_fp32.c
+++ b/Source/Generated/GB_AxB__plus_minus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_minus_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_minus_fp32
-// A*B function (heap):      GB_AheapB__plus_minus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik - bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x - y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x - y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_MINUS_FP32 || GxB_NO_PLUS_MINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_fp32
 GrB_Info GB_Adot2B__plus_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_fp32
 GrB_Info GB_Adot3B__plus_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_minus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_minus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_fp64.c b/Source/Generated/GB_AxB__plus_minus_fp64.c
index 34193299b9..e0d1aa5f3f 100644
--- a/Source/Generated/GB_AxB__plus_minus_fp64.c
+++ b/Source/Generated/GB_AxB__plus_minus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_minus_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_minus_fp64
-// A*B function (heap):      GB_AheapB__plus_minus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik - bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x - y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x - y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_MINUS_FP64 || GxB_NO_PLUS_MINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_fp64
 GrB_Info GB_Adot2B__plus_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_fp64
 GrB_Info GB_Adot3B__plus_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_minus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_minus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_int16.c b/Source/Generated/GB_AxB__plus_minus_int16.c
index 54156cae91..9530483201 100644
--- a/Source/Generated/GB_AxB__plus_minus_int16.c
+++ b/Source/Generated/GB_AxB__plus_minus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_int16
 // A'*B function (dot2):     GB_Adot2B__plus_minus_int16
 // A'*B function (dot3):     GB_Adot3B__plus_minus_int16
-// A*B function (heap):      GB_AheapB__plus_minus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x - y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x - y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries (not bytes) from Hx [i...] into Cx [p...]
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_MINUS_INT16 || GxB_NO_PLUS_MINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_int16
 GrB_Info GB_Adot2B__plus_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_int16
 GrB_Info GB_Adot3B__plus_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_int16
+GrB_Info GB_Asaxpy3B__plus_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_int32.c b/Source/Generated/GB_AxB__plus_minus_int32.c
index f08e561def..eb776f9c79 100644
--- a/Source/Generated/GB_AxB__plus_minus_int32.c
+++ b/Source/Generated/GB_AxB__plus_minus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_int32
 // A'*B function (dot2):     GB_Adot2B__plus_minus_int32
 // A'*B function (dot3):     GB_Adot3B__plus_minus_int32
-// A*B function (heap):      GB_AheapB__plus_minus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x - y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x - y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_MINUS_INT32 || GxB_NO_PLUS_MINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_int32
 GrB_Info GB_Adot2B__plus_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_int32
 GrB_Info GB_Adot3B__plus_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_int32
+GrB_Info GB_Asaxpy3B__plus_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_int64.c b/Source/Generated/GB_AxB__plus_minus_int64.c
index 6bcea7deb7..620cb52aee 100644
--- a/Source/Generated/GB_AxB__plus_minus_int64.c
+++ b/Source/Generated/GB_AxB__plus_minus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_int64
 // A'*B function (dot2):     GB_Adot2B__plus_minus_int64
 // A'*B function (dot3):     GB_Adot3B__plus_minus_int64
-// A*B function (heap):      GB_AheapB__plus_minus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x - y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x - y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_MINUS_INT64 || GxB_NO_PLUS_MINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_int64
 GrB_Info GB_Adot2B__plus_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_int64
 GrB_Info GB_Adot3B__plus_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_int64
+GrB_Info GB_Asaxpy3B__plus_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_int8.c b/Source/Generated/GB_AxB__plus_minus_int8.c
index f82225d783..d5b62ef80e 100644
--- a/Source/Generated/GB_AxB__plus_minus_int8.c
+++ b/Source/Generated/GB_AxB__plus_minus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_int8
 // A'*B function (dot2):     GB_Adot2B__plus_minus_int8
 // A'*B function (dot3):     GB_Adot3B__plus_minus_int8
-// A*B function (heap):      GB_AheapB__plus_minus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x - y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x - y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_MINUS_INT8 || GxB_NO_PLUS_MINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_int8
 GrB_Info GB_Adot2B__plus_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_int8
 GrB_Info GB_Adot3B__plus_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_int8
+GrB_Info GB_Asaxpy3B__plus_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_uint16.c b/Source/Generated/GB_AxB__plus_minus_uint16.c
index da4d131843..a00bd38af4 100644
--- a/Source/Generated/GB_AxB__plus_minus_uint16.c
+++ b/Source/Generated/GB_AxB__plus_minus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_minus_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_minus_uint16
-// A*B function (heap):      GB_AheapB__plus_minus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x - y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x - y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_MINUS_UINT16 || GxB_NO_PLUS_MINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_uint16
 GrB_Info GB_Adot2B__plus_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_uint16
 GrB_Info GB_Adot3B__plus_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_uint16
+GrB_Info GB_Asaxpy3B__plus_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_uint32.c b/Source/Generated/GB_AxB__plus_minus_uint32.c
index 2f9085b375..e3000a6d8e 100644
--- a/Source/Generated/GB_AxB__plus_minus_uint32.c
+++ b/Source/Generated/GB_AxB__plus_minus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_minus_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_minus_uint32
-// A*B function (heap):      GB_AheapB__plus_minus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x - y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x - y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_MINUS_UINT32 || GxB_NO_PLUS_MINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_uint32
 GrB_Info GB_Adot2B__plus_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_uint32
 GrB_Info GB_Adot3B__plus_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_uint32
+GrB_Info GB_Asaxpy3B__plus_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_uint64.c b/Source/Generated/GB_AxB__plus_minus_uint64.c
index cfe493c3e6..8490d529bc 100644
--- a/Source/Generated/GB_AxB__plus_minus_uint64.c
+++ b/Source/Generated/GB_AxB__plus_minus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_minus_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_minus_uint64
-// A*B function (heap):      GB_AheapB__plus_minus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x - y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x - y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_MINUS_UINT64 || GxB_NO_PLUS_MINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_uint64
 GrB_Info GB_Adot2B__plus_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_uint64
 GrB_Info GB_Adot3B__plus_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_uint64
+GrB_Info GB_Asaxpy3B__plus_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_minus_uint8.c b/Source/Generated/GB_AxB__plus_minus_uint8.c
index 07c116d2ea..84e8ebe3ee 100644
--- a/Source/Generated/GB_AxB__plus_minus_uint8.c
+++ b/Source/Generated/GB_AxB__plus_minus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_minus_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_minus_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_minus_uint8
-// A*B function (heap):      GB_AheapB__plus_minus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_minus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_minus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (aik - bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x - y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x - y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_MINUS || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_MINUS_UINT8 || GxB_NO_PLUS_MINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_minus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_minus_uint8
 GrB_Info GB_Adot2B__plus_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_minus_uint8
 GrB_Info GB_Adot3B__plus_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_minus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_minus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_minus_uint8
+GrB_Info GB_Asaxpy3B__plus_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_pair_fp32.c b/Source/Generated/GB_AxB__plus_pair_fp32.c
new file mode 100644
index 0000000000..6186c14d06
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_fp32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_fp32
+// A'*B function (dot3):     GB_Adot3B__plus_pair_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    float cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_PAIR_FP32 || GxB_NO_PLUS_PAIR_FP32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // the masked case C<M>=A'*B is now handled by the dot3 kernel, not by dot2
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"       // included with GB_PHASE_2_OF_2 defined, which is undefined again right after
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #include "GB_AxB_dot3_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body is this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_pair_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // semiring disabled: generic case is used instead (see GB_DISABLE)
+    #else
+    #include "GB_AxB_saxpy3_template.c" // the function body is this textually included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_fp64.c b/Source/Generated/GB_AxB__plus_pair_fp64.c
new file mode 100644
index 0000000000..48ad27145a
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_fp64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_fp64
+// A'*B function (dot3):     GB_Adot3B__plus_pair_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    double cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1], copying len entries (not bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_PAIR_FP64 || GxB_NO_PLUS_PAIR_FP64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_fp64  // A'*B (dot2) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_fp64  // C<M>=A'*B (dot3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_fp64  // C+=A'*B (dot4) kernel for this semiring
+(
+    GrB_Matrix C,                               // accumulated into: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_pair_fp64  // A*B (saxpy3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_int16.c b/Source/Generated/GB_AxB__plus_pair_int16.c
new file mode 100644
index 0000000000..337aab215d
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_int16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_int16
+// A'*B function (dot3):     GB_Adot3B__plus_pair_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1], copying len entries (not bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_PAIR_INT16 || GxB_NO_PLUS_PAIR_INT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_int16  // A'*B (dot2) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_int16  // C<M>=A'*B (dot3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_int16  // C+=A'*B (dot4) kernel for this semiring
+(
+    GrB_Matrix C,                               // accumulated into: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_pair_int16  // A*B (saxpy3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_int32.c b/Source/Generated/GB_AxB__plus_pair_int32.c
new file mode 100644
index 0000000000..9f374653b0
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_int32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_int32
+// A'*B function (dot3):     GB_Adot3B__plus_pair_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1], copying len entries (not bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_PAIR_INT32 || GxB_NO_PLUS_PAIR_INT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_int32  // A'*B (dot2) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_int32  // C<M>=A'*B (dot3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_int32  // C+=A'*B (dot4) kernel for this semiring
+(
+    GrB_Matrix C,                               // accumulated into: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_pair_int32  // A*B (saxpy3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_int64.c b/Source/Generated/GB_AxB__plus_pair_int64.c
new file mode 100644
index 0000000000..96f4313412
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_int64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_int64
+// A'*B function (dot3):     GB_Adot3B__plus_pair_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1], copying len entries (not bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_PAIR_INT64 || GxB_NO_PLUS_PAIR_INT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_int64  // A'*B (dot2) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_int64  // C<M>=A'*B (dot3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_int64  // C+=A'*B (dot4) kernel for this semiring
+(
+    GrB_Matrix C,                               // accumulated into: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_pair_int64  // A*B (saxpy3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_int8.c b/Source/Generated/GB_AxB__plus_pair_int8.c
new file mode 100644
index 0000000000..ad4e94b8cc
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_int8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_int8
+// A'*B function (dot3):     GB_Adot3B__plus_pair_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1], copying len entries (not bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_PAIR_INT8 || GxB_NO_PLUS_PAIR_INT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_int8  // A'*B (dot2) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<!M>=A'*B
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3; this kernel is for C=A'*B and C<!M>=A'*B
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #define GB_PHASE_2_OF_2
+    #include "GB_AxB_dot2_meta.c"   // included with GB_PHASE_2_OF_2 defined
+    #undef GB_PHASE_2_OF_2
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_int8  // C<M>=A'*B (dot3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, const bool Mask_struct, // M: mask of C<M>=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot3_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_int8  // C+=A'*B (dot4) kernel for this semiring
+(
+    GrB_Matrix C,                               // accumulated into: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_pair_int8  // A*B (saxpy3) kernel for this semiring
+(
+    GrB_Matrix C,                               // matrix being computed
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // hard-coded kernel disabled by GB_DISABLE
+    #else
+    #include "GB_AxB_saxpy3_template.c"     // function body comes from template
+    return (GrB_SUCCESS) ;          // kernel enabled (GB_DISABLE is 0)
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_uint16.c b/Source/Generated/GB_AxB__plus_pair_uint16.c
new file mode 100644
index 0000000000..0669a68117
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_uint16.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_uint16
+// A'*B function (dot3):     GB_Adot3B__plus_pair_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_PAIR_UINT16 || GxB_NO_PLUS_PAIR_UINT16)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), plus_pair_uint16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                  // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;         // kernel compiled out: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2         // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"   // kernel body, textually expanded in place
+    #undef GB_PHASE_2_OF_2          // do not leak the phase flag past this point
+    return (GrB_SUCCESS) ;          // reached only when the kernel is compiled in
+    #endif                          // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), plus_pair_uint16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, plus_pair_uint16 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h" // presumably helpers for the .c template
+
+GrB_Info GB_Asaxpy3B__plus_pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_uint32.c b/Source/Generated/GB_AxB__plus_pair_uint32.c
new file mode 100644
index 0000000000..6f8f9823ee
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_uint32.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_uint32
+// A'*B function (dot3):     GB_Adot3B__plus_pair_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_PAIR_UINT32 || GxB_NO_PLUS_PAIR_UINT32)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), plus_pair_uint32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                  // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;         // kernel compiled out: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2         // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"   // kernel body, textually expanded in place
+    #undef GB_PHASE_2_OF_2          // do not leak the phase flag past this point
+    return (GrB_SUCCESS) ;          // reached only when the kernel is compiled in
+    #endif                          // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), plus_pair_uint32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, plus_pair_uint32 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h" // presumably helpers for the .c template
+
+GrB_Info GB_Asaxpy3B__plus_pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_uint64.c b/Source/Generated/GB_AxB__plus_pair_uint64.c
new file mode 100644
index 0000000000..2f5efebf36
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_uint64.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_uint64
+// A'*B function (dot3):     GB_Adot3B__plus_pair_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_PAIR_UINT64 || GxB_NO_PLUS_PAIR_UINT64)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), plus_pair_uint64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                  // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;         // kernel compiled out: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2         // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"   // kernel body, textually expanded in place
+    #undef GB_PHASE_2_OF_2          // do not leak the phase flag past this point
+    return (GrB_SUCCESS) ;          // reached only when the kernel is compiled in
+    #endif                          // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), plus_pair_uint64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, plus_pair_uint64 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h" // presumably helpers for the .c template
+
+GrB_Info GB_Asaxpy3B__plus_pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_pair_uint8.c b/Source/Generated/GB_AxB__plus_pair_uint8.c
new file mode 100644
index 0000000000..7cbe239e14
--- /dev/null
+++ b/Source/Generated/GB_AxB__plus_pair_uint8.c
@@ -0,0 +1,272 @@
+//------------------------------------------------------------------------------
+// GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_bracket.h"
+#include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
+#include "GB_AxB__include.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A'*B function (dot2):     GB_Adot2B__plus_pair_uint8
+// A'*B function (dot3):     GB_Adot3B__plus_pair_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_pair_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_pair_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+
+// Multiply: z = 1
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
+// MultAdd:  cij += 1
+// Identity: 0
+// Terminal: ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_BTYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// aik = Ax [pA]
+#define GB_GETA(aik,Ax,pA) \
+    ;
+
+// bkj = Bx [pB]
+#define GB_GETB(bkj,Bx,pB) \
+    ;
+
+#define GB_CX(p) Cx [p]
+
+// multiply operator
+#define GB_MULT(z, x, y) \
+    z = 1
+
+// multiply-add
+#define GB_MULTADD(z, x, y) \
+    z += 1
+
+// monoid identity value
+#define GB_IDENTITY \
+    0
+
+// break if cij reaches the terminal value (dot product only)
+#define GB_DOT_TERMINAL(cij) \
+    ;
+
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_PRAGMA_SIMD
+
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// declare the cij scalar
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
+
+// save the value of C(i,j)
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    1
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
+
+// disable this semiring and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PLUS || GxB_NO_PAIR || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_PAIR_UINT8 || GxB_NO_PLUS_PAIR_UINT8)
+
+//------------------------------------------------------------------------------
+// C=A'*B or C<!M>=A'*B: dot product (phase 2), plus_pair_uint8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot2B__plus_pair_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix *Aslice, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice,
+    int64_t *GB_RESTRICT *C_counts,
+    int nthreads, int naslice, int nbslice
+)
+{ 
+    // C<M>=A'*B now uses dot3, so only C=A'*B and C<!M>=A'*B are done here
+    #if GB_DISABLE                  // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;         // kernel compiled out: use the generic case
+    #else
+    #define GB_PHASE_2_OF_2         // presumably selects phase 2 in the meta file
+    #include "GB_AxB_dot2_meta.c"   // kernel body, textually expanded in place
+    #undef GB_PHASE_2_OF_2          // do not leak the phase flag past this point
+    return (GrB_SUCCESS) ;          // reached only when the kernel is compiled in
+    #endif                          // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A'*B: masked dot product method (phase 2), plus_pair_uint8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot3B__plus_pair_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot3_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C+=A'*B: dense dot product, plus_pair_uint8 semiring
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_pair_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h" // presumably helpers for the .c template
+
+GrB_Info GB_Asaxpy3B__plus_pair_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                      // any GxB_NO_* flag in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out: use the generic case
+    #else
+    #include "GB_AxB_saxpy3_template.c" // kernel body, textually expanded in place
+    return (GrB_SUCCESS) ;              // reached only when the kernel is compiled in
+    #endif                              // GB_DISABLE
+}
+
+#endif
+
diff --git a/Source/Generated/GB_AxB__plus_plus_fp32.c b/Source/Generated/GB_AxB__plus_plus_fp32.c
index 4c99cea1db..5a873f57fe 100644
--- a/Source/Generated/GB_AxB__plus_plus_fp32.c
+++ b/Source/Generated/GB_AxB__plus_plus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_plus_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_plus_fp32
-// A*B function (heap):      GB_AheapB__plus_plus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik + bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x + y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x + y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy of len entries (not bytes) from &(Hx [i]) to &(Cx [p])
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_PLUS_PLUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_fp32
 GrB_Info GB_Adot2B__plus_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_fp32
 GrB_Info GB_Adot3B__plus_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_plus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_plus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_fp64.c b/Source/Generated/GB_AxB__plus_plus_fp64.c
index e518d25fbf..a24c5ff659 100644
--- a/Source/Generated/GB_AxB__plus_plus_fp64.c
+++ b/Source/Generated/GB_AxB__plus_plus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_plus_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_plus_fp64
-// A*B function (heap):      GB_AheapB__plus_plus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik + bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x + y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x + y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy of len entries (not bytes) from &(Hx [i]) to &(Cx [p])
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_PLUS_PLUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_fp64
 GrB_Info GB_Adot2B__plus_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_fp64
 GrB_Info GB_Adot3B__plus_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_plus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_int16.c b/Source/Generated/GB_AxB__plus_plus_int16.c
index 46b996b7ab..6034510f14 100644
--- a/Source/Generated/GB_AxB__plus_plus_int16.c
+++ b/Source/Generated/GB_AxB__plus_plus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_int16
 // A'*B function (dot2):     GB_Adot2B__plus_plus_int16
 // A'*B function (dot3):     GB_Adot3B__plus_plus_int16
-// A*B function (heap):      GB_AheapB__plus_plus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x + y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x + y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy of len entries (not bytes) from &(Hx [i]) to &(Cx [p])
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_PLUS_PLUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_int16
 GrB_Info GB_Adot2B__plus_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_int16
 GrB_Info GB_Adot3B__plus_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_int16
+GrB_Info GB_Asaxpy3B__plus_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_int32.c b/Source/Generated/GB_AxB__plus_plus_int32.c
index 10a6bfa229..86de3fbd42 100644
--- a/Source/Generated/GB_AxB__plus_plus_int32.c
+++ b/Source/Generated/GB_AxB__plus_plus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_int32
 // A'*B function (dot2):     GB_Adot2B__plus_plus_int32
 // A'*B function (dot3):     GB_Adot3B__plus_plus_int32
-// A*B function (heap):      GB_AheapB__plus_plus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x + y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x + y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy of len entries (not bytes) from &(Hx [i]) to &(Cx [p])
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_PLUS_PLUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_int32
 GrB_Info GB_Adot2B__plus_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_int32
 GrB_Info GB_Adot3B__plus_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_int32
+GrB_Info GB_Asaxpy3B__plus_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_int64.c b/Source/Generated/GB_AxB__plus_plus_int64.c
index c015430e83..07af06bfd5 100644
--- a/Source/Generated/GB_AxB__plus_plus_int64.c
+++ b/Source/Generated/GB_AxB__plus_plus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_int64
 // A'*B function (dot2):     GB_Adot2B__plus_plus_int64
 // A'*B function (dot3):     GB_Adot3B__plus_plus_int64
-// A*B function (heap):      GB_AheapB__plus_plus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x + y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x + y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_PLUS_PLUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_int64
 GrB_Info GB_Adot2B__plus_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_int64
 GrB_Info GB_Adot3B__plus_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_int64
+GrB_Info GB_Asaxpy3B__plus_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_int8.c b/Source/Generated/GB_AxB__plus_plus_int8.c
index c354200c55..a89e95a26e 100644
--- a/Source/Generated/GB_AxB__plus_plus_int8.c
+++ b/Source/Generated/GB_AxB__plus_plus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_int8
 // A'*B function (dot2):     GB_Adot2B__plus_plus_int8
 // A'*B function (dot3):     GB_Adot3B__plus_plus_int8
-// A*B function (heap):      GB_AheapB__plus_plus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x + y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x + y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_PLUS_PLUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_int8
 GrB_Info GB_Adot2B__plus_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_int8
 GrB_Info GB_Adot3B__plus_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_int8
+GrB_Info GB_Asaxpy3B__plus_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_uint16.c b/Source/Generated/GB_AxB__plus_plus_uint16.c
index 0b4c4978c7..809b668eb5 100644
--- a/Source/Generated/GB_AxB__plus_plus_uint16.c
+++ b/Source/Generated/GB_AxB__plus_plus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_plus_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_plus_uint16
-// A*B function (heap):      GB_AheapB__plus_plus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x + y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x + y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_PLUS_PLUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_uint16
 GrB_Info GB_Adot2B__plus_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_uint16
 GrB_Info GB_Adot3B__plus_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_uint16
+GrB_Info GB_Asaxpy3B__plus_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_uint32.c b/Source/Generated/GB_AxB__plus_plus_uint32.c
index 7296d6b919..039d908b14 100644
--- a/Source/Generated/GB_AxB__plus_plus_uint32.c
+++ b/Source/Generated/GB_AxB__plus_plus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_plus_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_plus_uint32
-// A*B function (heap):      GB_AheapB__plus_plus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x + y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x + y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_PLUS_PLUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_uint32
 GrB_Info GB_Adot2B__plus_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_uint32
 GrB_Info GB_Adot3B__plus_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_uint32
+(
+    GrB_Matrix C,               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_uint32
+GrB_Info GB_Asaxpy3B__plus_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,               // result matrix of the saxpy3 product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_uint64.c b/Source/Generated/GB_AxB__plus_plus_uint64.c
index af64b323f8..54837fdffc 100644
--- a/Source/Generated/GB_AxB__plus_plus_uint64.c
+++ b/Source/Generated/GB_AxB__plus_plus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_plus_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_plus_uint64
-// A*B function (heap):      GB_AheapB__plus_plus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x + y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x + y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1]: copy len entries (not len bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_PLUS_PLUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_uint64
 GrB_Info GB_Adot2B__plus_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_uint64
 GrB_Info GB_Adot3B__plus_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_uint64
+(
+    GrB_Matrix C,               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_uint64
+GrB_Info GB_Asaxpy3B__plus_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,               // result matrix of the saxpy3 product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_plus_uint8.c b/Source/Generated/GB_AxB__plus_plus_uint8.c
index fbc614a904..fa1797b8d9 100644
--- a/Source/Generated/GB_AxB__plus_plus_uint8.c
+++ b/Source/Generated/GB_AxB__plus_plus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_plus_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_plus_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_plus_uint8
-// A*B function (heap):      GB_AheapB__plus_plus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_plus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_plus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (aik + bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x + y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x + y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1]: copy len entries (not len bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_PLUS_PLUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_plus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_plus_uint8
 GrB_Info GB_Adot2B__plus_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_plus_uint8
 GrB_Info GB_Adot3B__plus_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_plus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_plus_uint8
+(
+    GrB_Matrix C,               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_plus_uint8
+GrB_Info GB_Asaxpy3B__plus_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,               // result matrix of the saxpy3 product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_fp32.c b/Source/Generated/GB_AxB__plus_rdiv_fp32.c
index 338bf945f6..385d9e027c 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_fp32.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_fp32
-// A*B function (heap):      GB_AheapB__plus_rdiv_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (bkj / aik)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (y / x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (y / x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1]: copy len entries (not len bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_RDIV_FP32 || GxB_NO_PLUS_RDIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_fp32
 GrB_Info GB_Adot2B__plus_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_fp32
 GrB_Info GB_Adot3B__plus_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_rdiv_fp32
+(
+    GrB_Matrix C,               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled; generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_rdiv_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,               // result matrix of the saxpy3 product
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // template expanded for this semiring
+    return (GrB_SUCCESS) ;      // hard-coded kernel was used
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_fp64.c b/Source/Generated/GB_AxB__plus_rdiv_fp64.c
index 1577f82d05..f46b530292 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_fp64.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_fp64
-// A*B function (heap):      GB_AheapB__plus_rdiv_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (bkj / aik)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (y / x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (y / x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // Cx [p..p+len-1] = Hx [i..i+len-1]: copy len entries (not len bytes)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_RDIV_FP64 || GxB_NO_PLUS_RDIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_fp64
 GrB_Info GB_Adot2B__plus_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_fp64
 GrB_Info GB_Adot3B__plus_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_rdiv_fp64     // hard-coded dot4 kernel, PLUS_RDIV_FP64 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice slices -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;      // reached when the template code runs to completion
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_rdiv_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_rdiv_fp64   // hard-coded saxpy3 kernel, PLUS_RDIV_FP64 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // output of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;                  // reached when the template code runs to completion
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_int16.c b/Source/Generated/GB_AxB__plus_rdiv_int16.c
index 18383afc1e..aa7891313e 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_int16.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_int16
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_int16
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_int16
-// A*B function (heap):      GB_AheapB__plus_rdiv_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 16)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 16) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_RDIV_INT16 || GxB_NO_PLUS_RDIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_int16
 GrB_Info GB_Adot2B__plus_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_int16
 GrB_Info GB_Adot3B__plus_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_int16    // hard-coded dot4 kernel, PLUS_RDIV_INT16 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice slices -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;      // reached when the template code runs to completion
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_int16
+GrB_Info GB_Asaxpy3B__plus_rdiv_int16  // hard-coded saxpy3 kernel, PLUS_RDIV_INT16 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // output of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;                  // reached when the template code runs to completion
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_int32.c b/Source/Generated/GB_AxB__plus_rdiv_int32.c
index 1659c00688..43d1889ddb 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_int32.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_int32
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_int32
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_int32
-// A*B function (heap):      GB_AheapB__plus_rdiv_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 32)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 32) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_RDIV_INT32 || GxB_NO_PLUS_RDIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_int32
 GrB_Info GB_Adot2B__plus_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_int32
 GrB_Info GB_Adot3B__plus_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_int32    // hard-coded dot4 kernel, PLUS_RDIV_INT32 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice slices -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;      // reached when the template code runs to completion
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_int32
+GrB_Info GB_Asaxpy3B__plus_rdiv_int32  // hard-coded saxpy3 kernel, PLUS_RDIV_INT32 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // output of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;                  // reached when the template code runs to completion
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_int64.c b/Source/Generated/GB_AxB__plus_rdiv_int64.c
index 8671235a5f..313e834023 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_int64.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_int64
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_int64
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_int64
-// A*B function (heap):      GB_AheapB__plus_rdiv_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 64)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 64) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 for this 64-bit type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_RDIV_INT64 || GxB_NO_PLUS_RDIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_int64
 GrB_Info GB_Adot2B__plus_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_int64
 GrB_Info GB_Adot3B__plus_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_int64    // hard-coded dot4 kernel, PLUS_RDIV_INT64 semiring
+(
+    GrB_Matrix C,                       // updated in place: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice slices -- confirm in template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE; the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;      // reached when the template code runs to completion
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_int64
+GrB_Info GB_Asaxpy3B__plus_rdiv_int64  // hard-coded saxpy3 kernel, PLUS_RDIV_INT64 semiring
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // output of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,    // Mask_comp: presumably selects C<!M> -- confirm in template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"     // the whole function body comes from this shared template
+    return (GrB_SUCCESS) ;                  // reached when the template code runs to completion
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_int8.c b/Source/Generated/GB_AxB__plus_rdiv_int8.c
index 4a037aa93c..a886e1c729 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_int8.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_int8
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_int8
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_int8
-// A*B function (heap):      GB_AheapB__plus_rdiv_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 8)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 8) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_RDIV_INT8 || GxB_NO_PLUS_RDIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_int8
 GrB_Info GB_Adot2B__plus_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_int8
 GrB_Info GB_Adot3B__plus_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_int8
+GrB_Info GB_Asaxpy3B__plus_rdiv_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_uint16.c b/Source/Generated/GB_AxB__plus_rdiv_uint16.c
index 8d0a096dc2..d5c12b348f 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_uint16.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_uint16
-// A*B function (heap):      GB_AheapB__plus_rdiv_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 16)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 16) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_RDIV_UINT16 || GxB_NO_PLUS_RDIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_uint16
 GrB_Info GB_Adot2B__plus_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_uint16
 GrB_Info GB_Adot3B__plus_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_uint16
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_uint32.c b/Source/Generated/GB_AxB__plus_rdiv_uint32.c
index 7369e04271..0f3193e1f2 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_uint32.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_uint32
-// A*B function (heap):      GB_AheapB__plus_rdiv_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 32)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 32) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_RDIV_UINT32 || GxB_NO_PLUS_RDIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_uint32
 GrB_Info GB_Adot2B__plus_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_uint32
 GrB_Info GB_Adot3B__plus_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_uint32
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_uint64.c b/Source/Generated/GB_AxB__plus_rdiv_uint64.c
index ce72e42a76..7c4fe1ccb4 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_uint64.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_uint64
-// A*B function (heap):      GB_AheapB__plus_rdiv_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 64)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 64) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_RDIV_UINT64 || GxB_NO_PLUS_RDIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_uint64
 GrB_Info GB_Adot2B__plus_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_uint64
 GrB_Info GB_Adot3B__plus_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_uint64
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rdiv_uint8.c b/Source/Generated/GB_AxB__plus_rdiv_uint8.c
index 03828bb3fe..ccd7e7d08e 100644
--- a/Source/Generated/GB_AxB__plus_rdiv_uint8.c
+++ b/Source/Generated/GB_AxB__plus_rdiv_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rdiv_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_rdiv_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_rdiv_uint8
-// A*B function (heap):      GB_AheapB__plus_rdiv_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rdiv_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rdiv_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 8)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 8) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RDIV || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_RDIV_UINT8 || GxB_NO_PLUS_RDIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rdiv_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rdiv_uint8
 GrB_Info GB_Adot2B__plus_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rdiv_uint8
 GrB_Info GB_Adot3B__plus_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rdiv_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rdiv_uint8
+GrB_Info GB_Asaxpy3B__plus_rdiv_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_fp32.c b/Source/Generated/GB_AxB__plus_rminus_fp32.c
index 3e8b5f0543..e114c9b707 100644
--- a/Source/Generated/GB_AxB__plus_rminus_fp32.c
+++ b/Source/Generated/GB_AxB__plus_rminus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_fp32
-// A*B function (heap):      GB_AheapB__plus_rminus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (bkj - aik)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (y - x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (y - x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_RMINUS_FP32 || GxB_NO_PLUS_RMINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_fp32
 GrB_Info GB_Adot2B__plus_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_fp32
 GrB_Info GB_Adot3B__plus_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_rminus_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_rminus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_fp64.c b/Source/Generated/GB_AxB__plus_rminus_fp64.c
index f70e4b79d2..82a8bd8d58 100644
--- a/Source/Generated/GB_AxB__plus_rminus_fp64.c
+++ b/Source/Generated/GB_AxB__plus_rminus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_fp64
-// A*B function (heap):      GB_AheapB__plus_rminus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (bkj - aik)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (y - x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (y - x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_RMINUS_FP64 || GxB_NO_PLUS_RMINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_fp64
 GrB_Info GB_Adot2B__plus_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_fp64
 GrB_Info GB_Adot3B__plus_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_rminus_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_rminus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_int16.c b/Source/Generated/GB_AxB__plus_rminus_int16.c
index 2a50602674..39b329d532 100644
--- a/Source/Generated/GB_AxB__plus_rminus_int16.c
+++ b/Source/Generated/GB_AxB__plus_rminus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_int16
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_int16
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_int16
-// A*B function (heap):      GB_AheapB__plus_rminus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (y - x) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (y - x) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_RMINUS_INT16 || GxB_NO_PLUS_RMINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_int16
 GrB_Info GB_Adot2B__plus_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_int16
 GrB_Info GB_Adot3B__plus_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_int16   // C+=A'*B, plus_rminus semiring, int16_t
+(
+    GrB_Matrix C,                               // output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if A's values are unused (confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice parts (confirm)
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if B's values are unused (confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice parts (confirm)
+    const int nthreads                          // presumably the # of threads to use (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out; see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_int16
+GrB_Info GB_Asaxpy3B__plus_rminus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_int32.c b/Source/Generated/GB_AxB__plus_rminus_int32.c
index 934d3f2f10..f9ffd2cae3 100644
--- a/Source/Generated/GB_AxB__plus_rminus_int32.c
+++ b/Source/Generated/GB_AxB__plus_rminus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_int32
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_int32
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_int32
-// A*B function (heap):      GB_AheapB__plus_rminus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (y - x) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (y - x) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_RMINUS_INT32 || GxB_NO_PLUS_RMINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_int32
 GrB_Info GB_Adot2B__plus_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_int32
 GrB_Info GB_Adot3B__plus_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_int32   // C+=A'*B, plus_rminus semiring, int32_t
+(
+    GrB_Matrix C,                               // output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if A's values are unused (confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice parts (confirm)
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if B's values are unused (confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice parts (confirm)
+    const int nthreads                          // presumably the # of threads to use (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out; see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_int32
+GrB_Info GB_Asaxpy3B__plus_rminus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_int64.c b/Source/Generated/GB_AxB__plus_rminus_int64.c
index 5bc8399a50..4894e4b88f 100644
--- a/Source/Generated/GB_AxB__plus_rminus_int64.c
+++ b/Source/Generated/GB_AxB__plus_rminus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_int64
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_int64
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_int64
-// A*B function (heap):      GB_AheapB__plus_rminus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (y - x) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (y - x) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_RMINUS_INT64 || GxB_NO_PLUS_RMINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_int64
 GrB_Info GB_Adot2B__plus_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_int64
 GrB_Info GB_Adot3B__plus_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_int64   // C+=A'*B, plus_rminus semiring, int64_t
+(
+    GrB_Matrix C,                               // output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if A's values are unused (confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice parts (confirm)
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if B's values are unused (confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice parts (confirm)
+    const int nthreads                          // presumably the # of threads to use (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out; see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_int64
+GrB_Info GB_Asaxpy3B__plus_rminus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_int8.c b/Source/Generated/GB_AxB__plus_rminus_int8.c
index 2f6e105599..9c57ba7c9e 100644
--- a/Source/Generated/GB_AxB__plus_rminus_int8.c
+++ b/Source/Generated/GB_AxB__plus_rminus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_int8
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_int8
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_int8
-// A*B function (heap):      GB_AheapB__plus_rminus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (y - x) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (y - x) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_RMINUS_INT8 || GxB_NO_PLUS_RMINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_int8
 GrB_Info GB_Adot2B__plus_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_int8
 GrB_Info GB_Adot3B__plus_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_int8    // C+=A'*B, plus_rminus semiring, int8_t
+(
+    GrB_Matrix C,                               // output: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,      // presumably true if A's values are unused (confirm)
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably how A is split into naslice parts (confirm)
+    const GrB_Matrix B, bool B_is_pattern,      // presumably true if B's values are unused (confirm)
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably how B is split into nbslice parts (confirm)
+    const int nthreads                          // presumably the # of threads to use (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out; see GB_DISABLE
+    #else
+    #include "GB_AxB_dot4_template.c"   // the function body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_int8
+GrB_Info GB_Asaxpy3B__plus_rminus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_uint16.c b/Source/Generated/GB_AxB__plus_rminus_uint16.c
index 85f087d907..7a8355bb77 100644
--- a/Source/Generated/GB_AxB__plus_rminus_uint16.c
+++ b/Source/Generated/GB_AxB__plus_rminus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_uint16
-// A*B function (heap):      GB_AheapB__plus_rminus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (y - x) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (y - x) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_RMINUS_UINT16 || GxB_NO_PLUS_RMINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_uint16
 GrB_Info GB_Adot2B__plus_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_uint16
 GrB_Info GB_Adot3B__plus_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_uint16
+GrB_Info GB_Asaxpy3B__plus_rminus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_uint32.c b/Source/Generated/GB_AxB__plus_rminus_uint32.c
index 7067d25e5a..b0547ec100 100644
--- a/Source/Generated/GB_AxB__plus_rminus_uint32.c
+++ b/Source/Generated/GB_AxB__plus_rminus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_uint32
-// A*B function (heap):      GB_AheapB__plus_rminus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (y - x) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (y - x) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_RMINUS_UINT32 || GxB_NO_PLUS_RMINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_uint32
 GrB_Info GB_Adot2B__plus_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_uint32
 GrB_Info GB_Adot3B__plus_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_uint32
+GrB_Info GB_Asaxpy3B__plus_rminus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_uint64.c b/Source/Generated/GB_AxB__plus_rminus_uint64.c
index 5653a3e193..d1943f5823 100644
--- a/Source/Generated/GB_AxB__plus_rminus_uint64.c
+++ b/Source/Generated/GB_AxB__plus_rminus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_uint64
-// A*B function (heap):      GB_AheapB__plus_rminus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (y - x) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (y - x) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_RMINUS_UINT64 || GxB_NO_PLUS_RMINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_uint64
 GrB_Info GB_Adot2B__plus_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_uint64
 GrB_Info GB_Adot3B__plus_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_uint64
+GrB_Info GB_Asaxpy3B__plus_rminus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_rminus_uint8.c b/Source/Generated/GB_AxB__plus_rminus_uint8.c
index 20c07ad0fc..cf7f1d791b 100644
--- a/Source/Generated/GB_AxB__plus_rminus_uint8.c
+++ b/Source/Generated/GB_AxB__plus_rminus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_rminus_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_rminus_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_rminus_uint8
-// A*B function (heap):      GB_AheapB__plus_rminus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_rminus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_rminus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (bkj - aik) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (y - x) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (y - x) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_RMINUS || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_RMINUS_UINT8 || GxB_NO_PLUS_RMINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_rminus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_rminus_uint8
 GrB_Info GB_Adot2B__plus_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_rminus_uint8
 GrB_Info GB_Adot3B__plus_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_rminus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_rminus_uint8
+GrB_Info GB_Asaxpy3B__plus_rminus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_fp32.c b/Source/Generated/GB_AxB__plus_second_fp32.c
index 21c9324530..806ecddd48 100644
--- a/Source/Generated/GB_AxB__plus_second_fp32.c
+++ b/Source/Generated/GB_AxB__plus_second_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_second_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_second_fp32
-// A*B function (heap):      GB_AheapB__plus_second_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_SECOND_FP32 || GxB_NO_PLUS_SECOND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_fp32
 GrB_Info GB_Adot2B__plus_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_fp32
 GrB_Info GB_Adot3B__plus_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_fp64.c b/Source/Generated/GB_AxB__plus_second_fp64.c
index 22ac368083..f41515700b 100644
--- a/Source/Generated/GB_AxB__plus_second_fp64.c
+++ b/Source/Generated/GB_AxB__plus_second_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_second_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_second_fp64
-// A*B function (heap):      GB_AheapB__plus_second_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_SECOND_FP64 || GxB_NO_PLUS_SECOND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_fp64
 GrB_Info GB_Adot2B__plus_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_fp64
 GrB_Info GB_Adot3B__plus_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_int16.c b/Source/Generated/GB_AxB__plus_second_int16.c
index f0e2df4769..836d57215a 100644
--- a/Source/Generated/GB_AxB__plus_second_int16.c
+++ b/Source/Generated/GB_AxB__plus_second_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_int16
 // A'*B function (dot2):     GB_Adot2B__plus_second_int16
 // A'*B function (dot3):     GB_Adot3B__plus_second_int16
-// A*B function (heap):      GB_AheapB__plus_second_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_SECOND_INT16 || GxB_NO_PLUS_SECOND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_int16
 GrB_Info GB_Adot2B__plus_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_int16
 GrB_Info GB_Adot3B__plus_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_int32.c b/Source/Generated/GB_AxB__plus_second_int32.c
index 864c662fef..3b44ae1409 100644
--- a/Source/Generated/GB_AxB__plus_second_int32.c
+++ b/Source/Generated/GB_AxB__plus_second_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_int32
 // A'*B function (dot2):     GB_Adot2B__plus_second_int32
 // A'*B function (dot3):     GB_Adot3B__plus_second_int32
-// A*B function (heap):      GB_AheapB__plus_second_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_SECOND_INT32 || GxB_NO_PLUS_SECOND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_int32
 GrB_Info GB_Adot2B__plus_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_int32
 GrB_Info GB_Adot3B__plus_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_int64.c b/Source/Generated/GB_AxB__plus_second_int64.c
index b5bccd36b8..1949a969f6 100644
--- a/Source/Generated/GB_AxB__plus_second_int64.c
+++ b/Source/Generated/GB_AxB__plus_second_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_int64
 // A'*B function (dot2):     GB_Adot2B__plus_second_int64
 // A'*B function (dot3):     GB_Adot3B__plus_second_int64
-// A*B function (heap):      GB_AheapB__plus_second_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_SECOND_INT64 || GxB_NO_PLUS_SECOND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_int64
 GrB_Info GB_Adot2B__plus_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_int64
 GrB_Info GB_Adot3B__plus_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_second_int64
+(
+    GrB_Matrix C,                                   // accumulated into: C += A'*B (see section header)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably naslice slices of A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably nbslice slices of B -- confirm in template
+    const int nthreads                              // presumably thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                                  // any GxB_NO_* switch for this semiring => kernel compiled out
+    return (GrB_NO_VALUE) ;                         // disabled: per the GB_DISABLE note, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"               // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_second_int64
+GrB_Info GB_Asaxpy3B__plus_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                                   // now passed directly (was GrB_Matrix *Chandle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,   // presumably: complemented / structural mask flags -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,                                // NOTE(review): likely the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"             // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;                          // replaces the old 'return (info)' of the heap method
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_int8.c b/Source/Generated/GB_AxB__plus_second_int8.c
index 23432dcf96..1f223c13b5 100644
--- a/Source/Generated/GB_AxB__plus_second_int8.c
+++ b/Source/Generated/GB_AxB__plus_second_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_int8
 // A'*B function (dot2):     GB_Adot2B__plus_second_int8
 // A'*B function (dot3):     GB_Adot3B__plus_second_int8
-// A*B function (heap):      GB_AheapB__plus_second_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_SECOND_INT8 || GxB_NO_PLUS_SECOND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_int8
 GrB_Info GB_Adot2B__plus_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_int8
 GrB_Info GB_Adot3B__plus_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_int8
+(
+    GrB_Matrix C,                                   // accumulated into: C += A'*B (see section header)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably naslice slices of A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably nbslice slices of B -- confirm in template
+    const int nthreads                              // presumably thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                                  // any GxB_NO_* switch for this semiring => kernel compiled out
+    return (GrB_NO_VALUE) ;                         // disabled: per the GB_DISABLE note, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"               // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                                   // now passed directly (was GrB_Matrix *Chandle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,   // presumably: complemented / structural mask flags -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,                                // NOTE(review): likely the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"             // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;                          // replaces the old 'return (info)' of the heap method
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_uint16.c b/Source/Generated/GB_AxB__plus_second_uint16.c
index cdea1c9412..e2a0ce0792 100644
--- a/Source/Generated/GB_AxB__plus_second_uint16.c
+++ b/Source/Generated/GB_AxB__plus_second_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_second_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_second_uint16
-// A*B function (heap):      GB_AheapB__plus_second_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_SECOND_UINT16 || GxB_NO_PLUS_SECOND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_uint16
 GrB_Info GB_Adot2B__plus_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_uint16
 GrB_Info GB_Adot3B__plus_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_uint16
+(
+    GrB_Matrix C,                                   // accumulated into: C += A'*B (see section header)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably naslice slices of A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably nbslice slices of B -- confirm in template
+    const int nthreads                              // presumably thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                                  // any GxB_NO_* switch for this semiring => kernel compiled out
+    return (GrB_NO_VALUE) ;                         // disabled: per the GB_DISABLE note, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"               // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                                   // now passed directly (was GrB_Matrix *Chandle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,   // presumably: complemented / structural mask flags -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,                                // NOTE(review): likely the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"             // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;                          // replaces the old 'return (info)' of the heap method
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_uint32.c b/Source/Generated/GB_AxB__plus_second_uint32.c
index 4c6c1ef676..b1c3290891 100644
--- a/Source/Generated/GB_AxB__plus_second_uint32.c
+++ b/Source/Generated/GB_AxB__plus_second_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_second_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_second_uint32
-// A*B function (heap):      GB_AheapB__plus_second_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_SECOND_UINT32 || GxB_NO_PLUS_SECOND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_uint32
 GrB_Info GB_Adot2B__plus_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_uint32
 GrB_Info GB_Adot3B__plus_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_uint32
+(
+    GrB_Matrix C,                                   // accumulated into: C += A'*B (see section header)
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,      // presumably naslice slices of A -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,      // presumably nbslice slices of B -- confirm in template
+    const int nthreads                              // presumably thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE                                  // any GxB_NO_* switch for this semiring => kernel compiled out
+    return (GrB_NO_VALUE) ;                         // disabled: per the GB_DISABLE note, the generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"               // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                                   // now passed directly (was GrB_Matrix *Chandle)
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,   // presumably: complemented / structural mask flags -- confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,     // presumably ntasks entries -- confirm in template
+    const int ntasks,
+    const int nfine,                                // NOTE(review): likely the count of fine-grained tasks -- confirm
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"             // function body comes from this template (not visible here)
+    return (GrB_SUCCESS) ;                          // replaces the old 'return (info)' of the heap method
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_uint64.c b/Source/Generated/GB_AxB__plus_second_uint64.c
index 662a4d79aa..c1e56f005f 100644
--- a/Source/Generated/GB_AxB__plus_second_uint64.c
+++ b/Source/Generated/GB_AxB__plus_second_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_second_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_second_uint64
-// A*B function (heap):      GB_AheapB__plus_second_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_SECOND_UINT64 || GxB_NO_PLUS_SECOND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_uint64
 GrB_Info GB_Adot2B__plus_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_uint64
 GrB_Info GB_Adot3B__plus_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_second_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_second_uint64
+GrB_Info GB_Asaxpy3B__plus_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_second_uint8.c b/Source/Generated/GB_AxB__plus_second_uint8.c
index d12b300bb3..099eb7440f 100644
--- a/Source/Generated/GB_AxB__plus_second_uint8.c
+++ b/Source/Generated/GB_AxB__plus_second_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_second_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_second_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_second_uint8
-// A*B function (heap):      GB_AheapB__plus_second_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_second_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_second_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += bkj
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_SECOND || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_SECOND_UINT8 || GxB_NO_PLUS_SECOND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_second_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_second_uint8
 GrB_Info GB_Adot2B__plus_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_second_uint8
 GrB_Info GB_Adot3B__plus_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_second_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_second_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_second_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_fp32.c b/Source/Generated/GB_AxB__plus_times_fp32.c
index 82dcc9274c..8d530b8bd7 100644
--- a/Source/Generated/GB_AxB__plus_times_fp32.c
+++ b/Source/Generated/GB_AxB__plus_times_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_fp32
 // A'*B function (dot2):     GB_Adot2B__plus_times_fp32
 // A'*B function (dot3):     GB_Adot3B__plus_times_fp32
-// A*B function (heap):      GB_AheapB__plus_times_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik * bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x * y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x * y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_PLUS_TIMES_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_fp32
 GrB_Info GB_Adot2B__plus_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_fp32
 GrB_Info GB_Adot3B__plus_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_times_fp32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_times_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_fp64.c b/Source/Generated/GB_AxB__plus_times_fp64.c
index bc0eb8a079..ea4a6bdaba 100644
--- a/Source/Generated/GB_AxB__plus_times_fp64.c
+++ b/Source/Generated/GB_AxB__plus_times_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_fp64
 // A'*B function (dot2):     GB_Adot2B__plus_times_fp64
 // A'*B function (dot3):     GB_Adot3B__plus_times_fp64
-// A*B function (heap):      GB_AheapB__plus_times_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij += (aik * bkj)
 // Identity: 0
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z += (x * y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z += (x * y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_PLUS_TIMES_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_fp64
 GrB_Info GB_Adot2B__plus_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_fp64
 GrB_Info GB_Adot3B__plus_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__plus_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__plus_times_fp64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__plus_times_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_int16.c b/Source/Generated/GB_AxB__plus_times_int16.c
index 0947191db5..1c390360a8 100644
--- a/Source/Generated/GB_AxB__plus_times_int16.c
+++ b/Source/Generated/GB_AxB__plus_times_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_int16
 // A'*B function (dot2):     GB_Adot2B__plus_times_int16
 // A'*B function (dot3):     GB_Adot3B__plus_times_int16
-// A*B function (heap):      GB_AheapB__plus_times_int16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x * y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x * y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with the same size as GB_CTYPE that can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len*sizeof(int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_PLUS_TIMES_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_int16
 GrB_Info GB_Adot2B__plus_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_int16
 GrB_Info GB_Adot3B__plus_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_int16
+GrB_Info GB_Asaxpy3B__plus_times_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_int32.c b/Source/Generated/GB_AxB__plus_times_int32.c
index 4b0e1aadf1..0a49533c3c 100644
--- a/Source/Generated/GB_AxB__plus_times_int32.c
+++ b/Source/Generated/GB_AxB__plus_times_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_int32
 // A'*B function (dot2):     GB_Adot2B__plus_times_int32
 // A'*B function (dot3):     GB_Adot3B__plus_times_int32
-// A*B function (heap):      GB_AheapB__plus_times_int32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x * y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x * y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_PLUS_TIMES_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_int32
 GrB_Info GB_Adot2B__plus_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_int32
 GrB_Info GB_Adot3B__plus_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_int32
+GrB_Info GB_Asaxpy3B__plus_times_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_int64.c b/Source/Generated/GB_AxB__plus_times_int64.c
index 316ff2c5c6..cdefcf295a 100644
--- a/Source/Generated/GB_AxB__plus_times_int64.c
+++ b/Source/Generated/GB_AxB__plus_times_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_int64
 // A'*B function (dot2):     GB_Adot2B__plus_times_int64
 // A'*B function (dot3):     GB_Adot3B__plus_times_int64
-// A*B function (heap):      GB_AheapB__plus_times_int64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x * y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x * y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_PLUS_TIMES_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_int64
 GrB_Info GB_Adot2B__plus_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_int64
 GrB_Info GB_Adot3B__plus_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_int64
+GrB_Info GB_Asaxpy3B__plus_times_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_int8.c b/Source/Generated/GB_AxB__plus_times_int8.c
index 26ec64db8f..3a631a06e5 100644
--- a/Source/Generated/GB_AxB__plus_times_int8.c
+++ b/Source/Generated/GB_AxB__plus_times_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_int8
 // A'*B function (dot2):     GB_Adot2B__plus_times_int8
 // A'*B function (dot3):     GB_Adot3B__plus_times_int8
-// A*B function (heap):      GB_AheapB__plus_times_int8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x * y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x * y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_PLUS_TIMES_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_int8
 GrB_Info GB_Adot2B__plus_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_int8
 GrB_Info GB_Adot3B__plus_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_int8
+GrB_Info GB_Asaxpy3B__plus_times_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_uint16.c b/Source/Generated/GB_AxB__plus_times_uint16.c
index e1747ea475..d5e33fa896 100644
--- a/Source/Generated/GB_AxB__plus_times_uint16.c
+++ b/Source/Generated/GB_AxB__plus_times_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_uint16
 // A'*B function (dot2):     GB_Adot2B__plus_times_uint16
 // A'*B function (dot3):     GB_Adot3B__plus_times_uint16
-// A*B function (heap):      GB_AheapB__plus_times_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x * y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x * y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_PLUS_TIMES_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_uint16
 GrB_Info GB_Adot2B__plus_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_uint16
 GrB_Info GB_Adot3B__plus_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_uint16
+GrB_Info GB_Asaxpy3B__plus_times_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_uint32.c b/Source/Generated/GB_AxB__plus_times_uint32.c
index 95efb4aade..fdf4ef47d2 100644
--- a/Source/Generated/GB_AxB__plus_times_uint32.c
+++ b/Source/Generated/GB_AxB__plus_times_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_uint32
 // A'*B function (dot2):     GB_Adot2B__plus_times_uint32
 // A'*B function (dot3):     GB_Adot3B__plus_times_uint32
-// A*B function (heap):      GB_AheapB__plus_times_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x * y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x * y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_PLUS_TIMES_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_uint32
 GrB_Info GB_Adot2B__plus_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_uint32
 GrB_Info GB_Adot3B__plus_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_uint32
+GrB_Info GB_Asaxpy3B__plus_times_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_uint64.c b/Source/Generated/GB_AxB__plus_times_uint64.c
index c8f2a932d5..945d3e30a4 100644
--- a/Source/Generated/GB_AxB__plus_times_uint64.c
+++ b/Source/Generated/GB_AxB__plus_times_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_uint64
 // A'*B function (dot2):     GB_Adot2B__plus_times_uint64
 // A'*B function (dot3):     GB_Adot3B__plus_times_uint64
-// A*B function (heap):      GB_AheapB__plus_times_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x * y) ; z += x_op_y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x * y) ; z += x_op_y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_PLUS_TIMES_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_uint64
 GrB_Info GB_Adot2B__plus_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_uint64
 GrB_Info GB_Adot3B__plus_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_uint64
+GrB_Info GB_Asaxpy3B__plus_times_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__plus_times_uint8.c b/Source/Generated/GB_AxB__plus_times_uint8.c
index fc517f31f0..d260d0c63b 100644
--- a/Source/Generated/GB_AxB__plus_times_uint8.c
+++ b/Source/Generated/GB_AxB__plus_times_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__plus_times_uint8
 // A'*B function (dot2):     GB_Adot2B__plus_times_uint8
 // A'*B function (dot3):     GB_Adot3B__plus_times_uint8
-// A*B function (heap):      GB_AheapB__plus_times_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__plus_times_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__plus_times_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij += x_op_y
+// Add:      cij += z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (aik * bkj) ; cij += x_op_y
 // Identity: 0
 // Terminal: ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x * y) ; z += x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x * y) ; z += x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     0
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] += t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x + y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] += t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_TIMES || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_PLUS_TIMES_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__plus_times_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__plus_times_uint8
 GrB_Info GB_Adot2B__plus_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__plus_times_uint8
 GrB_Info GB_Adot3B__plus_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__plus_times_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__plus_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__plus_times_uint8
+GrB_Info GB_Asaxpy3B__plus_times_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_fp32.c b/Source/Generated/GB_AxB__times_div_fp32.c
index 783012ee65..bc7f118f3d 100644
--- a/Source/Generated/GB_AxB__times_div_fp32.c
+++ b/Source/Generated/GB_AxB__times_div_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_fp32
 // A'*B function (dot2):     GB_Adot2B__times_div_fp32
 // A'*B function (dot3):     GB_Adot3B__times_div_fp32
-// A*B function (heap):      GB_AheapB__times_div_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik / bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x / y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x / y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_DIV_FP32 || GxB_NO_TIMES_DIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_div_fp32
 GrB_Info GB_Adot2B__times_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_fp32
 GrB_Info GB_Adot3B__times_div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_div_fp32
+GrB_Info GB_Asaxpy3B__times_div_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_fp64.c b/Source/Generated/GB_AxB__times_div_fp64.c
index d93b364a89..d555f924d4 100644
--- a/Source/Generated/GB_AxB__times_div_fp64.c
+++ b/Source/Generated/GB_AxB__times_div_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_fp64
 // A'*B function (dot2):     GB_Adot2B__times_div_fp64
 // A'*B function (dot3):     GB_Adot3B__times_div_fp64
-// A*B function (heap):      GB_AheapB__times_div_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik / bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik / bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x / y) ;
+#define GB_MULT(z, x, y) \
+    z = (x / y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x / y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x / y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" function; TIMES here)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_DIV_FP64 || GxB_NO_TIMES_DIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_div_fp64
 GrB_Info GB_Adot2B__times_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_fp64
 GrB_Info GB_Adot3B__times_div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_div_fp64
+GrB_Info GB_Asaxpy3B__times_div_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_int16.c b/Source/Generated/GB_AxB__times_div_int16.c
index 769bec54ca..6f3d2319de 100644
--- a/Source/Generated/GB_AxB__times_div_int16.c
+++ b/Source/Generated/GB_AxB__times_div_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_int16
 // A'*B function (dot2):     GB_Adot2B__times_div_int16
 // A'*B function (dot3):     GB_Adot3B__times_div_int16
-// A*B function (heap):      GB_AheapB__times_div_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 16)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 16) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (x, y, 16) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" function; TIMES here)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_DIV_INT16 || GxB_NO_TIMES_DIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_int16
 GrB_Info GB_Adot2B__times_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_int16
 GrB_Info GB_Adot3B__times_div_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_int32.c b/Source/Generated/GB_AxB__times_div_int32.c
index 9dd58cba5d..c98ac07791 100644
--- a/Source/Generated/GB_AxB__times_div_int32.c
+++ b/Source/Generated/GB_AxB__times_div_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_int32
 // A'*B function (dot2):     GB_Adot2B__times_div_int32
 // A'*B function (dot3):     GB_Adot3B__times_div_int32
-// A*B function (heap):      GB_AheapB__times_div_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 32)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 32) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (x, y, 32) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" function; TIMES here)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_DIV_INT32 || GxB_NO_TIMES_DIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_int32
 GrB_Info GB_Adot2B__times_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_int32
 GrB_Info GB_Adot3B__times_div_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_int64.c b/Source/Generated/GB_AxB__times_div_int64.c
index 08f43880c4..d10946a1b9 100644
--- a/Source/Generated/GB_AxB__times_div_int64.c
+++ b/Source/Generated/GB_AxB__times_div_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_int64
 // A'*B function (dot2):     GB_Adot2B__times_div_int64
 // A'*B function (dot3):     GB_Adot3B__times_div_int64
-// A*B function (heap):      GB_AheapB__times_div_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 64)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 64) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (x, y, 64) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for this semiring the monoid is TIMES: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_DIV_INT64 || GxB_NO_TIMES_DIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_int64
 GrB_Info GB_Adot2B__times_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_int64
 GrB_Info GB_Adot3B__times_div_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_int8.c b/Source/Generated/GB_AxB__times_div_int8.c
index 44028d39f7..088a63ee2b 100644
--- a/Source/Generated/GB_AxB__times_div_int8.c
+++ b/Source/Generated/GB_AxB__times_div_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_int8
 // A'*B function (dot2):     GB_Adot2B__times_div_int8
 // A'*B function (dot3):     GB_Adot3B__times_div_int8
-// A*B function (heap):      GB_AheapB__times_div_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (aik, bkj, 8)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (aik, bkj, 8) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (x, y, 8) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for this semiring the monoid is TIMES: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_DIV_INT8 || GxB_NO_TIMES_DIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_int8
 GrB_Info GB_Adot2B__times_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_int8
 GrB_Info GB_Adot3B__times_div_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_uint16.c b/Source/Generated/GB_AxB__times_div_uint16.c
index 85a214d8da..1c4cde0fd3 100644
--- a/Source/Generated/GB_AxB__times_div_uint16.c
+++ b/Source/Generated/GB_AxB__times_div_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_uint16
 // A'*B function (dot2):     GB_Adot2B__times_div_uint16
 // A'*B function (dot3):     GB_Adot3B__times_div_uint16
-// A*B function (heap):      GB_AheapB__times_div_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 16)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 16) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (x, y, 16) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for this semiring the monoid is TIMES: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_DIV_UINT16 || GxB_NO_TIMES_DIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_uint16
 GrB_Info GB_Adot2B__times_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_uint16
 GrB_Info GB_Adot3B__times_div_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_uint32.c b/Source/Generated/GB_AxB__times_div_uint32.c
index 5978a62ce1..a0ff497551 100644
--- a/Source/Generated/GB_AxB__times_div_uint32.c
+++ b/Source/Generated/GB_AxB__times_div_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_uint32
 // A'*B function (dot2):     GB_Adot2B__times_div_uint32
 // A'*B function (dot3):     GB_Adot3B__times_div_uint32
-// A*B function (heap):      GB_AheapB__times_div_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 32)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 32) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (x, y, 32) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for this semiring the monoid is TIMES: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_DIV_UINT32 || GxB_NO_TIMES_DIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_uint32
 GrB_Info GB_Adot2B__times_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_uint32
 GrB_Info GB_Adot3B__times_div_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_uint64.c b/Source/Generated/GB_AxB__times_div_uint64.c
index c0b144e16a..a943be69a2 100644
--- a/Source/Generated/GB_AxB__times_div_uint64.c
+++ b/Source/Generated/GB_AxB__times_div_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_uint64
 // A'*B function (dot2):     GB_Adot2B__times_div_uint64
 // A'*B function (dot3):     GB_Adot3B__times_div_uint64
-// A*B function (heap):      GB_AheapB__times_div_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 64)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 64) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (x, y, 64) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y (TIMES monoid: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_DIV_UINT64 || GxB_NO_TIMES_DIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_uint64
 GrB_Info GB_Adot2B__times_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_uint64
 GrB_Info GB_Adot3B__times_div_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_div_uint8.c b/Source/Generated/GB_AxB__times_div_uint8.c
index 382158d30b..4b5a072452 100644
--- a/Source/Generated/GB_AxB__times_div_uint8.c
+++ b/Source/Generated/GB_AxB__times_div_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_div_uint8
 // A'*B function (dot2):     GB_Adot2B__times_div_uint8
 // A'*B function (dot3):     GB_Adot3B__times_div_uint8
-// A*B function (heap):      GB_AheapB__times_div_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_div_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_div_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (aik, bkj, 8)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (aik, bkj, 8) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (x, y, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (x, y, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (x, y, 8) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y (TIMES monoid: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_DIV || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_DIV_UINT8 || GxB_NO_TIMES_DIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_div_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_div_uint8
 GrB_Info GB_Adot2B__times_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_div_uint8
 GrB_Info GB_Adot3B__times_div_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_div_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_div_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_div_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_fp32.c b/Source/Generated/GB_AxB__times_first_fp32.c
index 4c4924b3a2..3937f02845 100644
--- a/Source/Generated/GB_AxB__times_first_fp32.c
+++ b/Source/Generated/GB_AxB__times_first_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_fp32
 // A'*B function (dot2):     GB_Adot2B__times_first_fp32
 // A'*B function (dot3):     GB_Adot3B__times_first_fp32
-// A*B function (heap):      GB_AheapB__times_first_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y (TIMES monoid: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_FIRST_FP32 || GxB_NO_TIMES_FIRST_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_fp32
 GrB_Info GB_Adot2B__times_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_fp32
 GrB_Info GB_Adot3B__times_first_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_fp32
+GrB_Info GB_Asaxpy3B__times_first_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_fp64.c b/Source/Generated/GB_AxB__times_first_fp64.c
index fa8e940636..ae7c146acb 100644
--- a/Source/Generated/GB_AxB__times_first_fp64.c
+++ b/Source/Generated/GB_AxB__times_first_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_fp64
 // A'*B function (dot2):     GB_Adot2B__times_first_fp64
 // A'*B function (dot3):     GB_Adot3B__times_first_fp64
-// A*B function (heap):      GB_AheapB__times_first_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y (TIMES monoid: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_FIRST_FP64 || GxB_NO_TIMES_FIRST_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_fp64
 GrB_Info GB_Adot2B__times_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_fp64
 GrB_Info GB_Adot3B__times_first_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_fp64
+GrB_Info GB_Asaxpy3B__times_first_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_int16.c b/Source/Generated/GB_AxB__times_first_int16.c
index defa584786..8153bd872c 100644
--- a/Source/Generated/GB_AxB__times_first_int16.c
+++ b/Source/Generated/GB_AxB__times_first_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_int16
 // A'*B function (dot2):     GB_Adot2B__times_first_int16
 // A'*B function (dot3):     GB_Adot3B__times_first_int16
-// A*B function (heap):      GB_AheapB__times_first_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries from Hx [i...] to Cx [p...]; byte count is len * sizeof (int16_t)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_FIRST_INT16 || GxB_NO_TIMES_FIRST_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_int16
 GrB_Info GB_Adot2B__times_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_int16
 GrB_Info GB_Adot3B__times_first_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_int16
+GrB_Info GB_Asaxpy3B__times_first_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_int32.c b/Source/Generated/GB_AxB__times_first_int32.c
index 7cb146f736..5e34ed33bb 100644
--- a/Source/Generated/GB_AxB__times_first_int32.c
+++ b/Source/Generated/GB_AxB__times_first_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_int32
 // A'*B function (dot2):     GB_Adot2B__times_first_int32
 // A'*B function (dot3):     GB_Adot3B__times_first_int32
-// A*B function (heap):      GB_AheapB__times_first_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries from Hx [i...] to Cx [p...]; byte count is len * sizeof (int32_t)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_FIRST_INT32 || GxB_NO_TIMES_FIRST_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_int32
 GrB_Info GB_Adot2B__times_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_int32
 GrB_Info GB_Adot3B__times_first_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_int32
+GrB_Info GB_Asaxpy3B__times_first_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_int64.c b/Source/Generated/GB_AxB__times_first_int64.c
index 914c7d1903..db2e80da41 100644
--- a/Source/Generated/GB_AxB__times_first_int64.c
+++ b/Source/Generated/GB_AxB__times_first_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_int64
 // A'*B function (dot2):     GB_Adot2B__times_first_int64
 // A'*B function (dot3):     GB_Adot3B__times_first_int64
-// A*B function (heap):      GB_AheapB__times_first_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries from Hx [i...] to Cx [p...]; byte count is len * sizeof (int64_t)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_FIRST_INT64 || GxB_NO_TIMES_FIRST_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_int64
 GrB_Info GB_Adot2B__times_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_int64
 GrB_Info GB_Adot3B__times_first_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_int64
+GrB_Info GB_Asaxpy3B__times_first_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_int8.c b/Source/Generated/GB_AxB__times_first_int8.c
index cfa98193af..99e763dbf7 100644
--- a/Source/Generated/GB_AxB__times_first_int8.c
+++ b/Source/Generated/GB_AxB__times_first_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_int8
 // A'*B function (dot2):     GB_Adot2B__times_first_int8
 // A'*B function (dot3):     GB_Adot3B__times_first_int8
-// A*B function (heap):      GB_AheapB__times_first_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries from Hx [i...] to Cx [p...]; byte count is len * sizeof (int8_t)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_FIRST_INT8 || GxB_NO_TIMES_FIRST_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_int8
 GrB_Info GB_Adot2B__times_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_int8
 GrB_Info GB_Adot3B__times_first_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_int8
+GrB_Info GB_Asaxpy3B__times_first_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_uint16.c b/Source/Generated/GB_AxB__times_first_uint16.c
index 6a5350dcf8..9359cd0809 100644
--- a/Source/Generated/GB_AxB__times_first_uint16.c
+++ b/Source/Generated/GB_AxB__times_first_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_uint16
 // A'*B function (dot2):     GB_Adot2B__times_first_uint16
 // A'*B function (dot3):     GB_Adot3B__times_first_uint16
-// A*B function (heap):      GB_AheapB__times_first_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with the TIMES monoid as "+": Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with the TIMES monoid as "+": x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with the TIMES monoid as "+": Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_FIRST_UINT16 || GxB_NO_TIMES_FIRST_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_uint16
 GrB_Info GB_Adot2B__times_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_uint16
 GrB_Info GB_Adot3B__times_first_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_uint16
+GrB_Info GB_Asaxpy3B__times_first_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_uint32.c b/Source/Generated/GB_AxB__times_first_uint32.c
index 68782e0884..266c5e473e 100644
--- a/Source/Generated/GB_AxB__times_first_uint32.c
+++ b/Source/Generated/GB_AxB__times_first_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_uint32
 // A'*B function (dot2):     GB_Adot2B__times_first_uint32
 // A'*B function (dot3):     GB_Adot3B__times_first_uint32
-// A*B function (heap):      GB_AheapB__times_first_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with the TIMES monoid as "+": Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with the TIMES monoid as "+": x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with the TIMES monoid as "+": Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_FIRST_UINT32 || GxB_NO_TIMES_FIRST_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_uint32
 GrB_Info GB_Adot2B__times_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_uint32
 GrB_Info GB_Adot3B__times_first_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_uint32
+GrB_Info GB_Asaxpy3B__times_first_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_uint64.c b/Source/Generated/GB_AxB__times_first_uint64.c
index bb4d595cdf..c293219203 100644
--- a/Source/Generated/GB_AxB__times_first_uint64.c
+++ b/Source/Generated/GB_AxB__times_first_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_uint64
 // A'*B function (dot2):     GB_Adot2B__times_first_uint64
 // A'*B function (dot3):     GB_Adot3B__times_first_uint64
-// A*B function (heap):      GB_AheapB__times_first_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with the TIMES monoid as "+": Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with the TIMES monoid as "+": x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with the TIMES monoid as "+": Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_FIRST_UINT64 || GxB_NO_TIMES_FIRST_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_uint64
 GrB_Info GB_Adot2B__times_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_uint64
 GrB_Info GB_Adot3B__times_first_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_uint64
+GrB_Info GB_Asaxpy3B__times_first_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_first_uint8.c b/Source/Generated/GB_AxB__times_first_uint8.c
index 4359d83653..e45e2fcab5 100644
--- a/Source/Generated/GB_AxB__times_first_uint8.c
+++ b/Source/Generated/GB_AxB__times_first_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_first_uint8
 // A'*B function (dot2):     GB_Adot2B__times_first_uint8
 // A'*B function (dot3):     GB_Adot3B__times_first_uint8
-// A*B function (heap):      GB_AheapB__times_first_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_first_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_first_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = aik
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= aik
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = x ;
+#define GB_MULT(z, x, y) \
+    z = x
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= x ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= x
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with the TIMES monoid as "+": Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with the TIMES monoid as "+": x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with the TIMES monoid as "+": Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FIRST || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_FIRST_UINT8 || GxB_NO_TIMES_FIRST_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_first_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_first_uint8
 GrB_Info GB_Adot2B__times_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_first_uint8
 GrB_Info GB_Adot3B__times_first_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_first_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_first_uint8
+GrB_Info GB_Asaxpy3B__times_first_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_fp32.c b/Source/Generated/GB_AxB__times_iseq_fp32.c
index ad5a373f0e..1644b50b80 100644
--- a/Source/Generated/GB_AxB__times_iseq_fp32.c
+++ b/Source/Generated/GB_AxB__times_iseq_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_fp32
 // A'*B function (dot2):     GB_Adot2B__times_iseq_fp32
 // A'*B function (dot3):     GB_Adot3B__times_iseq_fp32
-// A*B function (heap):      GB_AheapB__times_iseq_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// add function of the monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_ISEQ_FP32 || GxB_NO_TIMES_ISEQ_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_fp32
 GrB_Info GB_Adot2B__times_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_fp32
 GrB_Info GB_Adot3B__times_iseq_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_fp32
+GrB_Info GB_Asaxpy3B__times_iseq_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_fp64.c b/Source/Generated/GB_AxB__times_iseq_fp64.c
index 2162821dd5..82555ec3fd 100644
--- a/Source/Generated/GB_AxB__times_iseq_fp64.c
+++ b/Source/Generated/GB_AxB__times_iseq_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_fp64
 // A'*B function (dot2):     GB_Adot2B__times_iseq_fp64
 // A'*B function (dot3):     GB_Adot3B__times_iseq_fp64
-// A*B function (heap):      GB_AheapB__times_iseq_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// add function of the monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_ISEQ_FP64 || GxB_NO_TIMES_ISEQ_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_fp64
 GrB_Info GB_Adot2B__times_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_fp64
 GrB_Info GB_Adot3B__times_iseq_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_fp64
+GrB_Info GB_Asaxpy3B__times_iseq_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_int16.c b/Source/Generated/GB_AxB__times_iseq_int16.c
index c5f0dd20d8..7beac50183 100644
--- a/Source/Generated/GB_AxB__times_iseq_int16.c
+++ b/Source/Generated/GB_AxB__times_iseq_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_int16
 // A'*B function (dot2):     GB_Adot2B__times_iseq_int16
 // A'*B function (dot3):     GB_Adot3B__times_iseq_int16
-// A*B function (heap):      GB_AheapB__times_iseq_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// add function of the monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_ISEQ_INT16 || GxB_NO_TIMES_ISEQ_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_int16
 GrB_Info GB_Adot2B__times_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_int16
 GrB_Info GB_Adot3B__times_iseq_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_int16
+GrB_Info GB_Asaxpy3B__times_iseq_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_int32.c b/Source/Generated/GB_AxB__times_iseq_int32.c
index 4991820b2a..77d05632fd 100644
--- a/Source/Generated/GB_AxB__times_iseq_int32.c
+++ b/Source/Generated/GB_AxB__times_iseq_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_int32
 // A'*B function (dot2):     GB_Adot2B__times_iseq_int32
 // A'*B function (dot3):     GB_Adot3B__times_iseq_int32
-// A*B function (heap):      GB_AheapB__times_iseq_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// add function of the monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_ISEQ_INT32 || GxB_NO_TIMES_ISEQ_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_int32
 GrB_Info GB_Adot2B__times_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_int32
 GrB_Info GB_Adot3B__times_iseq_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_int32
+GrB_Info GB_Asaxpy3B__times_iseq_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_int64.c b/Source/Generated/GB_AxB__times_iseq_int64.c
index 81116395f2..20d693beb3 100644
--- a/Source/Generated/GB_AxB__times_iseq_int64.c
+++ b/Source/Generated/GB_AxB__times_iseq_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_int64
 // A'*B function (dot2):     GB_Adot2B__times_iseq_int64
 // A'*B function (dot3):     GB_Adot3B__times_iseq_int64
-// A*B function (heap):      GB_AheapB__times_iseq_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update for the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y (TIMES monoid)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update for the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_ISEQ_INT64 || GxB_NO_TIMES_ISEQ_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_int64
 GrB_Info GB_Adot2B__times_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_int64
 GrB_Info GB_Adot3B__times_iseq_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_int64
+GrB_Info GB_Asaxpy3B__times_iseq_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_int8.c b/Source/Generated/GB_AxB__times_iseq_int8.c
index a98c54e603..9a29477ad1 100644
--- a/Source/Generated/GB_AxB__times_iseq_int8.c
+++ b/Source/Generated/GB_AxB__times_iseq_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_int8
 // A'*B function (dot2):     GB_Adot2B__times_iseq_int8
 // A'*B function (dot3):     GB_Adot3B__times_iseq_int8
-// A*B function (heap):      GB_AheapB__times_iseq_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update for the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y (TIMES monoid)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update for the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_ISEQ_INT8 || GxB_NO_TIMES_ISEQ_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_int8
 GrB_Info GB_Adot2B__times_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_int8
 GrB_Info GB_Adot3B__times_iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_int8
+GrB_Info GB_Asaxpy3B__times_iseq_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_uint16.c b/Source/Generated/GB_AxB__times_iseq_uint16.c
index 0569264155..5ea269179c 100644
--- a/Source/Generated/GB_AxB__times_iseq_uint16.c
+++ b/Source/Generated/GB_AxB__times_iseq_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_uint16
 // A'*B function (dot2):     GB_Adot2B__times_iseq_uint16
 // A'*B function (dot3):     GB_Adot3B__times_iseq_uint16
-// A*B function (heap):      GB_AheapB__times_iseq_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update for the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y (TIMES monoid)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update for the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_ISEQ_UINT16 || GxB_NO_TIMES_ISEQ_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_uint16
 GrB_Info GB_Adot2B__times_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_uint16
 GrB_Info GB_Adot3B__times_iseq_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_uint16
+GrB_Info GB_Asaxpy3B__times_iseq_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_uint32.c b/Source/Generated/GB_AxB__times_iseq_uint32.c
index 3a880722e4..e32aa10ee9 100644
--- a/Source/Generated/GB_AxB__times_iseq_uint32.c
+++ b/Source/Generated/GB_AxB__times_iseq_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_uint32
 // A'*B function (dot2):     GB_Adot2B__times_iseq_uint32
 // A'*B function (dot3):     GB_Adot3B__times_iseq_uint32
-// A*B function (heap):      GB_AheapB__times_iseq_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update for the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y (TIMES monoid)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update for the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_ISEQ_UINT32 || GxB_NO_TIMES_ISEQ_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_uint32
 GrB_Info GB_Adot2B__times_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_uint32
 GrB_Info GB_Adot3B__times_iseq_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_uint32
+GrB_Info GB_Asaxpy3B__times_iseq_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_uint64.c b/Source/Generated/GB_AxB__times_iseq_uint64.c
index 64f7ade19a..891c3156ab 100644
--- a/Source/Generated/GB_AxB__times_iseq_uint64.c
+++ b/Source/Generated/GB_AxB__times_iseq_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_uint64
 // A'*B function (dot2):     GB_Adot2B__times_iseq_uint64
 // A'*B function (dot3):     GB_Adot3B__times_iseq_uint64
-// A*B function (heap):      GB_AheapB__times_iseq_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid's add function (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_ISEQ_UINT64 || GxB_NO_TIMES_ISEQ_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_uint64
 GrB_Info GB_Adot2B__times_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_uint64
 GrB_Info GB_Adot3B__times_iseq_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_uint64
+GrB_Info GB_Asaxpy3B__times_iseq_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_iseq_uint8.c b/Source/Generated/GB_AxB__times_iseq_uint8.c
index 635e6419c4..6a6710b4fb 100644
--- a/Source/Generated/GB_AxB__times_iseq_uint8.c
+++ b/Source/Generated/GB_AxB__times_iseq_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_iseq_uint8
 // A'*B function (dot2):     GB_Adot2B__times_iseq_uint8
 // A'*B function (dot3):     GB_Adot3B__times_iseq_uint8
-// A*B function (heap):      GB_AheapB__times_iseq_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_iseq_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_iseq_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik == bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik == bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x == y) ;
+#define GB_MULT(z, x, y) \
+    z = (x == y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x == y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x == y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid's add function (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISEQ || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_ISEQ_UINT8 || GxB_NO_TIMES_ISEQ_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_iseq_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_iseq_uint8
 GrB_Info GB_Adot2B__times_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_iseq_uint8
 GrB_Info GB_Adot3B__times_iseq_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_iseq_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_iseq_uint8
+GrB_Info GB_Asaxpy3B__times_iseq_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_fp32.c b/Source/Generated/GB_AxB__times_isge_fp32.c
index c363a0160b..fe8ec047da 100644
--- a/Source/Generated/GB_AxB__times_isge_fp32.c
+++ b/Source/Generated/GB_AxB__times_isge_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_fp32
 // A'*B function (dot2):     GB_Adot2B__times_isge_fp32
 // A'*B function (dot3):     GB_Adot3B__times_isge_fp32
-// A*B function (heap):      GB_AheapB__times_isge_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid's add function (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_ISGE_FP32 || GxB_NO_TIMES_ISGE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_fp32
 GrB_Info GB_Adot2B__times_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_fp32
 GrB_Info GB_Adot3B__times_isge_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_fp32
+GrB_Info GB_Asaxpy3B__times_isge_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_fp64.c b/Source/Generated/GB_AxB__times_isge_fp64.c
index 63f0eecef5..ba5cf85a6a 100644
--- a/Source/Generated/GB_AxB__times_isge_fp64.c
+++ b/Source/Generated/GB_AxB__times_isge_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_fp64
 // A'*B function (dot2):     GB_Adot2B__times_isge_fp64
 // A'*B function (dot3):     GB_Adot3B__times_isge_fp64
-// A*B function (heap):      GB_AheapB__times_isge_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid's add function (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_ISGE_FP64 || GxB_NO_TIMES_ISGE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_fp64
 GrB_Info GB_Adot2B__times_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_fp64
 GrB_Info GB_Adot3B__times_isge_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_fp64
+GrB_Info GB_Asaxpy3B__times_isge_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_int16.c b/Source/Generated/GB_AxB__times_isge_int16.c
index d516fd67ab..4897270986 100644
--- a/Source/Generated/GB_AxB__times_isge_int16.c
+++ b/Source/Generated/GB_AxB__times_isge_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_int16
 // A'*B function (dot2):     GB_Adot2B__times_isge_int16
 // A'*B function (dot3):     GB_Adot3B__times_isge_int16
-// A*B function (heap):      GB_AheapB__times_isge_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_ISGE_INT16 || GxB_NO_TIMES_ISGE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_int16
 GrB_Info GB_Adot2B__times_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_int16
 GrB_Info GB_Adot3B__times_isge_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_int16
+GrB_Info GB_Asaxpy3B__times_isge_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_int32.c b/Source/Generated/GB_AxB__times_isge_int32.c
index 344490fee8..dfed62d4f9 100644
--- a/Source/Generated/GB_AxB__times_isge_int32.c
+++ b/Source/Generated/GB_AxB__times_isge_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_int32
 // A'*B function (dot2):     GB_Adot2B__times_isge_int32
 // A'*B function (dot3):     GB_Adot3B__times_isge_int32
-// A*B function (heap):      GB_AheapB__times_isge_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_ISGE_INT32 || GxB_NO_TIMES_ISGE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_int32
 GrB_Info GB_Adot2B__times_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_int32
 GrB_Info GB_Adot3B__times_isge_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_int32
+GrB_Info GB_Asaxpy3B__times_isge_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_int64.c b/Source/Generated/GB_AxB__times_isge_int64.c
index ac00dd488a..e5be5f7889 100644
--- a/Source/Generated/GB_AxB__times_isge_int64.c
+++ b/Source/Generated/GB_AxB__times_isge_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_int64
 // A'*B function (dot2):     GB_Adot2B__times_isge_int64
 // A'*B function (dot3):     GB_Adot3B__times_isge_int64
-// A*B function (heap):      GB_AheapB__times_isge_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_ISGE_INT64 || GxB_NO_TIMES_ISGE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_int64
 GrB_Info GB_Adot2B__times_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_int64
 GrB_Info GB_Adot3B__times_isge_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_int64
+GrB_Info GB_Asaxpy3B__times_isge_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_int8.c b/Source/Generated/GB_AxB__times_isge_int8.c
index 669d71d882..0a8dd280f0 100644
--- a/Source/Generated/GB_AxB__times_isge_int8.c
+++ b/Source/Generated/GB_AxB__times_isge_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_int8
 // A'*B function (dot2):     GB_Adot2B__times_isge_int8
 // A'*B function (dot3):     GB_Adot3B__times_isge_int8
-// A*B function (heap):      GB_AheapB__times_isge_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_ISGE_INT8 || GxB_NO_TIMES_ISGE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_int8
 GrB_Info GB_Adot2B__times_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_int8
 GrB_Info GB_Adot3B__times_isge_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_int8
+GrB_Info GB_Asaxpy3B__times_isge_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_uint16.c b/Source/Generated/GB_AxB__times_isge_uint16.c
index 92bb152a5c..e8003729ef 100644
--- a/Source/Generated/GB_AxB__times_isge_uint16.c
+++ b/Source/Generated/GB_AxB__times_isge_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_uint16
 // A'*B function (dot2):     GB_Adot2B__times_isge_uint16
 // A'*B function (dot3):     GB_Adot3B__times_isge_uint16
-// A*B function (heap):      GB_AheapB__times_isge_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_ISGE_UINT16 || GxB_NO_TIMES_ISGE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_uint16
 GrB_Info GB_Adot2B__times_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_uint16
 GrB_Info GB_Adot3B__times_isge_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_uint16
+GrB_Info GB_Asaxpy3B__times_isge_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_uint32.c b/Source/Generated/GB_AxB__times_isge_uint32.c
index 0761f19642..05c0eb4f31 100644
--- a/Source/Generated/GB_AxB__times_isge_uint32.c
+++ b/Source/Generated/GB_AxB__times_isge_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_uint32
 // A'*B function (dot2):     GB_Adot2B__times_isge_uint32
 // A'*B function (dot3):     GB_Adot3B__times_isge_uint32
-// A*B function (heap):      GB_AheapB__times_isge_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_ISGE_UINT32 || GxB_NO_TIMES_ISGE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_uint32
 GrB_Info GB_Adot2B__times_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_uint32
 GrB_Info GB_Adot3B__times_isge_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_uint32
+GrB_Info GB_Asaxpy3B__times_isge_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_uint64.c b/Source/Generated/GB_AxB__times_isge_uint64.c
index 69fc54f649..c8f2b0ccb1 100644
--- a/Source/Generated/GB_AxB__times_isge_uint64.c
+++ b/Source/Generated/GB_AxB__times_isge_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_uint64
 // A'*B function (dot2):     GB_Adot2B__times_isge_uint64
 // A'*B function (dot3):     GB_Adot3B__times_isge_uint64
-// A*B function (heap):      GB_AheapB__times_isge_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_ISGE_UINT64 || GxB_NO_TIMES_ISGE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_uint64
 GrB_Info GB_Adot2B__times_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_uint64
 GrB_Info GB_Adot3B__times_isge_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_uint64
+GrB_Info GB_Asaxpy3B__times_isge_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isge_uint8.c b/Source/Generated/GB_AxB__times_isge_uint8.c
index b87411d39e..b582caad59 100644
--- a/Source/Generated/GB_AxB__times_isge_uint8.c
+++ b/Source/Generated/GB_AxB__times_isge_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isge_uint8
 // A'*B function (dot2):     GB_Adot2B__times_isge_uint8
 // A'*B function (dot3):     GB_Adot3B__times_isge_uint8
-// A*B function (heap):      GB_AheapB__times_isge_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isge_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isge_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik >= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik >= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x >= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x >= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x >= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x >= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the additive monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the additive monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGE || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_ISGE_UINT8 || GxB_NO_TIMES_ISGE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isge_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isge_uint8
 GrB_Info GB_Adot2B__times_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isge_uint8
 GrB_Info GB_Adot3B__times_isge_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isge_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isge_uint8
+GrB_Info GB_Asaxpy3B__times_isge_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_fp32.c b/Source/Generated/GB_AxB__times_isgt_fp32.c
index bd3ff4440c..9815edf964 100644
--- a/Source/Generated/GB_AxB__times_isgt_fp32.c
+++ b/Source/Generated/GB_AxB__times_isgt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_fp32
 // A'*B function (dot2):     GB_Adot2B__times_isgt_fp32
 // A'*B function (dot3):     GB_Adot3B__times_isgt_fp32
-// A*B function (heap):      GB_AheapB__times_isgt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_ISGT_FP32 || GxB_NO_TIMES_ISGT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_fp32
 GrB_Info GB_Adot2B__times_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_fp32
 GrB_Info GB_Adot3B__times_isgt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_fp32
+GrB_Info GB_Asaxpy3B__times_isgt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_fp64.c b/Source/Generated/GB_AxB__times_isgt_fp64.c
index c0793a4d7f..9c8174326e 100644
--- a/Source/Generated/GB_AxB__times_isgt_fp64.c
+++ b/Source/Generated/GB_AxB__times_isgt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_fp64
 // A'*B function (dot2):     GB_Adot2B__times_isgt_fp64
 // A'*B function (dot3):     GB_Adot3B__times_isgt_fp64
-// A*B function (heap):      GB_AheapB__times_isgt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_ISGT_FP64 || GxB_NO_TIMES_ISGT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_fp64
 GrB_Info GB_Adot2B__times_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_fp64
 GrB_Info GB_Adot3B__times_isgt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_fp64
+GrB_Info GB_Asaxpy3B__times_isgt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_int16.c b/Source/Generated/GB_AxB__times_isgt_int16.c
index bcb26ed833..df967e17cb 100644
--- a/Source/Generated/GB_AxB__times_isgt_int16.c
+++ b/Source/Generated/GB_AxB__times_isgt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_int16
 // A'*B function (dot2):     GB_Adot2B__times_isgt_int16
 // A'*B function (dot3):     GB_Adot3B__times_isgt_int16
-// A*B function (heap):      GB_AheapB__times_isgt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_ISGT_INT16 || GxB_NO_TIMES_ISGT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_int16
 GrB_Info GB_Adot2B__times_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_int16
 GrB_Info GB_Adot3B__times_isgt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_int16
+GrB_Info GB_Asaxpy3B__times_isgt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_int32.c b/Source/Generated/GB_AxB__times_isgt_int32.c
index d72413ccd3..1c44e65515 100644
--- a/Source/Generated/GB_AxB__times_isgt_int32.c
+++ b/Source/Generated/GB_AxB__times_isgt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_int32
 // A'*B function (dot2):     GB_Adot2B__times_isgt_int32
 // A'*B function (dot3):     GB_Adot3B__times_isgt_int32
-// A*B function (heap):      GB_AheapB__times_isgt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_ISGT_INT32 || GxB_NO_TIMES_ISGT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_int32
 GrB_Info GB_Adot2B__times_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_int32
 GrB_Info GB_Adot3B__times_isgt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_int32
+GrB_Info GB_Asaxpy3B__times_isgt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_int64.c b/Source/Generated/GB_AxB__times_isgt_int64.c
index 48dd79f1e2..db783d68a6 100644
--- a/Source/Generated/GB_AxB__times_isgt_int64.c
+++ b/Source/Generated/GB_AxB__times_isgt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_int64
 // A'*B function (dot2):     GB_Adot2B__times_isgt_int64
 // A'*B function (dot3):     GB_Adot3B__times_isgt_int64
-// A*B function (heap):      GB_AheapB__times_isgt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_ISGT_INT64 || GxB_NO_TIMES_ISGT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_int64
 GrB_Info GB_Adot2B__times_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_int64
 GrB_Info GB_Adot3B__times_isgt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_int64
+GrB_Info GB_Asaxpy3B__times_isgt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_int8.c b/Source/Generated/GB_AxB__times_isgt_int8.c
index 329f78c6cd..aa174b270b 100644
--- a/Source/Generated/GB_AxB__times_isgt_int8.c
+++ b/Source/Generated/GB_AxB__times_isgt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_int8
 // A'*B function (dot2):     GB_Adot2B__times_isgt_int8
 // A'*B function (dot3):     GB_Adot3B__times_isgt_int8
-// A*B function (heap):      GB_AheapB__times_isgt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_ISGT_INT8 || GxB_NO_TIMES_ISGT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_int8
 GrB_Info GB_Adot2B__times_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_int8
 GrB_Info GB_Adot3B__times_isgt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_int8
+GrB_Info GB_Asaxpy3B__times_isgt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_uint16.c b/Source/Generated/GB_AxB__times_isgt_uint16.c
index 6c31744575..d2ca11b894 100644
--- a/Source/Generated/GB_AxB__times_isgt_uint16.c
+++ b/Source/Generated/GB_AxB__times_isgt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_uint16
 // A'*B function (dot2):     GB_Adot2B__times_isgt_uint16
 // A'*B function (dot3):     GB_Adot3B__times_isgt_uint16
-// A*B function (heap):      GB_AheapB__times_isgt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_ISGT_UINT16 || GxB_NO_TIMES_ISGT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_uint16
 GrB_Info GB_Adot2B__times_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_uint16
 GrB_Info GB_Adot3B__times_isgt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_uint16
+GrB_Info GB_Asaxpy3B__times_isgt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_uint32.c b/Source/Generated/GB_AxB__times_isgt_uint32.c
index b9197c089f..df21722415 100644
--- a/Source/Generated/GB_AxB__times_isgt_uint32.c
+++ b/Source/Generated/GB_AxB__times_isgt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_uint32
 // A'*B function (dot2):     GB_Adot2B__times_isgt_uint32
 // A'*B function (dot3):     GB_Adot3B__times_isgt_uint32
-// A*B function (heap):      GB_AheapB__times_isgt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_ISGT_UINT32 || GxB_NO_TIMES_ISGT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_uint32
 GrB_Info GB_Adot2B__times_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_uint32
 GrB_Info GB_Adot3B__times_isgt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_uint32   // C+=A'*B, TIMES_ISGT_UINT32 semiring
+(
+    GrB_Matrix C,                       // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // disabled: generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_uint32
+GrB_Info GB_Asaxpy3B__times_isgt_uint32 // C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably size of TaskList; confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_uint64.c b/Source/Generated/GB_AxB__times_isgt_uint64.c
index ba55de442e..c0400eadf1 100644
--- a/Source/Generated/GB_AxB__times_isgt_uint64.c
+++ b/Source/Generated/GB_AxB__times_isgt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_uint64
 // A'*B function (dot2):     GB_Adot2B__times_isgt_uint64
 // A'*B function (dot3):     GB_Adot3B__times_isgt_uint64
-// A*B function (heap):      GB_AheapB__times_isgt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid's "add" function; the monoid here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_ISGT_UINT64 || GxB_NO_TIMES_ISGT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_uint64
 GrB_Info GB_Adot2B__times_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_uint64
 GrB_Info GB_Adot3B__times_isgt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_uint64   // C+=A'*B, TIMES_ISGT_UINT64 semiring
+(
+    GrB_Matrix C,                       // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // disabled: generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_uint64
+GrB_Info GB_Asaxpy3B__times_isgt_uint64 // C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably size of TaskList; confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isgt_uint8.c b/Source/Generated/GB_AxB__times_isgt_uint8.c
index c4a1562b8a..c40fcae672 100644
--- a/Source/Generated/GB_AxB__times_isgt_uint8.c
+++ b/Source/Generated/GB_AxB__times_isgt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isgt_uint8
 // A'*B function (dot2):     GB_Adot2B__times_isgt_uint8
 // A'*B function (dot3):     GB_Adot3B__times_isgt_uint8
-// A*B function (heap):      GB_AheapB__times_isgt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isgt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isgt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik > bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik > bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x > y) ;
+#define GB_MULT(z, x, y) \
+    z = (x > y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x > y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x > y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid's "add" function; the monoid here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_ISGT_UINT8 || GxB_NO_TIMES_ISGT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isgt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isgt_uint8
 GrB_Info GB_Adot2B__times_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isgt_uint8
 GrB_Info GB_Adot3B__times_isgt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isgt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isgt_uint8    // C+=A'*B, TIMES_ISGT_UINT8 semiring
+(
+    GrB_Matrix C,                       // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // disabled: generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isgt_uint8
+GrB_Info GB_Asaxpy3B__times_isgt_uint8  // C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably size of TaskList; confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_fp32.c b/Source/Generated/GB_AxB__times_isle_fp32.c
index f51ab9d99b..6ef4f20780 100644
--- a/Source/Generated/GB_AxB__times_isle_fp32.c
+++ b/Source/Generated/GB_AxB__times_isle_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_fp32
 // A'*B function (dot2):     GB_Adot2B__times_isle_fp32
 // A'*B function (dot3):     GB_Adot3B__times_isle_fp32
-// A*B function (heap):      GB_AheapB__times_isle_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid's "add" function; the monoid here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_ISLE_FP32 || GxB_NO_TIMES_ISLE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_fp32
 GrB_Info GB_Adot2B__times_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_fp32
 GrB_Info GB_Adot3B__times_isle_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_fp32     // C+=A'*B, TIMES_ISLE_FP32 semiring
+(
+    GrB_Matrix C,                       // input/output: C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // disabled: generic case is used
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_fp32
+GrB_Info GB_Asaxpy3B__times_isle_fp32   // C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,                   // presumably size of TaskList; confirm
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // function body is this template
+    return (GrB_SUCCESS) ;              // reached after the template code
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_fp64.c b/Source/Generated/GB_AxB__times_isle_fp64.c
index ff71163d69..aed11d9be1 100644
--- a/Source/Generated/GB_AxB__times_isle_fp64.c
+++ b/Source/Generated/GB_AxB__times_isle_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_fp64
 // A'*B function (dot2):     GB_Adot2B__times_isle_fp64
 // A'*B function (dot3):     GB_Adot3B__times_isle_fp64
-// A*B function (heap):      GB_AheapB__times_isle_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid's "add" function; the monoid here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_ISLE_FP64 || GxB_NO_TIMES_ISLE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_fp64
 GrB_Info GB_Adot2B__times_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_fp64
 GrB_Info GB_Adot3B__times_isle_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_fp64
+GrB_Info GB_Asaxpy3B__times_isle_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_int16.c b/Source/Generated/GB_AxB__times_isle_int16.c
index 6426c03c53..d0e1328c6e 100644
--- a/Source/Generated/GB_AxB__times_isle_int16.c
+++ b/Source/Generated/GB_AxB__times_isle_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_int16
 // A'*B function (dot2):     GB_Adot2B__times_isle_int16
 // A'*B function (dot3):     GB_Adot3B__times_isle_int16
-// A*B function (heap):      GB_AheapB__times_isle_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// Cx [p] *= t: monoid update of C(i,j) (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t: fold t into Hx [i] with the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_ISLE_INT16 || GxB_NO_TIMES_ISLE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_int16
 GrB_Info GB_Adot2B__times_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_int16
 GrB_Info GB_Adot3B__times_isle_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_int16
+GrB_Info GB_Asaxpy3B__times_isle_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_int32.c b/Source/Generated/GB_AxB__times_isle_int32.c
index 6ac9aaeed2..7e6aa95a59 100644
--- a/Source/Generated/GB_AxB__times_isle_int32.c
+++ b/Source/Generated/GB_AxB__times_isle_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_int32
 // A'*B function (dot2):     GB_Adot2B__times_isle_int32
 // A'*B function (dot3):     GB_Adot3B__times_isle_int32
-// A*B function (heap):      GB_AheapB__times_isle_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// Cx [p] *= t: monoid update of C(i,j) (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t: fold t into Hx [i] with the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_ISLE_INT32 || GxB_NO_TIMES_ISLE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_int32
 GrB_Info GB_Adot2B__times_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_int32
 GrB_Info GB_Adot3B__times_isle_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_int32
+GrB_Info GB_Asaxpy3B__times_isle_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_int64.c b/Source/Generated/GB_AxB__times_isle_int64.c
index 9e216e2f5f..ae91971fe2 100644
--- a/Source/Generated/GB_AxB__times_isle_int64.c
+++ b/Source/Generated/GB_AxB__times_isle_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_int64
 // A'*B function (dot2):     GB_Adot2B__times_isle_int64
 // A'*B function (dot3):     GB_Adot3B__times_isle_int64
-// A*B function (heap):      GB_AheapB__times_isle_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// Cx [p] *= t: monoid update of C(i,j) (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t: fold t into Hx [i] with the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_ISLE_INT64 || GxB_NO_TIMES_ISLE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_int64
 GrB_Info GB_Adot2B__times_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_int64
 GrB_Info GB_Adot3B__times_isle_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_int64
+GrB_Info GB_Asaxpy3B__times_isle_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_int8.c b/Source/Generated/GB_AxB__times_isle_int8.c
index ba446643ac..42b98f9c2c 100644
--- a/Source/Generated/GB_AxB__times_isle_int8.c
+++ b/Source/Generated/GB_AxB__times_isle_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_int8
 // A'*B function (dot2):     GB_Adot2B__times_isle_int8
 // A'*B function (dot3):     GB_Adot3B__times_isle_int8
-// A*B function (heap):      GB_AheapB__times_isle_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// Cx [p] *= t: monoid update of C(i,j) (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t: fold t into Hx [i] with the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_ISLE_INT8 || GxB_NO_TIMES_ISLE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_int8
 GrB_Info GB_Adot2B__times_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_int8
 GrB_Info GB_Adot3B__times_isle_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_int8
+GrB_Info GB_Asaxpy3B__times_isle_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_uint16.c b/Source/Generated/GB_AxB__times_isle_uint16.c
index 8b8fe7b8eb..96ba87d7ec 100644
--- a/Source/Generated/GB_AxB__times_isle_uint16.c
+++ b/Source/Generated/GB_AxB__times_isle_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_uint16
 // A'*B function (dot2):     GB_Adot2B__times_isle_uint16
 // A'*B function (dot3):     GB_Adot3B__times_isle_uint16
-// A*B function (heap):      GB_AheapB__times_isle_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_ISLE_UINT16 || GxB_NO_TIMES_ISLE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_uint16
 GrB_Info GB_Adot2B__times_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_uint16
 GrB_Info GB_Adot3B__times_isle_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_uint16
+GrB_Info GB_Asaxpy3B__times_isle_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_uint32.c b/Source/Generated/GB_AxB__times_isle_uint32.c
index f8e114d314..458f7d8a94 100644
--- a/Source/Generated/GB_AxB__times_isle_uint32.c
+++ b/Source/Generated/GB_AxB__times_isle_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_uint32
 // A'*B function (dot2):     GB_Adot2B__times_isle_uint32
 // A'*B function (dot3):     GB_Adot3B__times_isle_uint32
-// A*B function (heap):      GB_AheapB__times_isle_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_ISLE_UINT32 || GxB_NO_TIMES_ISLE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_uint32
 GrB_Info GB_Adot2B__times_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_uint32
 GrB_Info GB_Adot3B__times_isle_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_uint32
+GrB_Info GB_Asaxpy3B__times_isle_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_uint64.c b/Source/Generated/GB_AxB__times_isle_uint64.c
index d47a1b8304..c3a02c785f 100644
--- a/Source/Generated/GB_AxB__times_isle_uint64.c
+++ b/Source/Generated/GB_AxB__times_isle_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_uint64
 // A'*B function (dot2):     GB_Adot2B__times_isle_uint64
 // A'*B function (dot3):     GB_Adot3B__times_isle_uint64
-// A*B function (heap):      GB_AheapB__times_isle_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_ISLE_UINT64 || GxB_NO_TIMES_ISLE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_uint64
 GrB_Info GB_Adot2B__times_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_uint64
 GrB_Info GB_Adot3B__times_isle_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_uint64
+GrB_Info GB_Asaxpy3B__times_isle_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isle_uint8.c b/Source/Generated/GB_AxB__times_isle_uint8.c
index cbed4e5cc8..93e77a1495 100644
--- a/Source/Generated/GB_AxB__times_isle_uint8.c
+++ b/Source/Generated/GB_AxB__times_isle_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isle_uint8
 // A'*B function (dot2):     GB_Adot2B__times_isle_uint8
 // A'*B function (dot3):     GB_Adot3B__times_isle_uint8
-// A*B function (heap):      GB_AheapB__times_isle_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isle_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isle_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik <= bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik <= bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x <= y) ;
+#define GB_MULT(z, x, y) \
+    z = (x <= y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x <= y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x <= y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (here Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function applied to x and y (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (here Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLE || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_ISLE_UINT8 || GxB_NO_TIMES_ISLE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isle_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isle_uint8
 GrB_Info GB_Adot2B__times_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isle_uint8
 GrB_Info GB_Adot3B__times_isle_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isle_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isle_uint8
+GrB_Info GB_Asaxpy3B__times_isle_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_fp32.c b/Source/Generated/GB_AxB__times_islt_fp32.c
index eaec9b06a7..f33e1b215c 100644
--- a/Source/Generated/GB_AxB__times_islt_fp32.c
+++ b/Source/Generated/GB_AxB__times_islt_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_fp32
 // A'*B function (dot2):     GB_Adot2B__times_islt_fp32
 // A'*B function (dot3):     GB_Adot3B__times_islt_fp32
-// A*B function (heap):      GB_AheapB__times_islt_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with "+" being the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with "+" being the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with "+" being the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_ISLT_FP32 || GxB_NO_TIMES_ISLT_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_fp32
 GrB_Info GB_Adot2B__times_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_fp32
 GrB_Info GB_Adot3B__times_islt_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_fp32
+GrB_Info GB_Asaxpy3B__times_islt_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_fp64.c b/Source/Generated/GB_AxB__times_islt_fp64.c
index 08a965eb70..93d34c55f2 100644
--- a/Source/Generated/GB_AxB__times_islt_fp64.c
+++ b/Source/Generated/GB_AxB__times_islt_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_fp64
 // A'*B function (dot2):     GB_Adot2B__times_islt_fp64
 // A'*B function (dot3):     GB_Adot3B__times_islt_fp64
-// A*B function (heap):      GB_AheapB__times_islt_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with "+" being the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with "+" being the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with "+" being the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_ISLT_FP64 || GxB_NO_TIMES_ISLT_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_fp64
 GrB_Info GB_Adot2B__times_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_fp64
 GrB_Info GB_Adot3B__times_islt_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_fp64
+GrB_Info GB_Asaxpy3B__times_islt_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_int16.c b/Source/Generated/GB_AxB__times_islt_int16.c
index 75347df82e..8f4fee1f60 100644
--- a/Source/Generated/GB_AxB__times_islt_int16.c
+++ b/Source/Generated/GB_AxB__times_islt_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_int16
 // A'*B function (dot2):     GB_Adot2B__times_islt_int16
 // A'*B function (dot3):     GB_Adot3B__times_islt_int16
-// A*B function (heap):      GB_AheapB__times_islt_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with "+" being the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with "+" being the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with "+" being the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_ISLT_INT16 || GxB_NO_TIMES_ISLT_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_int16
 GrB_Info GB_Adot2B__times_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_int16
 GrB_Info GB_Adot3B__times_islt_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_int16
+GrB_Info GB_Asaxpy3B__times_islt_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_int32.c b/Source/Generated/GB_AxB__times_islt_int32.c
index 0545c9b514..326d9dbf79 100644
--- a/Source/Generated/GB_AxB__times_islt_int32.c
+++ b/Source/Generated/GB_AxB__times_islt_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_int32
 // A'*B function (dot2):     GB_Adot2B__times_islt_int32
 // A'*B function (dot3):     GB_Adot3B__times_islt_int32
-// A*B function (heap):      GB_AheapB__times_islt_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, with "+" being the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, with "+" being the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, with "+" being the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_ISLT_INT32 || GxB_NO_TIMES_ISLT_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_int32
 GrB_Info GB_Adot2B__times_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_int32
 GrB_Info GB_Adot3B__times_islt_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_int32
+GrB_Info GB_Asaxpy3B__times_islt_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_int64.c b/Source/Generated/GB_AxB__times_islt_int64.c
index 9068cc1ffb..083d2813c0 100644
--- a/Source/Generated/GB_AxB__times_islt_int64.c
+++ b/Source/Generated/GB_AxB__times_islt_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_int64
 // A'*B function (dot2):     GB_Adot2B__times_islt_int64
 // A'*B function (dot3):     GB_Adot3B__times_islt_int64
-// A*B function (heap):      GB_AheapB__times_islt_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; the monoid is TIMES here, so x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_ISLT_INT64 || GxB_NO_TIMES_ISLT_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_int64
 GrB_Info GB_Adot2B__times_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_int64
 GrB_Info GB_Adot3B__times_islt_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_int64
+GrB_Info GB_Asaxpy3B__times_islt_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_int8.c b/Source/Generated/GB_AxB__times_islt_int8.c
index a735c8963b..c660fd3279 100644
--- a/Source/Generated/GB_AxB__times_islt_int8.c
+++ b/Source/Generated/GB_AxB__times_islt_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_int8
 // A'*B function (dot2):     GB_Adot2B__times_islt_int8
 // A'*B function (dot3):     GB_Adot3B__times_islt_int8
-// A*B function (heap):      GB_AheapB__times_islt_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; the monoid is TIMES here, so x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_ISLT_INT8 || GxB_NO_TIMES_ISLT_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_int8
 GrB_Info GB_Adot2B__times_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_int8
 GrB_Info GB_Adot3B__times_islt_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_int8
+GrB_Info GB_Asaxpy3B__times_islt_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_uint16.c b/Source/Generated/GB_AxB__times_islt_uint16.c
index ecd0ae3889..1124212c6e 100644
--- a/Source/Generated/GB_AxB__times_islt_uint16.c
+++ b/Source/Generated/GB_AxB__times_islt_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_uint16
 // A'*B function (dot2):     GB_Adot2B__times_islt_uint16
 // A'*B function (dot3):     GB_Adot3B__times_islt_uint16
-// A*B function (heap):      GB_AheapB__times_islt_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; the monoid is TIMES here, so x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_ISLT_UINT16 || GxB_NO_TIMES_ISLT_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_uint16
 GrB_Info GB_Adot2B__times_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_uint16
 GrB_Info GB_Adot3B__times_islt_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_uint16
+GrB_Info GB_Asaxpy3B__times_islt_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_uint32.c b/Source/Generated/GB_AxB__times_islt_uint32.c
index c1a51d6131..5a766d0978 100644
--- a/Source/Generated/GB_AxB__times_islt_uint32.c
+++ b/Source/Generated/GB_AxB__times_islt_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_uint32
 // A'*B function (dot2):     GB_Adot2B__times_islt_uint32
 // A'*B function (dot3):     GB_Adot3B__times_islt_uint32
-// A*B function (heap):      GB_AheapB__times_islt_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) "+=" t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function "x + y"; the monoid is TIMES here, so x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] "+=" t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_ISLT_UINT32 || GxB_NO_TIMES_ISLT_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_uint32
 GrB_Info GB_Adot2B__times_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_uint32
 GrB_Info GB_Adot3B__times_islt_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_uint32
+GrB_Info GB_Asaxpy3B__times_islt_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_uint64.c b/Source/Generated/GB_AxB__times_islt_uint64.c
index 83db2bd45b..25d4ab7a6c 100644
--- a/Source/Generated/GB_AxB__times_islt_uint64.c
+++ b/Source/Generated/GB_AxB__times_islt_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_uint64
 // A'*B function (dot2):     GB_Adot2B__times_islt_uint64
 // A'*B function (dot3):     GB_Adot3B__times_islt_uint64
-// A*B function (heap):      GB_AheapB__times_islt_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where '+=' is the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where '+' is the monoid operator (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where '+=' is the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_ISLT_UINT64 || GxB_NO_TIMES_ISLT_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_uint64
 GrB_Info GB_Adot2B__times_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_uint64
 GrB_Info GB_Adot3B__times_islt_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_uint64
+GrB_Info GB_Asaxpy3B__times_islt_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_islt_uint8.c b/Source/Generated/GB_AxB__times_islt_uint8.c
index a87dc97f89..15e8d4e86c 100644
--- a/Source/Generated/GB_AxB__times_islt_uint8.c
+++ b/Source/Generated/GB_AxB__times_islt_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_islt_uint8
 // A'*B function (dot2):     GB_Adot2B__times_islt_uint8
 // A'*B function (dot3):     GB_Adot3B__times_islt_uint8
-// A*B function (heap):      GB_AheapB__times_islt_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_islt_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_islt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik < bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik < bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x < y) ;
+#define GB_MULT(z, x, y) \
+    z = (x < y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x < y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x < y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where '+=' is the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where '+' is the monoid operator (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where '+=' is the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISLT || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_ISLT_UINT8 || GxB_NO_TIMES_ISLT_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_islt_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_islt_uint8
 GrB_Info GB_Adot2B__times_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_islt_uint8
 GrB_Info GB_Adot3B__times_islt_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_islt_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_islt_uint8
+GrB_Info GB_Asaxpy3B__times_islt_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_fp32.c b/Source/Generated/GB_AxB__times_isne_fp32.c
index 71dff1740f..8d72d7dbd8 100644
--- a/Source/Generated/GB_AxB__times_isne_fp32.c
+++ b/Source/Generated/GB_AxB__times_isne_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_fp32
 // A'*B function (dot2):     GB_Adot2B__times_isne_fp32
 // A'*B function (dot3):     GB_Adot3B__times_isne_fp32
-// A*B function (heap):      GB_AheapB__times_isne_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where '+=' is the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where '+' is the monoid operator (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where '+=' is the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_ISNE_FP32 || GxB_NO_TIMES_ISNE_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_fp32
 GrB_Info GB_Adot2B__times_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_fp32
 GrB_Info GB_Adot3B__times_isne_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_fp32
+GrB_Info GB_Asaxpy3B__times_isne_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_fp64.c b/Source/Generated/GB_AxB__times_isne_fp64.c
index c05c26b77f..f7f4c87c70 100644
--- a/Source/Generated/GB_AxB__times_isne_fp64.c
+++ b/Source/Generated/GB_AxB__times_isne_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_fp64
 // A'*B function (dot2):     GB_Adot2B__times_isne_fp64
 // A'*B function (dot3):     GB_Adot3B__times_isne_fp64
-// A*B function (heap):      GB_AheapB__times_isne_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where '+=' is the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where '+' is the monoid operator (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where '+=' is the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_ISNE_FP64 || GxB_NO_TIMES_ISNE_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_fp64
 GrB_Info GB_Adot2B__times_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_fp64
 GrB_Info GB_Adot3B__times_isne_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_fp64
+GrB_Info GB_Asaxpy3B__times_isne_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_int16.c b/Source/Generated/GB_AxB__times_isne_int16.c
index 478369fe3a..53283dbb70 100644
--- a/Source/Generated/GB_AxB__times_isne_int16.c
+++ b/Source/Generated/GB_AxB__times_isne_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_int16
 // A'*B function (dot2):     GB_Adot2B__times_isne_int16
 // A'*B function (dot3):     GB_Adot3B__times_isne_int16
-// A*B function (heap):      GB_AheapB__times_isne_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_ISNE_INT16 || GxB_NO_TIMES_ISNE_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_int16
 GrB_Info GB_Adot2B__times_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_int16
 GrB_Info GB_Adot3B__times_isne_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_int16
+GrB_Info GB_Asaxpy3B__times_isne_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_int32.c b/Source/Generated/GB_AxB__times_isne_int32.c
index 415a6f813e..20776c795b 100644
--- a/Source/Generated/GB_AxB__times_isne_int32.c
+++ b/Source/Generated/GB_AxB__times_isne_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_int32
 // A'*B function (dot2):     GB_Adot2B__times_isne_int32
 // A'*B function (dot3):     GB_Adot3B__times_isne_int32
-// A*B function (heap):      GB_AheapB__times_isne_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_ISNE_INT32 || GxB_NO_TIMES_ISNE_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_int32
 GrB_Info GB_Adot2B__times_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_int32
 GrB_Info GB_Adot3B__times_isne_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_int32
+GrB_Info GB_Asaxpy3B__times_isne_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_int64.c b/Source/Generated/GB_AxB__times_isne_int64.c
index 2043aa8b6a..a1db89bbdd 100644
--- a/Source/Generated/GB_AxB__times_isne_int64.c
+++ b/Source/Generated/GB_AxB__times_isne_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_int64
 // A'*B function (dot2):     GB_Adot2B__times_isne_int64
 // A'*B function (dot3):     GB_Adot3B__times_isne_int64
-// A*B function (heap):      GB_AheapB__times_isne_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_ISNE_INT64 || GxB_NO_TIMES_ISNE_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_int64
 GrB_Info GB_Adot2B__times_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_int64
 GrB_Info GB_Adot3B__times_isne_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_int64
+GrB_Info GB_Asaxpy3B__times_isne_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_int8.c b/Source/Generated/GB_AxB__times_isne_int8.c
index baeb289e47..679ce981e2 100644
--- a/Source/Generated/GB_AxB__times_isne_int8.c
+++ b/Source/Generated/GB_AxB__times_isne_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_int8
 // A'*B function (dot2):     GB_Adot2B__times_isne_int8
 // A'*B function (dot3):     GB_Adot3B__times_isne_int8
-// A*B function (heap):      GB_AheapB__times_isne_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_ISNE_INT8 || GxB_NO_TIMES_ISNE_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_int8
 GrB_Info GB_Adot2B__times_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_int8
 GrB_Info GB_Adot3B__times_isne_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled; generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_int8
+GrB_Info GB_Asaxpy3B__times_isne_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_uint16.c b/Source/Generated/GB_AxB__times_isne_uint16.c
index 0698babef9..119b4e062b 100644
--- a/Source/Generated/GB_AxB__times_isne_uint16.c
+++ b/Source/Generated/GB_AxB__times_isne_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_uint16
 // A'*B function (dot2):     GB_Adot2B__times_isne_uint16
 // A'*B function (dot3):     GB_Adot3B__times_isne_uint16
-// A*B function (heap):      GB_AheapB__times_isne_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid "add" of this semiring is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_ISNE_UINT16 || GxB_NO_TIMES_ISNE_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_uint16
 GrB_Info GB_Adot2B__times_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_uint16
 GrB_Info GB_Adot3B__times_isne_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled; generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_uint16
+GrB_Info GB_Asaxpy3B__times_isne_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_uint32.c b/Source/Generated/GB_AxB__times_isne_uint32.c
index 4c7ca704a7..940dad1ff1 100644
--- a/Source/Generated/GB_AxB__times_isne_uint32.c
+++ b/Source/Generated/GB_AxB__times_isne_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_uint32
 // A'*B function (dot2):     GB_Adot2B__times_isne_uint32
 // A'*B function (dot3):     GB_Adot3B__times_isne_uint32
-// A*B function (heap):      GB_AheapB__times_isne_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid "add" of this semiring is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_ISNE_UINT32 || GxB_NO_TIMES_ISNE_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_uint32
 GrB_Info GB_Adot2B__times_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_uint32
 GrB_Info GB_Adot3B__times_isne_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled; generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_uint32
+GrB_Info GB_Asaxpy3B__times_isne_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_uint64.c b/Source/Generated/GB_AxB__times_isne_uint64.c
index 4dc0fb97aa..d3fa127dc4 100644
--- a/Source/Generated/GB_AxB__times_isne_uint64.c
+++ b/Source/Generated/GB_AxB__times_isne_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_uint64
 // A'*B function (dot2):     GB_Adot2B__times_isne_uint64
 // A'*B function (dot3):     GB_Adot3B__times_isne_uint64
-// A*B function (heap):      GB_AheapB__times_isne_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid "add" of this semiring is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: uint64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_ISNE_UINT64 || GxB_NO_TIMES_ISNE_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_uint64
 GrB_Info GB_Adot2B__times_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_uint64
 GrB_Info GB_Adot3B__times_isne_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel disabled; generic case used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // function body is textually included
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_uint64
+GrB_Info GB_Asaxpy3B__times_isne_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_isne_uint8.c b/Source/Generated/GB_AxB__times_isne_uint8.c
index 1fd4a6dfaf..b359c16c65 100644
--- a/Source/Generated/GB_AxB__times_isne_uint8.c
+++ b/Source/Generated/GB_AxB__times_isne_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_isne_uint8
 // A'*B function (dot2):     GB_Adot2B__times_isne_uint8
 // A'*B function (dot3):     GB_Adot3B__times_isne_uint8
-// A*B function (heap):      GB_AheapB__times_isne_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_isne_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_isne_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik != bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik != bkj)
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x != y) ;
+#define GB_MULT(z, x, y) \
+    z = (x != y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x != y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x != y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid "add" of this semiring is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_ISNE || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_ISNE_UINT8 || GxB_NO_TIMES_ISNE_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_isne_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_isne_uint8
 GrB_Info GB_Adot2B__times_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_isne_uint8
 GrB_Info GB_Adot3B__times_isne_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_isne_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_isne_uint8    // C += A'*B, dense dot-product method
+(
+    GrB_Matrix C,                       // the C in C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // hard-coded kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_isne_uint8
+GrB_Info GB_Asaxpy3B__times_isne_uint8  // saxpy3 kernel: C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C in C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries -- TODO confirm in the template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_fp32.c b/Source/Generated/GB_AxB__times_land_fp32.c
index 54fc6046d2..00c7f737e8 100644
--- a/Source/Generated/GB_AxB__times_land_fp32.c
+++ b/Source/Generated/GB_AxB__times_land_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_fp32
 // A'*B function (dot2):     GB_Adot2B__times_land_fp32
 // A'*B function (dot3):     GB_Adot3B__times_land_fp32
-// A*B function (heap):      GB_AheapB__times_land_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= ((aik != 0) && (bkj != 0))
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= ((x != 0) && (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= ((x != 0) && (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
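+// x + y, where "+" is the monoid operator (TIMES here: x * y); not parenthesized, so pass simple operands only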
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_LAND_FP32 || GxB_NO_TIMES_LAND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_land_fp32
 GrB_Info GB_Adot2B__times_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_fp32
 GrB_Info GB_Adot3B__times_land_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_land_fp32     // C += A'*B, dense dot-product method
+(
+    GrB_Matrix C,                       // the C in C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // hard-coded kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_land_fp32
+GrB_Info GB_Asaxpy3B__times_land_fp32   // saxpy3 kernel: C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C in C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries -- TODO confirm in the template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_fp64.c b/Source/Generated/GB_AxB__times_land_fp64.c
index 227bf19a28..afe34d1779 100644
--- a/Source/Generated/GB_AxB__times_land_fp64.c
+++ b/Source/Generated/GB_AxB__times_land_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_fp64
 // A'*B function (dot2):     GB_Adot2B__times_land_fp64
 // A'*B function (dot3):     GB_Adot3B__times_land_fp64
-// A*B function (heap):      GB_AheapB__times_land_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= ((aik != 0) && (bkj != 0))
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= ((x != 0) && (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= ((x != 0) && (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
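+// x + y, where "+" is the monoid operator (TIMES here: x * y); not parenthesized, so pass simple operands only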
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_LAND_FP64 || GxB_NO_TIMES_LAND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_land_fp64
 GrB_Info GB_Adot2B__times_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_fp64
 GrB_Info GB_Adot3B__times_land_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_land_fp64     // C += A'*B, dense dot-product method
+(
+    GrB_Matrix C,                       // the C in C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // hard-coded kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_land_fp64
+GrB_Info GB_Asaxpy3B__times_land_fp64   // saxpy3 kernel: C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C in C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries -- TODO confirm in the template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_int16.c b/Source/Generated/GB_AxB__times_land_int16.c
index 2edcd49a1f..df56f2bf66 100644
--- a/Source/Generated/GB_AxB__times_land_int16.c
+++ b/Source/Generated/GB_AxB__times_land_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_int16
 // A'*B function (dot2):     GB_Adot2B__times_land_int16
 // A'*B function (dot3):     GB_Adot3B__times_land_int16
-// A*B function (heap):      GB_AheapB__times_land_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
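+// x + y, where "+" is the monoid operator (TIMES here: x * y); not parenthesized, so pass simple operands only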
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_LAND_INT16 || GxB_NO_TIMES_LAND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_int16
 GrB_Info GB_Adot2B__times_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_int16
 GrB_Info GB_Adot3B__times_land_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_int16    // C += A'*B, dense dot-product method
+(
+    GrB_Matrix C,                       // the C in C+=A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in the template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in the template
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;             // hard-coded kernel disabled: the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_int16  // saxpy3 kernel: C=A*B, C<M>=A*B, or C<!M>=A*B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                       // the C in C=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,  // mask M; presumably Mask_comp selects C<!M> -- TODO confirm
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,  // presumably ntasks entries -- TODO confirm in the template
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c" // the whole method body comes from this template
+    return (GrB_SUCCESS) ;              // reached only when GB_DISABLE is 0
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_int32.c b/Source/Generated/GB_AxB__times_land_int32.c
index b5c31384ca..ef29f4b913 100644
--- a/Source/Generated/GB_AxB__times_land_int32.c
+++ b/Source/Generated/GB_AxB__times_land_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_int32
 // A'*B function (dot2):     GB_Adot2B__times_land_int32
 // A'*B function (dot3):     GB_Adot3B__times_land_int32
-// A*B function (heap):      GB_AheapB__times_land_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function x + y, which is x * y for the TIMES monoid
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_LAND_INT32 || GxB_NO_TIMES_LAND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_int32
 GrB_Info GB_Adot2B__times_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_int32
 GrB_Info GB_Adot3B__times_land_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_int64.c b/Source/Generated/GB_AxB__times_land_int64.c
index c709b80b7b..1bd9117f03 100644
--- a/Source/Generated/GB_AxB__times_land_int64.c
+++ b/Source/Generated/GB_AxB__times_land_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_int64
 // A'*B function (dot2):     GB_Adot2B__times_land_int64
 // A'*B function (dot3):     GB_Adot3B__times_land_int64
-// A*B function (heap):      GB_AheapB__times_land_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function x + y, which is x * y for the TIMES monoid
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_LAND_INT64 || GxB_NO_TIMES_LAND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_int64
 GrB_Info GB_Adot2B__times_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_int64
 GrB_Info GB_Adot3B__times_land_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_int8.c b/Source/Generated/GB_AxB__times_land_int8.c
index 989ecdf3e8..2b4a71cbcf 100644
--- a/Source/Generated/GB_AxB__times_land_int8.c
+++ b/Source/Generated/GB_AxB__times_land_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_int8
 // A'*B function (dot2):     GB_Adot2B__times_land_int8
 // A'*B function (dot3):     GB_Adot3B__times_land_int8
-// A*B function (heap):      GB_AheapB__times_land_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function x + y, which is x * y for the TIMES monoid
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_LAND_INT8 || GxB_NO_TIMES_LAND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_int8
 GrB_Info GB_Adot2B__times_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_int8
 GrB_Info GB_Adot3B__times_land_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_uint16.c b/Source/Generated/GB_AxB__times_land_uint16.c
index 86c2008f9e..697ada46ec 100644
--- a/Source/Generated/GB_AxB__times_land_uint16.c
+++ b/Source/Generated/GB_AxB__times_land_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_uint16
 // A'*B function (dot2):     GB_Adot2B__times_land_uint16
 // A'*B function (dot3):     GB_Adot3B__times_land_uint16
-// A*B function (heap):      GB_AheapB__times_land_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function x + y, which is x * y for the TIMES monoid
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_LAND_UINT16 || GxB_NO_TIMES_LAND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_uint16
 GrB_Info GB_Adot2B__times_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_uint16
 GrB_Info GB_Adot3B__times_land_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_uint32.c b/Source/Generated/GB_AxB__times_land_uint32.c
index d936990dbb..44dbe16ba1 100644
--- a/Source/Generated/GB_AxB__times_land_uint32.c
+++ b/Source/Generated/GB_AxB__times_land_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_uint32
 // A'*B function (dot2):     GB_Adot2B__times_land_uint32
 // A'*B function (dot3):     GB_Adot3B__times_land_uint32
-// A*B function (heap):      GB_AheapB__times_land_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here, so *=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES here, so x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here, so *=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_LAND_UINT32 || GxB_NO_TIMES_LAND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_uint32
 GrB_Info GB_Adot2B__times_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_uint32
 GrB_Info GB_Adot3B__times_land_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_uint64.c b/Source/Generated/GB_AxB__times_land_uint64.c
index 3e32d9a9e5..912489c85d 100644
--- a/Source/Generated/GB_AxB__times_land_uint64.c
+++ b/Source/Generated/GB_AxB__times_land_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_uint64
 // A'*B function (dot2):     GB_Adot2B__times_land_uint64
 // A'*B function (dot3):     GB_Adot3B__times_land_uint64
-// A*B function (heap):      GB_AheapB__times_land_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here, so *=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES here, so x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here, so *=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_LAND_UINT64 || GxB_NO_TIMES_LAND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_uint64
 GrB_Info GB_Adot2B__times_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_uint64
 GrB_Info GB_Adot3B__times_land_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_land_uint8.c b/Source/Generated/GB_AxB__times_land_uint8.c
index 6fc7138a88..342537dd0c 100644
--- a/Source/Generated/GB_AxB__times_land_uint8.c
+++ b/Source/Generated/GB_AxB__times_land_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_land_uint8
 // A'*B function (dot2):     GB_Adot2B__times_land_uint8
 // A'*B function (dot3):     GB_Adot3B__times_land_uint8
-// A*B function (heap):      GB_AheapB__times_land_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_land_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_land_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) && (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = ((aik != 0) && (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) && (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) && (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) && (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here, so *=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES here, so x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here, so *=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LAND || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_LAND_UINT8 || GxB_NO_TIMES_LAND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_land_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_land_uint8
 GrB_Info GB_Adot2B__times_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_land_uint8
 GrB_Info GB_Adot3B__times_land_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_land_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_land_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_land_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_fp32.c b/Source/Generated/GB_AxB__times_lor_fp32.c
index 0afa96217c..67ca2f03c4 100644
--- a/Source/Generated/GB_AxB__times_lor_fp32.c
+++ b/Source/Generated/GB_AxB__times_lor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_fp32
 // A'*B function (dot2):     GB_Adot2B__times_lor_fp32
 // A'*B function (dot3):     GB_Adot3B__times_lor_fp32
-// A*B function (heap):      GB_AheapB__times_lor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= ((aik != 0) || (bkj != 0))
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= ((x != 0) || (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= ((x != 0) || (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here, so *=)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES here, so x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here, so *=)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_LOR_FP32 || GxB_NO_TIMES_LOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_fp32
 GrB_Info GB_Adot2B__times_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_fp32
 GrB_Info GB_Adot3B__times_lor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_lor_fp32
+GrB_Info GB_Asaxpy3B__times_lor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_fp64.c b/Source/Generated/GB_AxB__times_lor_fp64.c
index dc6655176c..35d08ba8e2 100644
--- a/Source/Generated/GB_AxB__times_lor_fp64.c
+++ b/Source/Generated/GB_AxB__times_lor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_fp64
 // A'*B function (dot2):     GB_Adot2B__times_lor_fp64
 // A'*B function (dot3):     GB_Adot3B__times_lor_fp64
-// A*B function (heap):      GB_AheapB__times_lor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= ((aik != 0) || (bkj != 0))
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= ((x != 0) || (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= ((x != 0) || (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_LOR_FP64 || GxB_NO_TIMES_LOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_fp64
 GrB_Info GB_Adot2B__times_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_fp64
 GrB_Info GB_Adot3B__times_lor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_lor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_lor_fp64
+GrB_Info GB_Asaxpy3B__times_lor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_int16.c b/Source/Generated/GB_AxB__times_lor_int16.c
index ec1512b777..d6e9fb1d5f 100644
--- a/Source/Generated/GB_AxB__times_lor_int16.c
+++ b/Source/Generated/GB_AxB__times_lor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_int16
 // A'*B function (dot2):     GB_Adot2B__times_lor_int16
 // A'*B function (dot3):     GB_Adot3B__times_lor_int16
-// A*B function (heap):      GB_AheapB__times_lor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_LOR_INT16 || GxB_NO_TIMES_LOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_int16
 GrB_Info GB_Adot2B__times_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_int16
 GrB_Info GB_Adot3B__times_lor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_int32.c b/Source/Generated/GB_AxB__times_lor_int32.c
index 9eb662f3e9..0342017b59 100644
--- a/Source/Generated/GB_AxB__times_lor_int32.c
+++ b/Source/Generated/GB_AxB__times_lor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_int32
 // A'*B function (dot2):     GB_Adot2B__times_lor_int32
 // A'*B function (dot3):     GB_Adot3B__times_lor_int32
-// A*B function (heap):      GB_AheapB__times_lor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_LOR_INT32 || GxB_NO_TIMES_LOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_int32
 GrB_Info GB_Adot2B__times_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_int32
 GrB_Info GB_Adot3B__times_lor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_int64.c b/Source/Generated/GB_AxB__times_lor_int64.c
index 3640aa60ec..5103cb776e 100644
--- a/Source/Generated/GB_AxB__times_lor_int64.c
+++ b/Source/Generated/GB_AxB__times_lor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_int64
 // A'*B function (dot2):     GB_Adot2B__times_lor_int64
 // A'*B function (dot3):     GB_Adot3B__times_lor_int64
-// A*B function (heap):      GB_AheapB__times_lor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_LOR_INT64 || GxB_NO_TIMES_LOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_int64
 GrB_Info GB_Adot2B__times_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_int64
 GrB_Info GB_Adot3B__times_lor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_int8.c b/Source/Generated/GB_AxB__times_lor_int8.c
index 2665b17c26..db8d9047ba 100644
--- a/Source/Generated/GB_AxB__times_lor_int8.c
+++ b/Source/Generated/GB_AxB__times_lor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_int8
 // A'*B function (dot2):     GB_Adot2B__times_lor_int8
 // A'*B function (dot3):     GB_Adot3B__times_lor_int8
-// A*B function (heap):      GB_AheapB__times_lor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the monoid "add" operation here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" function here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the monoid "add" operation here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_LOR_INT8 || GxB_NO_TIMES_LOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_int8
 GrB_Info GB_Adot2B__times_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_int8
 GrB_Info GB_Adot3B__times_lor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_uint16.c b/Source/Generated/GB_AxB__times_lor_uint16.c
index 9998526a2d..ab1cd1bb4e 100644
--- a/Source/Generated/GB_AxB__times_lor_uint16.c
+++ b/Source/Generated/GB_AxB__times_lor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_uint16
 // A'*B function (dot2):     GB_Adot2B__times_lor_uint16
 // A'*B function (dot3):     GB_Adot3B__times_lor_uint16
-// A*B function (heap):      GB_AheapB__times_lor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the monoid "add" operation here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" function here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the monoid "add" operation here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_LOR_UINT16 || GxB_NO_TIMES_LOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_uint16
 GrB_Info GB_Adot2B__times_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_uint16
 GrB_Info GB_Adot3B__times_lor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_uint32.c b/Source/Generated/GB_AxB__times_lor_uint32.c
index 51e4ee9136..2d19e03ccc 100644
--- a/Source/Generated/GB_AxB__times_lor_uint32.c
+++ b/Source/Generated/GB_AxB__times_lor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_uint32
 // A'*B function (dot2):     GB_Adot2B__times_lor_uint32
 // A'*B function (dot3):     GB_Adot3B__times_lor_uint32
-// A*B function (heap):      GB_AheapB__times_lor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the monoid "add" operation here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" function here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if the monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the monoid "add" operation here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_LOR_UINT32 || GxB_NO_TIMES_LOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_uint32
 GrB_Info GB_Adot2B__times_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_uint32
 GrB_Info GB_Adot3B__times_lor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_uint64.c b/Source/Generated/GB_AxB__times_lor_uint64.c
index 615b3470d9..c78893771b 100644
--- a/Source/Generated/GB_AxB__times_lor_uint64.c
+++ b/Source/Generated/GB_AxB__times_lor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_uint64
 // A'*B function (dot2):     GB_Adot2B__times_lor_uint64
 // A'*B function (dot3):     GB_Adot3B__times_lor_uint64
-// A*B function (heap):      GB_AheapB__times_lor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_LOR_UINT64 || GxB_NO_TIMES_LOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_uint64
 GrB_Info GB_Adot2B__times_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_uint64
 GrB_Info GB_Adot3B__times_lor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lor_uint8.c b/Source/Generated/GB_AxB__times_lor_uint8.c
index d3c4143221..0b06305ef7 100644
--- a/Source/Generated/GB_AxB__times_lor_uint8.c
+++ b/Source/Generated/GB_AxB__times_lor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lor_uint8
 // A'*B function (dot2):     GB_Adot2B__times_lor_uint8
 // A'*B function (dot3):     GB_Adot3B__times_lor_uint8
-// A*B function (heap):      GB_AheapB__times_lor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_lor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) || (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = ((aik != 0) || (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) || (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) || (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) || (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LOR || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_LOR_UINT8 || GxB_NO_TIMES_LOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lor_uint8
 GrB_Info GB_Adot2B__times_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lor_uint8
 GrB_Info GB_Adot3B__times_lor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lor_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_fp32.c b/Source/Generated/GB_AxB__times_lxor_fp32.c
index 905b9232b5..5fd33eb6cf 100644
--- a/Source/Generated/GB_AxB__times_lxor_fp32.c
+++ b/Source/Generated/GB_AxB__times_lxor_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_fp32
 // A'*B function (dot2):     GB_Adot2B__times_lxor_fp32
 // A'*B function (dot3):     GB_Adot3B__times_lxor_fp32
-// A*B function (heap):      GB_AheapB__times_lxor_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= ((aik != 0) != (bkj != 0))
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= ((x != 0) != (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= ((x != 0) != (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_LXOR_FP32 || GxB_NO_TIMES_LXOR_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_fp32
 GrB_Info GB_Adot2B__times_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_fp32
 GrB_Info GB_Adot3B__times_lxor_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_lxor_fp32
+GrB_Info GB_Asaxpy3B__times_lxor_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_fp64.c b/Source/Generated/GB_AxB__times_lxor_fp64.c
index 43b3833e3d..fb0d710248 100644
--- a/Source/Generated/GB_AxB__times_lxor_fp64.c
+++ b/Source/Generated/GB_AxB__times_lxor_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_fp64
 // A'*B function (dot2):     GB_Adot2B__times_lxor_fp64
 // A'*B function (dot3):     GB_Adot3B__times_lxor_fp64
-// A*B function (heap):      GB_AheapB__times_lxor_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= ((aik != 0) != (bkj != 0))
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= ((x != 0) != (y != 0)) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= ((x != 0) != (y != 0))
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid add function: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_LXOR_FP64 || GxB_NO_TIMES_LXOR_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_fp64
 GrB_Info GB_Adot2B__times_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_fp64
 GrB_Info GB_Adot3B__times_lxor_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_lxor_fp64
+GrB_Info GB_Asaxpy3B__times_lxor_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_int16.c b/Source/Generated/GB_AxB__times_lxor_int16.c
index c6cfa43bcc..34bc6fdd12 100644
--- a/Source/Generated/GB_AxB__times_lxor_int16.c
+++ b/Source/Generated/GB_AxB__times_lxor_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_int16
 // A'*B function (dot2):     GB_Adot2B__times_lxor_int16
 // A'*B function (dot3):     GB_Adot3B__times_lxor_int16
-// A*B function (heap):      GB_AheapB__times_lxor_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid add (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid add (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_LXOR_INT16 || GxB_NO_TIMES_LXOR_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_int16
 GrB_Info GB_Adot2B__times_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_int16
 GrB_Info GB_Adot3B__times_lxor_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_int16
+(
+    GrB_Matrix C,                               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads                          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_AxB_dot4_template.c"           // function body is textually included here
+    return (GrB_SUCCESS) ;                      // reached when the included code falls through
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                               // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask and its flags -- semantics set by the template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // task descriptors; type is declared elsewhere
+    const int ntasks,                           // presumably the length of TaskList -- TODO confirm
+    const int nfine,                            // presumably the count of fine tasks -- TODO confirm
+    const int nthreads,                         // presumably the thread count for the template -- TODO confirm
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"         // function body is textually included here
+    return (GrB_SUCCESS) ;                      // reached when the included code falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_int32.c b/Source/Generated/GB_AxB__times_lxor_int32.c
index e5efd8dadc..cfa8672b77 100644
--- a/Source/Generated/GB_AxB__times_lxor_int32.c
+++ b/Source/Generated/GB_AxB__times_lxor_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_int32
 // A'*B function (dot2):     GB_Adot2B__times_lxor_int32
 // A'*B function (dot3):     GB_Adot3B__times_lxor_int32
-// A*B function (heap):      GB_AheapB__times_lxor_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid add (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid add (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_LXOR_INT32 || GxB_NO_TIMES_LXOR_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_int32
 GrB_Info GB_Adot2B__times_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_int32
 GrB_Info GB_Adot3B__times_lxor_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_int32
+(
+    GrB_Matrix C,                               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads                          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_AxB_dot4_template.c"           // function body is textually included here
+    return (GrB_SUCCESS) ;                      // reached when the included code falls through
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                               // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask and its flags -- semantics set by the template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // task descriptors; type is declared elsewhere
+    const int ntasks,                           // presumably the length of TaskList -- TODO confirm
+    const int nfine,                            // presumably the count of fine tasks -- TODO confirm
+    const int nthreads,                         // presumably the thread count for the template -- TODO confirm
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"         // function body is textually included here
+    return (GrB_SUCCESS) ;                      // reached when the included code falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_int64.c b/Source/Generated/GB_AxB__times_lxor_int64.c
index 1a147cb2ed..c7dacb1bd0 100644
--- a/Source/Generated/GB_AxB__times_lxor_int64.c
+++ b/Source/Generated/GB_AxB__times_lxor_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_int64
 // A'*B function (dot2):     GB_Adot2B__times_lxor_int64
 // A'*B function (dot3):     GB_Adot3B__times_lxor_int64
-// A*B function (heap):      GB_AheapB__times_lxor_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid add (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid add (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_LXOR_INT64 || GxB_NO_TIMES_LXOR_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_int64
 GrB_Info GB_Adot2B__times_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_int64
 GrB_Info GB_Adot3B__times_lxor_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_int64
+(
+    GrB_Matrix C,                               // accumulated into: C += A'*B
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,  // presumably naslice slices of A -- TODO confirm in template
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // presumably nbslice slices of B -- TODO confirm in template
+    const int nthreads                          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;                     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_AxB_dot4_template.c"           // function body is textually included here
+    return (GrB_SUCCESS) ;                      // reached when the included code falls through
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,                               // result of C=A*B, C<M>=A*B, or C<!M>=A*B
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct, // mask and its flags -- semantics set by the template
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList, // task descriptors; type is declared elsewhere
+    const int ntasks,                           // presumably the length of TaskList -- TODO confirm
+    const int nfine,                            // presumably the count of fine tasks -- TODO confirm
+    const int nthreads,                         // presumably the thread count for the template -- TODO confirm
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"         // function body is textually included here
+    return (GrB_SUCCESS) ;                      // reached when the included code falls through
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_int8.c b/Source/Generated/GB_AxB__times_lxor_int8.c
index 98994bf32b..2c476eacfc 100644
--- a/Source/Generated/GB_AxB__times_lxor_int8.c
+++ b/Source/Generated/GB_AxB__times_lxor_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_int8
 // A'*B function (dot2):     GB_Adot2B__times_lxor_int8
 // A'*B function (dot3):     GB_Adot3B__times_lxor_int8
-// A*B function (heap):      GB_AheapB__times_lxor_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid add (TIMES here: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid add (TIMES here: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid add (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_LXOR_INT8 || GxB_NO_TIMES_LXOR_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_int8
 GrB_Info GB_Adot2B__times_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_int8
 GrB_Info GB_Adot3B__times_lxor_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_uint16.c b/Source/Generated/GB_AxB__times_lxor_uint16.c
index e4948ff675..c8f861eedd 100644
--- a/Source/Generated/GB_AxB__times_lxor_uint16.c
+++ b/Source/Generated/GB_AxB__times_lxor_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_uint16
 // A'*B function (dot2):     GB_Adot2B__times_lxor_uint16
 // A'*B function (dot3):     GB_Adot3B__times_lxor_uint16
-// A*B function (heap):      GB_AheapB__times_lxor_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" denotes the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" denotes the monoid's binary function (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" denotes the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_LXOR_UINT16 || GxB_NO_TIMES_LXOR_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_uint16
 GrB_Info GB_Adot2B__times_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_uint16
 GrB_Info GB_Adot3B__times_lxor_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_uint32.c b/Source/Generated/GB_AxB__times_lxor_uint32.c
index 0379416799..d630f16e9e 100644
--- a/Source/Generated/GB_AxB__times_lxor_uint32.c
+++ b/Source/Generated/GB_AxB__times_lxor_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_uint32
 // A'*B function (dot2):     GB_Adot2B__times_lxor_uint32
 // A'*B function (dot3):     GB_Adot3B__times_lxor_uint32
-// A*B function (heap):      GB_AheapB__times_lxor_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" denotes the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" denotes the monoid's binary function (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" denotes the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_LXOR_UINT32 || GxB_NO_TIMES_LXOR_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_uint32
 GrB_Info GB_Adot2B__times_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_uint32
 GrB_Info GB_Adot3B__times_lxor_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_uint64.c b/Source/Generated/GB_AxB__times_lxor_uint64.c
index 8acb17b8a6..88d58527dd 100644
--- a/Source/Generated/GB_AxB__times_lxor_uint64.c
+++ b/Source/Generated/GB_AxB__times_lxor_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_uint64
 // A'*B function (dot2):     GB_Adot2B__times_lxor_uint64
 // A'*B function (dot3):     GB_Adot3B__times_lxor_uint64
-// A*B function (heap):      GB_AheapB__times_lxor_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" denotes the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" denotes the monoid's binary function (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (defined as 0 for this 64-bit type)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" denotes the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_LXOR_UINT64 || GxB_NO_TIMES_LXOR_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_uint64
 GrB_Info GB_Adot2B__times_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_uint64
 GrB_Info GB_Adot3B__times_lxor_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_lxor_uint8.c b/Source/Generated/GB_AxB__times_lxor_uint8.c
index 742bd32ae7..8d21140fd0 100644
--- a/Source/Generated/GB_AxB__times_lxor_uint8.c
+++ b/Source/Generated/GB_AxB__times_lxor_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_lxor_uint8
 // A'*B function (dot2):     GB_Adot2B__times_lxor_uint8
 // A'*B function (dot3):     GB_Adot3B__times_lxor_uint8
-// A*B function (heap):      GB_AheapB__times_lxor_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_lxor_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_lxor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = ((aik != 0) != (bkj != 0))
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = ((aik != 0) != (bkj != 0)) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = ((x != 0) != (y != 0)) ;
+#define GB_MULT(z, x, y) \
+    z = ((x != 0) != (y != 0))
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = ((x != 0) != (y != 0)) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" denotes the monoid update (TIMES: Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" denotes the monoid's binary function (TIMES: x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" denotes the monoid update (TIMES: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_LXOR_UINT8 || GxB_NO_TIMES_LXOR_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_lxor_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_lxor_uint8
 GrB_Info GB_Adot2B__times_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_lxor_uint8
 GrB_Info GB_Adot3B__times_lxor_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_lxor_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_lxor_uint8    // hard-coded C+=A'*B dense dot product kernel, TIMES_LXOR_UINT8 semiring
+(
+    GrB_Matrix C,                       // matrix updated by the kernel (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // left operand; presumably A_is_pattern means A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like a partition of A into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // right operand; presumably B_is_pattern means B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): looks like a partition of B into nbslice slices -- confirm in template
+    const int nthreads                  // presumably the number of threads used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE                      // nonzero when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // entire function body comes from this template; presumably specialized by the GB_* macros in this file
+    return (GrB_SUCCESS) ;              // returned after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__times_lxor_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_lxor_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_fp32.c b/Source/Generated/GB_AxB__times_max_fp32.c
index 9c23b0e695..8d65f47b7f 100644
--- a/Source/Generated/GB_AxB__times_max_fp32.c
+++ b/Source/Generated/GB_AxB__times_max_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_fp32
 // A'*B function (dot2):     GB_Adot2B__times_max_fp32
 // A'*B function (dot3):     GB_Adot3B__times_max_fp32
-// A*B function (heap):      GB_AheapB__times_max_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmaxf (aik, bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= fmaxf (aik, bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmaxf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmaxf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= fmaxf (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= fmaxf (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_MAX_FP32 || GxB_NO_TIMES_MAX_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_max_fp32
 GrB_Info GB_Adot2B__times_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_fp32
 GrB_Info GB_Adot3B__times_max_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_max_fp32      // hard-coded C+=A'*B dense dot product kernel, TIMES_MAX_FP32 semiring
+(
+    GrB_Matrix C,                       // matrix updated by the kernel (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // left operand; presumably A_is_pattern means A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like a partition of A into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // right operand; presumably B_is_pattern means B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): looks like a partition of B into nbslice slices -- confirm in template
+    const int nthreads                  // presumably the number of threads used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE                      // nonzero when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // entire function body comes from this template; presumably specialized by the GB_* macros in this file
+    return (GrB_SUCCESS) ;              // returned after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_max_fp32
+GrB_Info GB_Asaxpy3B__times_max_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_fp64.c b/Source/Generated/GB_AxB__times_max_fp64.c
index cbded118d9..21288e5f4c 100644
--- a/Source/Generated/GB_AxB__times_max_fp64.c
+++ b/Source/Generated/GB_AxB__times_max_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_fp64
 // A'*B function (dot2):     GB_Adot2B__times_max_fp64
 // A'*B function (dot3):     GB_Adot3B__times_max_fp64
-// A*B function (heap):      GB_AheapB__times_max_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmax (aik, bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= fmax (aik, bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmax (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmax (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= fmax (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= fmax (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_MAX_FP64 || GxB_NO_TIMES_MAX_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_max_fp64
 GrB_Info GB_Adot2B__times_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_fp64
 GrB_Info GB_Adot3B__times_max_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_max_fp64      // hard-coded C+=A'*B dense dot product kernel, TIMES_MAX_FP64 semiring
+(
+    GrB_Matrix C,                       // matrix updated by the kernel (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // left operand; presumably A_is_pattern means A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like a partition of A into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // right operand; presumably B_is_pattern means B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): looks like a partition of B into nbslice slices -- confirm in template
+    const int nthreads                  // presumably the number of threads used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE                      // nonzero when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // entire function body comes from this template; presumably specialized by the GB_* macros in this file
+    return (GrB_SUCCESS) ;              // returned after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_max_fp64
+GrB_Info GB_Asaxpy3B__times_max_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_int16.c b/Source/Generated/GB_AxB__times_max_int16.c
index 6607a3a0df..8ba03428f3 100644
--- a/Source/Generated/GB_AxB__times_max_int16.c
+++ b/Source/Generated/GB_AxB__times_max_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_int16
 // A'*B function (dot2):     GB_Adot2B__times_max_int16
 // A'*B function (dot3):     GB_Adot3B__times_max_int16
-// A*B function (heap):      GB_AheapB__times_max_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_MAX_INT16 || GxB_NO_TIMES_MAX_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_int16
 GrB_Info GB_Adot2B__times_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_int16
 GrB_Info GB_Adot3B__times_max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_int16     // hard-coded C+=A'*B dense dot product kernel, TIMES_MAX_INT16 semiring
+(
+    GrB_Matrix C,                       // matrix updated by the kernel (C+=A'*B)
+    const GrB_Matrix A, bool A_is_pattern,      // left operand; presumably A_is_pattern means A's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT A_slice, int naslice,  // NOTE(review): looks like a partition of A into naslice slices -- confirm in template
+    const GrB_Matrix B, bool B_is_pattern,      // right operand; presumably B_is_pattern means B's values are unused -- TODO confirm
+    int64_t *GB_RESTRICT B_slice, int nbslice,  // NOTE(review): looks like a partition of B into nbslice slices -- confirm in template
+    const int nthreads                  // presumably the number of threads used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE                      // nonzero when any GxB_NO_* flag listed in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;             // kernel compiled out; per the GB_DISABLE comment the generic case is used instead
+    #else
+    #include "GB_AxB_dot4_template.c"   // entire function body comes from this template; presumably specialized by the GB_* macros in this file
+    return (GrB_SUCCESS) ;              // returned after the included template body
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_int32.c b/Source/Generated/GB_AxB__times_max_int32.c
index 99df433d9e..5e47dc30ba 100644
--- a/Source/Generated/GB_AxB__times_max_int32.c
+++ b/Source/Generated/GB_AxB__times_max_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_int32
 // A'*B function (dot2):     GB_Adot2B__times_max_int32
 // A'*B function (dot3):     GB_Adot3B__times_max_int32
-// A*B function (heap):      GB_AheapB__times_max_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function applied to x and y: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the "add" operator of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_MAX_INT32 || GxB_NO_TIMES_MAX_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_int32
 GrB_Info GB_Adot2B__times_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_int32
 GrB_Info GB_Adot3B__times_max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_int64.c b/Source/Generated/GB_AxB__times_max_int64.c
index eb74040d71..2dce178a4f 100644
--- a/Source/Generated/GB_AxB__times_max_int64.c
+++ b/Source/Generated/GB_AxB__times_max_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_int64
 // A'*B function (dot2):     GB_Adot2B__times_max_int64
 // A'*B function (dot3):     GB_Adot3B__times_max_int64
-// A*B function (heap):      GB_AheapB__times_max_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the TIMES monoid is: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, which for the TIMES monoid is: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the TIMES monoid is: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_MAX_INT64 || GxB_NO_TIMES_MAX_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_int64
 GrB_Info GB_Adot2B__times_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_int64
 GrB_Info GB_Adot3B__times_max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_int8.c b/Source/Generated/GB_AxB__times_max_int8.c
index 1fe3595097..65bfa3876c 100644
--- a/Source/Generated/GB_AxB__times_max_int8.c
+++ b/Source/Generated/GB_AxB__times_max_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_int8
 // A'*B function (dot2):     GB_Adot2B__times_max_int8
 // A'*B function (dot3):     GB_Adot3B__times_max_int8
-// A*B function (heap):      GB_AheapB__times_max_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the TIMES monoid is: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, which for the TIMES monoid is: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the TIMES monoid is: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_MAX_INT8 || GxB_NO_TIMES_MAX_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_int8
 GrB_Info GB_Adot2B__times_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_int8
 GrB_Info GB_Adot3B__times_max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_uint16.c b/Source/Generated/GB_AxB__times_max_uint16.c
index 104b5e5aec..810aba83a8 100644
--- a/Source/Generated/GB_AxB__times_max_uint16.c
+++ b/Source/Generated/GB_AxB__times_max_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_uint16
 // A'*B function (dot2):     GB_Adot2B__times_max_uint16
 // A'*B function (dot3):     GB_Adot3B__times_max_uint16
-// A*B function (heap):      GB_AheapB__times_max_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, which for the TIMES monoid is: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, which for the TIMES monoid is: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, which for the TIMES monoid is: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_MAX_UINT16 || GxB_NO_TIMES_MAX_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_uint16
 GrB_Info GB_Adot2B__times_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_uint16
 GrB_Info GB_Adot3B__times_max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_uint32.c b/Source/Generated/GB_AxB__times_max_uint32.c
index 702b026895..7d1cdba2b3 100644
--- a/Source/Generated/GB_AxB__times_max_uint32.c
+++ b/Source/Generated/GB_AxB__times_max_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_uint32
 // A'*B function (dot2):     GB_Adot2B__times_max_uint32
 // A'*B function (dot3):     GB_Adot3B__times_max_uint32
-// A*B function (heap):      GB_AheapB__times_max_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update C(i,j) with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update Hx [i] with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_MAX_UINT32 || GxB_NO_TIMES_MAX_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_uint32
 GrB_Info GB_Adot2B__times_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_uint32
 GrB_Info GB_Adot3B__times_max_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_uint64.c b/Source/Generated/GB_AxB__times_max_uint64.c
index c9d5b0bbfd..8098ca7453 100644
--- a/Source/Generated/GB_AxB__times_max_uint64.c
+++ b/Source/Generated/GB_AxB__times_max_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_uint64
 // A'*B function (dot2):     GB_Adot2B__times_max_uint64
 // A'*B function (dot3):     GB_Adot3B__times_max_uint64
-// A*B function (heap):      GB_AheapB__times_max_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update C(i,j) with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update Hx [i] with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_MAX_UINT64 || GxB_NO_TIMES_MAX_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_uint64
 GrB_Info GB_Adot2B__times_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_uint64
 GrB_Info GB_Adot3B__times_max_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_max_uint8.c b/Source/Generated/GB_AxB__times_max_uint8.c
index 61b1086c01..34d3657f59 100644
--- a/Source/Generated/GB_AxB__times_max_uint8.c
+++ b/Source/Generated/GB_AxB__times_max_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_max_uint8
 // A'*B function (dot2):     GB_Adot2B__times_max_uint8
 // A'*B function (dot3):     GB_Adot3B__times_max_uint8
-// A*B function (heap):      GB_AheapB__times_max_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_max_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_max_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMAX (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IMAX (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMAX (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMAX (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMAX (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update C(i,j) with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update Hx [i] with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MAX || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_MAX_UINT8 || GxB_NO_TIMES_MAX_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_max_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_max_uint8
 GrB_Info GB_Adot2B__times_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_max_uint8
 GrB_Info GB_Adot3B__times_max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_max_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_max_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_max_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_fp32.c b/Source/Generated/GB_AxB__times_min_fp32.c
index 9746f92e75..dd89927853 100644
--- a/Source/Generated/GB_AxB__times_min_fp32.c
+++ b/Source/Generated/GB_AxB__times_min_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_fp32
 // A'*B function (dot2):     GB_Adot2B__times_min_fp32
 // A'*B function (dot3):     GB_Adot3B__times_min_fp32
-// A*B function (heap):      GB_AheapB__times_min_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = fminf (aik, bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= fminf (aik, bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fminf (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fminf (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= fminf (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= fminf (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update C(i,j) with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update Hx [i] with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_MIN_FP32 || GxB_NO_TIMES_MIN_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_min_fp32
 GrB_Info GB_Adot2B__times_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_fp32
 GrB_Info GB_Adot3B__times_min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_min_fp32
+GrB_Info GB_Asaxpy3B__times_min_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_fp64.c b/Source/Generated/GB_AxB__times_min_fp64.c
index 633e9c182f..547285cd1c 100644
--- a/Source/Generated/GB_AxB__times_min_fp64.c
+++ b/Source/Generated/GB_AxB__times_min_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_fp64
 // A'*B function (dot2):     GB_Adot2B__times_min_fp64
 // A'*B function (dot3):     GB_Adot3B__times_min_fp64
-// A*B function (heap):      GB_AheapB__times_min_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = fmin (aik, bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= fmin (aik, bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = fmin (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = fmin (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= fmin (x, y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= fmin (x, y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_MIN_FP64 || GxB_NO_TIMES_MIN_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_min_fp64
 GrB_Info GB_Adot2B__times_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_fp64
 GrB_Info GB_Adot3B__times_min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_min_fp64
+GrB_Info GB_Asaxpy3B__times_min_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_int16.c b/Source/Generated/GB_AxB__times_min_int16.c
index e68a170351..274e081056 100644
--- a/Source/Generated/GB_AxB__times_min_int16.c
+++ b/Source/Generated/GB_AxB__times_min_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_int16
 // A'*B function (dot2):     GB_Adot2B__times_min_int16
 // A'*B function (dot3):     GB_Adot3B__times_min_int16
-// A*B function (heap):      GB_AheapB__times_min_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_MIN_INT16 || GxB_NO_TIMES_MIN_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_int16
 GrB_Info GB_Adot2B__times_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_int16
 GrB_Info GB_Adot3B__times_min_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_int32.c b/Source/Generated/GB_AxB__times_min_int32.c
index 7935d71d27..1a204e605e 100644
--- a/Source/Generated/GB_AxB__times_min_int32.c
+++ b/Source/Generated/GB_AxB__times_min_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_int32
 // A'*B function (dot2):     GB_Adot2B__times_min_int32
 // A'*B function (dot3):     GB_Adot3B__times_min_int32
-// A*B function (heap):      GB_AheapB__times_min_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_MIN_INT32 || GxB_NO_TIMES_MIN_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_int32
 GrB_Info GB_Adot2B__times_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_int32
 GrB_Info GB_Adot3B__times_min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_int64.c b/Source/Generated/GB_AxB__times_min_int64.c
index d72b002789..023e775a1f 100644
--- a/Source/Generated/GB_AxB__times_min_int64.c
+++ b/Source/Generated/GB_AxB__times_min_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_int64
 // A'*B function (dot2):     GB_Adot2B__times_min_int64
 // A'*B function (dot3):     GB_Adot3B__times_min_int64
-// A*B function (heap):      GB_AheapB__times_min_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid (TIMES for this semiring)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid (TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid (TIMES for this semiring)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_MIN_INT64 || GxB_NO_TIMES_MIN_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_int64
 GrB_Info GB_Adot2B__times_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_int64
 GrB_Info GB_Adot3B__times_min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_int8.c b/Source/Generated/GB_AxB__times_min_int8.c
index b6b561935b..0df10badbb 100644
--- a/Source/Generated/GB_AxB__times_min_int8.c
+++ b/Source/Generated/GB_AxB__times_min_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_int8
 // A'*B function (dot2):     GB_Adot2B__times_min_int8
 // A'*B function (dot3):     GB_Adot3B__times_min_int8
-// A*B function (heap):      GB_AheapB__times_min_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the "add" of the TIMES monoid is multiplication)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the "add" of the TIMES monoid is multiplication)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the "add" of the TIMES monoid is multiplication)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_MIN_INT8 || GxB_NO_TIMES_MIN_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_int8
 GrB_Info GB_Adot2B__times_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_int8
 GrB_Info GB_Adot3B__times_min_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_uint16.c b/Source/Generated/GB_AxB__times_min_uint16.c
index 20906d0159..c023f6634e 100644
--- a/Source/Generated/GB_AxB__times_min_uint16.c
+++ b/Source/Generated/GB_AxB__times_min_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_uint16
 // A'*B function (dot2):     GB_Adot2B__times_min_uint16
 // A'*B function (dot3):     GB_Adot3B__times_min_uint16
-// A*B function (heap):      GB_AheapB__times_min_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the "add" of the TIMES monoid is multiplication)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the "add" of the TIMES monoid is multiplication)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the "add" of the TIMES monoid is multiplication)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_MIN_UINT16 || GxB_NO_TIMES_MIN_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_uint16
 GrB_Info GB_Adot2B__times_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_uint16
 GrB_Info GB_Adot3B__times_min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_uint32.c b/Source/Generated/GB_AxB__times_min_uint32.c
index dcb35527e4..93c44553db 100644
--- a/Source/Generated/GB_AxB__times_min_uint32.c
+++ b/Source/Generated/GB_AxB__times_min_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_uint32
 // A'*B function (dot2):     GB_Adot2B__times_min_uint32
 // A'*B function (dot3):     GB_Adot3B__times_min_uint32
-// A*B function (heap):      GB_AheapB__times_min_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the "add" of the TIMES monoid is multiplication)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the "add" of the TIMES monoid is multiplication)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the "add" of the TIMES monoid is multiplication)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_MIN_UINT32 || GxB_NO_TIMES_MIN_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_uint32
 GrB_Info GB_Adot2B__times_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_uint32
 GrB_Info GB_Adot3B__times_min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_uint64.c b/Source/Generated/GB_AxB__times_min_uint64.c
index be36005ff3..3bd3aadd8c 100644
--- a/Source/Generated/GB_AxB__times_min_uint64.c
+++ b/Source/Generated/GB_AxB__times_min_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_uint64
 // A'*B function (dot2):     GB_Adot2B__times_min_uint64
 // A'*B function (dot3):     GB_Adot3B__times_min_uint64
-// A*B function (heap):      GB_AheapB__times_min_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the "add" of the TIMES monoid is multiplication)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the "add" of the TIMES monoid is multiplication)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the "add" of the TIMES monoid is multiplication)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_MIN_UINT64 || GxB_NO_TIMES_MIN_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_uint64
 GrB_Info GB_Adot2B__times_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_uint64
 GrB_Info GB_Adot3B__times_min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_min_uint8.c b/Source/Generated/GB_AxB__times_min_uint8.c
index 42d1bc2535..4472ce736c 100644
--- a/Source/Generated/GB_AxB__times_min_uint8.c
+++ b/Source/Generated/GB_AxB__times_min_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_min_uint8
 // A'*B function (dot2):     GB_Adot2B__times_min_uint8
 // A'*B function (dot3):     GB_Adot3B__times_min_uint8
-// A*B function (heap):      GB_AheapB__times_min_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_min_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_min_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IMIN (aik, bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IMIN (aik, bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IMIN (x, y) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IMIN (x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IMIN (x, y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MIN || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_MIN_UINT8 || GxB_NO_TIMES_MIN_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_min_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_min_uint8
 GrB_Info GB_Adot2B__times_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_min_uint8
 GrB_Info GB_Adot3B__times_min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_min_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_min_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_min_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_min_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_fp32.c b/Source/Generated/GB_AxB__times_minus_fp32.c
index 3adff36788..d029b6a147 100644
--- a/Source/Generated/GB_AxB__times_minus_fp32.c
+++ b/Source/Generated/GB_AxB__times_minus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_fp32
 // A'*B function (dot2):     GB_Adot2B__times_minus_fp32
 // A'*B function (dot3):     GB_Adot3B__times_minus_fp32
-// A*B function (heap):      GB_AheapB__times_minus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik - bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x - y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x - y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_MINUS_FP32 || GxB_NO_TIMES_MINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_fp32
 GrB_Info GB_Adot2B__times_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_fp32
 GrB_Info GB_Adot3B__times_minus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_minus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_minus_fp32
+GrB_Info GB_Asaxpy3B__times_minus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_fp64.c b/Source/Generated/GB_AxB__times_minus_fp64.c
index 5039cd42ac..7d73944b61 100644
--- a/Source/Generated/GB_AxB__times_minus_fp64.c
+++ b/Source/Generated/GB_AxB__times_minus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_fp64
 // A'*B function (dot2):     GB_Adot2B__times_minus_fp64
 // A'*B function (dot3):     GB_Adot3B__times_minus_fp64
-// A*B function (heap):      GB_AheapB__times_minus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik - bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik - bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x - y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x - y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_MINUS_FP64 || GxB_NO_TIMES_MINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_fp64
 GrB_Info GB_Adot2B__times_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_fp64
 GrB_Info GB_Adot3B__times_minus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_minus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_minus_fp64
+GrB_Info GB_Asaxpy3B__times_minus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_int16.c b/Source/Generated/GB_AxB__times_minus_int16.c
index 82f8b78b98..b777725157 100644
--- a/Source/Generated/GB_AxB__times_minus_int16.c
+++ b/Source/Generated/GB_AxB__times_minus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_int16
 // A'*B function (dot2):     GB_Adot2B__times_minus_int16
 // A'*B function (dot3):     GB_Adot3B__times_minus_int16
-// A*B function (heap):      GB_AheapB__times_minus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_MINUS_INT16 || GxB_NO_TIMES_MINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_int16
 GrB_Info GB_Adot2B__times_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_int16
 GrB_Info GB_Adot3B__times_minus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_int32.c b/Source/Generated/GB_AxB__times_minus_int32.c
index 67a2da54ca..034a6ceae8 100644
--- a/Source/Generated/GB_AxB__times_minus_int32.c
+++ b/Source/Generated/GB_AxB__times_minus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_int32
 // A'*B function (dot2):     GB_Adot2B__times_minus_int32
 // A'*B function (dot3):     GB_Adot3B__times_minus_int32
-// A*B function (heap):      GB_AheapB__times_minus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid's binary function; the monoid here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_MINUS_INT32 || GxB_NO_TIMES_MINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_int32
 GrB_Info GB_Adot2B__times_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_int32
 GrB_Info GB_Adot3B__times_minus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_int64.c b/Source/Generated/GB_AxB__times_minus_int64.c
index fcacf13dd4..d5fab2b6d5 100644
--- a/Source/Generated/GB_AxB__times_minus_int64.c
+++ b/Source/Generated/GB_AxB__times_minus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_int64
 // A'*B function (dot2):     GB_Adot2B__times_minus_int64
 // A'*B function (dot3):     GB_Adot3B__times_minus_int64
-// A*B function (heap):      GB_AheapB__times_minus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid's binary function; the monoid here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_MINUS_INT64 || GxB_NO_TIMES_MINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_int64
 GrB_Info GB_Adot2B__times_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_int64
 GrB_Info GB_Adot3B__times_minus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_int8.c b/Source/Generated/GB_AxB__times_minus_int8.c
index 96c13745df..afcf781bee 100644
--- a/Source/Generated/GB_AxB__times_minus_int8.c
+++ b/Source/Generated/GB_AxB__times_minus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_int8
 // A'*B function (dot2):     GB_Adot2B__times_minus_int8
 // A'*B function (dot3):     GB_Adot3B__times_minus_int8
-// A*B function (heap):      GB_AheapB__times_minus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (monoid update; the monoid here is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid's binary function; the monoid here is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (monoid update; the monoid here is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_MINUS_INT8 || GxB_NO_TIMES_MINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_int8
 GrB_Info GB_Adot2B__times_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_int8
 GrB_Info GB_Adot3B__times_minus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_uint16.c b/Source/Generated/GB_AxB__times_minus_uint16.c
index 6661bcb5e7..100d0e689b 100644
--- a/Source/Generated/GB_AxB__times_minus_uint16.c
+++ b/Source/Generated/GB_AxB__times_minus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_uint16
 // A'*B function (dot2):     GB_Adot2B__times_minus_uint16
 // A'*B function (dot3):     GB_Adot3B__times_minus_uint16
-// A*B function (heap):      GB_AheapB__times_minus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_MINUS_UINT16 || GxB_NO_TIMES_MINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_uint16
 GrB_Info GB_Adot2B__times_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_uint16
 GrB_Info GB_Adot3B__times_minus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_uint32.c b/Source/Generated/GB_AxB__times_minus_uint32.c
index 6cdcaf0792..d636f6c784 100644
--- a/Source/Generated/GB_AxB__times_minus_uint32.c
+++ b/Source/Generated/GB_AxB__times_minus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_uint32
 // A'*B function (dot2):     GB_Adot2B__times_minus_uint32
 // A'*B function (dot3):     GB_Adot3B__times_minus_uint32
-// A*B function (heap):      GB_AheapB__times_minus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_MINUS_UINT32 || GxB_NO_TIMES_MINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_uint32
 GrB_Info GB_Adot2B__times_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_uint32
 GrB_Info GB_Adot3B__times_minus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_uint64.c b/Source/Generated/GB_AxB__times_minus_uint64.c
index cceab3c0cf..992a6099ea 100644
--- a/Source/Generated/GB_AxB__times_minus_uint64.c
+++ b/Source/Generated/GB_AxB__times_minus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_uint64
 // A'*B function (dot2):     GB_Adot2B__times_minus_uint64
 // A'*B function (dot3):     GB_Adot3B__times_minus_uint64
-// A*B function (heap):      GB_AheapB__times_minus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_MINUS_UINT64 || GxB_NO_TIMES_MINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_uint64
 GrB_Info GB_Adot2B__times_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_uint64
 GrB_Info GB_Adot3B__times_minus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_minus_uint8.c b/Source/Generated/GB_AxB__times_minus_uint8.c
index 272793f664..15b8c1b1c7 100644
--- a/Source/Generated/GB_AxB__times_minus_uint8.c
+++ b/Source/Generated/GB_AxB__times_minus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_minus_uint8
 // A'*B function (dot2):     GB_Adot2B__times_minus_uint8
 // A'*B function (dot3):     GB_Adot3B__times_minus_uint8
-// A*B function (heap):      GB_AheapB__times_minus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_minus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_minus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik - bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (aik - bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x - y) ;
+#define GB_MULT(z, x, y) \
+    z = (x - y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x - y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x - y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the TIMES monoid: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the TIMES monoid: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the TIMES monoid: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint8_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_MINUS || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_MINUS_UINT8 || GxB_NO_TIMES_MINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_minus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_minus_uint8
 GrB_Info GB_Adot2B__times_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_minus_uint8
 GrB_Info GB_Adot3B__times_minus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_minus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_minus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_minus_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_minus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_fp32.c b/Source/Generated/GB_AxB__times_plus_fp32.c
index ffc495de7b..3cb9472287 100644
--- a/Source/Generated/GB_AxB__times_plus_fp32.c
+++ b/Source/Generated/GB_AxB__times_plus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_fp32
 // A'*B function (dot2):     GB_Adot2B__times_plus_fp32
 // A'*B function (dot3):     GB_Adot3B__times_plus_fp32
-// A*B function (heap):      GB_AheapB__times_plus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik + bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x + y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x + y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function (TIMES): x * y; arguments are not parenthesized
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (float))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_PLUS_FP32 || GxB_NO_TIMES_PLUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_fp32
 GrB_Info GB_Adot2B__times_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_fp32
 GrB_Info GB_Adot3B__times_plus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product (dot4); returns GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_plus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_plus_fp32
+GrB_Info GB_Asaxpy3B__times_plus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_fp64.c b/Source/Generated/GB_AxB__times_plus_fp64.c
index 0b6843fdef..8596b099c3 100644
--- a/Source/Generated/GB_AxB__times_plus_fp64.c
+++ b/Source/Generated/GB_AxB__times_plus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_fp64
 // A'*B function (dot2):     GB_Adot2B__times_plus_fp64
 // A'*B function (dot3):     GB_Adot3B__times_plus_fp64
-// A*B function (heap):      GB_AheapB__times_plus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik + bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik + bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x + y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x + y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function (TIMES): x * y; arguments are not parenthesized
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (double))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_PLUS_FP64 || GxB_NO_TIMES_PLUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_fp64
 GrB_Info GB_Adot2B__times_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_fp64
 GrB_Info GB_Adot3B__times_plus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product (dot4); returns GrB_NO_VALUE if GB_DISABLE
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_plus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_plus_fp64
+GrB_Info GB_Asaxpy3B__times_plus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_int16.c b/Source/Generated/GB_AxB__times_plus_int16.c
index ca9dff449f..ad89977cdb 100644
--- a/Source/Generated/GB_AxB__times_plus_int16.c
+++ b/Source/Generated/GB_AxB__times_plus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_int16
 // A'*B function (dot2):     GB_Adot2B__times_plus_int16
 // A'*B function (dot3):     GB_Adot3B__times_plus_int16
-// A*B function (heap):      GB_AheapB__times_plus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function (TIMES): x * y; arguments are not parenthesized
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_PLUS_INT16 || GxB_NO_TIMES_PLUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_int16
 GrB_Info GB_Adot2B__times_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_int16
 GrB_Info GB_Adot3B__times_plus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product (dot4); returns GrB_NO_VALUE if GB_DISABLE
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_int32.c b/Source/Generated/GB_AxB__times_plus_int32.c
index 206cdae82c..9222947a0a 100644
--- a/Source/Generated/GB_AxB__times_plus_int32.c
+++ b/Source/Generated/GB_AxB__times_plus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_int32
 // A'*B function (dot2):     GB_Adot2B__times_plus_int32
 // A'*B function (dot3):     GB_Adot3B__times_plus_int32
-// A*B function (heap):      GB_AheapB__times_plus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function (TIMES): x * y; arguments are not parenthesized
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings, 0 otherwise
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator, 0 otherwise
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // copy len entries: memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t))
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_PLUS_INT32 || GxB_NO_TIMES_PLUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_int32
 GrB_Info GB_Adot2B__times_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_int32
 GrB_Info GB_Adot3B__times_plus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_int64.c b/Source/Generated/GB_AxB__times_plus_int64.c
index 67554ac96c..037bff6e6e 100644
--- a/Source/Generated/GB_AxB__times_plus_int64.c
+++ b/Source/Generated/GB_AxB__times_plus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_int64
 // A'*B function (dot2):     GB_Adot2B__times_plus_int64
 // A'*B function (dot3):     GB_Adot3B__times_plus_int64
-// A*B function (heap):      GB_AheapB__times_plus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; TIMES for this semiring: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_PLUS_INT64 || GxB_NO_TIMES_PLUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_int64
 GrB_Info GB_Adot2B__times_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_int64
 GrB_Info GB_Adot3B__times_plus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_int8.c b/Source/Generated/GB_AxB__times_plus_int8.c
index 419d81144a..94701b0230 100644
--- a/Source/Generated/GB_AxB__times_plus_int8.c
+++ b/Source/Generated/GB_AxB__times_plus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_int8
 // A'*B function (dot2):     GB_Adot2B__times_plus_int8
 // A'*B function (dot3):     GB_Adot3B__times_plus_int8
-// A*B function (heap):      GB_AheapB__times_plus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; TIMES for this semiring: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_PLUS_INT8 || GxB_NO_TIMES_PLUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_int8
 GrB_Info GB_Adot2B__times_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_int8
 GrB_Info GB_Adot3B__times_plus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_uint16.c b/Source/Generated/GB_AxB__times_plus_uint16.c
index 4389091d1b..e689651216 100644
--- a/Source/Generated/GB_AxB__times_plus_uint16.c
+++ b/Source/Generated/GB_AxB__times_plus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_uint16
 // A'*B function (dot2):     GB_Adot2B__times_plus_uint16
 // A'*B function (dot3):     GB_Adot3B__times_plus_uint16
-// A*B function (heap):      GB_AheapB__times_plus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; TIMES for this semiring: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_PLUS_UINT16 || GxB_NO_TIMES_PLUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_uint16
 GrB_Info GB_Adot2B__times_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_uint16
 GrB_Info GB_Adot3B__times_plus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_uint32.c b/Source/Generated/GB_AxB__times_plus_uint32.c
index 54d9ca7e06..87e6095f53 100644
--- a/Source/Generated/GB_AxB__times_plus_uint32.c
+++ b/Source/Generated/GB_AxB__times_plus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_uint32
 // A'*B function (dot2):     GB_Adot2B__times_plus_uint32
 // A'*B function (dot3):     GB_Adot3B__times_plus_uint32
-// A*B function (heap):      GB_AheapB__times_plus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (the monoid "add" of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" of x and y; TIMES for this semiring: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (the monoid "add" of this semiring is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_PLUS_UINT32 || GxB_NO_TIMES_PLUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_uint32
 GrB_Info GB_Adot2B__times_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_uint32
 GrB_Info GB_Adot3B__times_plus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_uint64.c b/Source/Generated/GB_AxB__times_plus_uint64.c
index ab9ce63218..cbba1baa5b 100644
--- a/Source/Generated/GB_AxB__times_plus_uint64.c
+++ b/Source/Generated/GB_AxB__times_plus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_uint64
 // A'*B function (dot2):     GB_Adot2B__times_plus_uint64
 // A'*B function (dot3):     GB_Adot3B__times_plus_uint64
-// A*B function (heap):      GB_AheapB__times_plus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_PLUS_UINT64 || GxB_NO_TIMES_PLUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_uint64
 GrB_Info GB_Adot2B__times_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_uint64
 GrB_Info GB_Adot3B__times_plus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_plus_uint8.c b/Source/Generated/GB_AxB__times_plus_uint8.c
index d621891eed..306657da52 100644
--- a/Source/Generated/GB_AxB__times_plus_uint8.c
+++ b/Source/Generated/GB_AxB__times_plus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_plus_uint8
 // A'*B function (dot2):     GB_Adot2B__times_plus_uint8
 // A'*B function (dot3):     GB_Adot3B__times_plus_uint8
-// A*B function (heap):      GB_AheapB__times_plus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_plus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_plus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik + bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (aik + bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x + y) ;
+#define GB_MULT(z, x, y) \
+    z = (x + y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x + y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x + y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_PLUS || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_PLUS_UINT8 || GxB_NO_TIMES_PLUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_plus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_plus_uint8
 GrB_Info GB_Adot2B__times_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_plus_uint8
 GrB_Info GB_Adot3B__times_plus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_plus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_plus_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_plus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_fp32.c b/Source/Generated/GB_AxB__times_rdiv_fp32.c
index 1525cf2d6e..c03ba91a41 100644
--- a/Source/Generated/GB_AxB__times_rdiv_fp32.c
+++ b/Source/Generated/GB_AxB__times_rdiv_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_fp32
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_fp32
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_fp32
-// A*B function (heap):      GB_AheapB__times_rdiv_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (bkj / aik)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (y / x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (y / x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_RDIV_FP32 || GxB_NO_TIMES_RDIV_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_fp32
 GrB_Info GB_Adot2B__times_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_fp32
 GrB_Info GB_Adot3B__times_rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_rdiv_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_rdiv_fp32
+GrB_Info GB_Asaxpy3B__times_rdiv_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_fp64.c b/Source/Generated/GB_AxB__times_rdiv_fp64.c
index 75a769f634..68f8b493b7 100644
--- a/Source/Generated/GB_AxB__times_rdiv_fp64.c
+++ b/Source/Generated/GB_AxB__times_rdiv_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_fp64
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_fp64
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_fp64
-// A*B function (heap):      GB_AheapB__times_rdiv_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj / aik)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (bkj / aik)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y / x) ;
+#define GB_MULT(z, x, y) \
+    z = (y / x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (y / x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (y / x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (update with the TIMES monoid)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function; for the TIMES monoid this is x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (update with the TIMES monoid)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_RDIV_FP64 || GxB_NO_TIMES_RDIV_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_fp64
 GrB_Info GB_Adot2B__times_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_fp64
 GrB_Info GB_Adot3B__times_rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_rdiv_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_rdiv_fp64
+GrB_Info GB_Asaxpy3B__times_rdiv_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_int16.c b/Source/Generated/GB_AxB__times_rdiv_int16.c
index 62f6396c53..a599d7f574 100644
--- a/Source/Generated/GB_AxB__times_rdiv_int16.c
+++ b/Source/Generated/GB_AxB__times_rdiv_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_int16
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_int16
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_int16
-// A*B function (heap):      GB_AheapB__times_rdiv_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 16)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 16) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = GB_IDIV_SIGNED (y, x, 16) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the monoid "add" is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the monoid "add" is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_RDIV_INT16 || GxB_NO_TIMES_RDIV_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_int16
 GrB_Info GB_Adot2B__times_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_int16
 GrB_Info GB_Adot3B__times_rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_int32.c b/Source/Generated/GB_AxB__times_rdiv_int32.c
index d8f0f70e0b..0da8546d01 100644
--- a/Source/Generated/GB_AxB__times_rdiv_int32.c
+++ b/Source/Generated/GB_AxB__times_rdiv_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_int32
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_int32
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_int32
-// A*B function (heap):      GB_AheapB__times_rdiv_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 32)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 32) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = GB_IDIV_SIGNED (y, x, 32) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the monoid "add" is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the monoid "add" is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_RDIV_INT32 || GxB_NO_TIMES_RDIV_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_int32
 GrB_Info GB_Adot2B__times_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_int32
 GrB_Info GB_Adot3B__times_rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_int64.c b/Source/Generated/GB_AxB__times_rdiv_int64.c
index c123036cd6..51df0ffb2a 100644
--- a/Source/Generated/GB_AxB__times_rdiv_int64.c
+++ b/Source/Generated/GB_AxB__times_rdiv_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_int64
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_int64
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_int64
-// A*B function (heap):      GB_AheapB__times_rdiv_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 64)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 64) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = GB_IDIV_SIGNED (y, x, 64) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t (the monoid "add" is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y (the monoid "add" is TIMES)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t (the monoid "add" is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_RDIV_INT64 || GxB_NO_TIMES_RDIV_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_int64
 GrB_Info GB_Adot2B__times_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_int64
 GrB_Info GB_Adot3B__times_rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_int8.c b/Source/Generated/GB_AxB__times_rdiv_int8.c
index dbb91e5174..1d4176fbb5 100644
--- a/Source/Generated/GB_AxB__times_rdiv_int8.c
+++ b/Source/Generated/GB_AxB__times_rdiv_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_int8
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_int8
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_int8
-// A*B function (heap):      GB_AheapB__times_rdiv_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = GB_IDIV_SIGNED (bkj, aik, 8)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = GB_IDIV_SIGNED (bkj, aik, 8) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_SIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_SIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = GB_IDIV_SIGNED (y, x, 8) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_RDIV_INT8 || GxB_NO_TIMES_RDIV_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_int8
 GrB_Info GB_Adot2B__times_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_int8
 GrB_Info GB_Adot3B__times_rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_uint16.c b/Source/Generated/GB_AxB__times_rdiv_uint16.c
index 657cec4966..f87940c89e 100644
--- a/Source/Generated/GB_AxB__times_rdiv_uint16.c
+++ b/Source/Generated/GB_AxB__times_rdiv_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_uint16
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_uint16
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_uint16
-// A*B function (heap):      GB_AheapB__times_rdiv_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 16)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 16) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 16) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 16)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = GB_IDIV_UNSIGNED (y, x, 16) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_RDIV_UINT16 || GxB_NO_TIMES_RDIV_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_uint16
 GrB_Info GB_Adot2B__times_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_uint16
 GrB_Info GB_Adot3B__times_rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_uint32.c b/Source/Generated/GB_AxB__times_rdiv_uint32.c
index d6c87f6afa..93ae432212 100644
--- a/Source/Generated/GB_AxB__times_rdiv_uint32.c
+++ b/Source/Generated/GB_AxB__times_rdiv_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_uint32
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_uint32
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_uint32
-// A*B function (heap):      GB_AheapB__times_rdiv_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 32)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 32) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 32) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 32)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = GB_IDIV_UNSIGNED (y, x, 32) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_RDIV_UINT32 || GxB_NO_TIMES_RDIV_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_uint32
 GrB_Info GB_Adot2B__times_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_uint32
 GrB_Info GB_Adot3B__times_rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_uint64.c b/Source/Generated/GB_AxB__times_rdiv_uint64.c
index df5c6e08fb..b02130b89f 100644
--- a/Source/Generated/GB_AxB__times_rdiv_uint64.c
+++ b/Source/Generated/GB_AxB__times_rdiv_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_uint64
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_uint64
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_uint64
-// A*B function (heap):      GB_AheapB__times_rdiv_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 64)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 64) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 64) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 64)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = GB_IDIV_UNSIGNED (y, x, 64) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_RDIV_UINT64 || GxB_NO_TIMES_RDIV_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_uint64
 GrB_Info GB_Adot2B__times_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_uint64
 GrB_Info GB_Adot3B__times_rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rdiv_uint8.c b/Source/Generated/GB_AxB__times_rdiv_uint8.c
index cfcfd297eb..caca90843b 100644
--- a/Source/Generated/GB_AxB__times_rdiv_uint8.c
+++ b/Source/Generated/GB_AxB__times_rdiv_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rdiv_uint8
 // A'*B function (dot2):     GB_Adot2B__times_rdiv_uint8
 // A'*B function (dot3):     GB_Adot3B__times_rdiv_uint8
-// A*B function (heap):      GB_AheapB__times_rdiv_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_rdiv_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rdiv_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = GB_IDIV_UNSIGNED (bkj, aik, 8)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = GB_IDIV_UNSIGNED (bkj, aik, 8) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = GB_IDIV_UNSIGNED (y, x, 8) ;
+#define GB_MULT(z, x, y) \
+    z = GB_IDIV_UNSIGNED (y, x, 8)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = GB_IDIV_UNSIGNED (y, x, 8) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RDIV || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_RDIV_UINT8 || GxB_NO_TIMES_RDIV_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rdiv_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rdiv_uint8
 GrB_Info GB_Adot2B__times_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rdiv_uint8
 GrB_Info GB_Adot3B__times_rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rdiv_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rdiv_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rdiv_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_fp32.c b/Source/Generated/GB_AxB__times_rminus_fp32.c
index 2b67ce0d01..8c97101942 100644
--- a/Source/Generated/GB_AxB__times_rminus_fp32.c
+++ b/Source/Generated/GB_AxB__times_rminus_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_fp32
 // A'*B function (dot2):     GB_Adot2B__times_rminus_fp32
 // A'*B function (dot3):     GB_Adot3B__times_rminus_fp32
-// A*B function (heap):      GB_AheapB__times_rminus_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (bkj - aik)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (y - x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (y - x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_RMINUS_FP32 || GxB_NO_TIMES_RMINUS_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_fp32
 GrB_Info GB_Adot2B__times_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_fp32
 GrB_Info GB_Adot3B__times_rminus_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_rminus_fp32
+GrB_Info GB_Asaxpy3B__times_rminus_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_fp64.c b/Source/Generated/GB_AxB__times_rminus_fp64.c
index f339a65557..fa3c01945f 100644
--- a/Source/Generated/GB_AxB__times_rminus_fp64.c
+++ b/Source/Generated/GB_AxB__times_rminus_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_fp64
 // A'*B function (dot2):     GB_Adot2B__times_rminus_fp64
 // A'*B function (dot3):     GB_Adot3B__times_rminus_fp64
-// A*B function (heap):      GB_AheapB__times_rminus_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (bkj - aik)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (bkj - aik)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (y - x) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (y - x)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_RMINUS_FP64 || GxB_NO_TIMES_RMINUS_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_fp64
 GrB_Info GB_Adot2B__times_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_fp64
 GrB_Info GB_Adot3B__times_rminus_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_rminus_fp64
+GrB_Info GB_Asaxpy3B__times_rminus_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_int16.c b/Source/Generated/GB_AxB__times_rminus_int16.c
index b6ca85c76b..97ecdd8289 100644
--- a/Source/Generated/GB_AxB__times_rminus_int16.c
+++ b/Source/Generated/GB_AxB__times_rminus_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_int16
 // A'*B function (dot2):     GB_Adot2B__times_rminus_int16
 // A'*B function (dot3):     GB_Adot3B__times_rminus_int16
-// A*B function (heap):      GB_AheapB__times_rminus_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_RMINUS_INT16 || GxB_NO_TIMES_RMINUS_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_int16
 GrB_Info GB_Adot2B__times_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_int16
 GrB_Info GB_Adot3B__times_rminus_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_int32.c b/Source/Generated/GB_AxB__times_rminus_int32.c
index 8dfcee1738..ca7720c544 100644
--- a/Source/Generated/GB_AxB__times_rminus_int32.c
+++ b/Source/Generated/GB_AxB__times_rminus_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_int32
 // A'*B function (dot2):     GB_Adot2B__times_rminus_int32
 // A'*B function (dot3):     GB_Adot3B__times_rminus_int32
-// A*B function (heap):      GB_AheapB__times_rminus_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid: here C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid: here x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid: here Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_RMINUS_INT32 || GxB_NO_TIMES_RMINUS_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_int32
 GrB_Info GB_Adot2B__times_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_int32
 GrB_Info GB_Adot3B__times_rminus_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_int64.c b/Source/Generated/GB_AxB__times_rminus_int64.c
index 9229d66966..f00e91df7e 100644
--- a/Source/Generated/GB_AxB__times_rminus_int64.c
+++ b/Source/Generated/GB_AxB__times_rminus_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_int64
 // A'*B function (dot2):     GB_Adot2B__times_rminus_int64
 // A'*B function (dot3):     GB_Adot3B__times_rminus_int64
-// A*B function (heap):      GB_AheapB__times_rminus_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid: here C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid: here x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: C is 64-bit)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid: here Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_RMINUS_INT64 || GxB_NO_TIMES_RMINUS_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_int64
 GrB_Info GB_Adot2B__times_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_int64
 GrB_Info GB_Adot3B__times_rminus_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_int8.c b/Source/Generated/GB_AxB__times_rminus_int8.c
index 0c60c350b4..a4f61e931a 100644
--- a/Source/Generated/GB_AxB__times_rminus_int8.c
+++ b/Source/Generated/GB_AxB__times_rminus_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_int8
 // A'*B function (dot2):     GB_Adot2B__times_rminus_int8
 // A'*B function (dot3):     GB_Adot3B__times_rminus_int8
-// A*B function (heap):      GB_AheapB__times_rminus_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid: here C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid: here x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid: here Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_RMINUS_INT8 || GxB_NO_TIMES_RMINUS_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_int8
 GrB_Info GB_Adot2B__times_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_int8
 GrB_Info GB_Adot3B__times_rminus_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_uint16.c b/Source/Generated/GB_AxB__times_rminus_uint16.c
index d8b1204a2a..91be1f1400 100644
--- a/Source/Generated/GB_AxB__times_rminus_uint16.c
+++ b/Source/Generated/GB_AxB__times_rminus_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_uint16
 // A'*B function (dot2):     GB_Adot2B__times_rminus_uint16
 // A'*B function (dot3):     GB_Adot3B__times_rminus_uint16
-// A*B function (heap):      GB_AheapB__times_rminus_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" is the monoid: here C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid: here x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" is the monoid: here Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_RMINUS_UINT16 || GxB_NO_TIMES_RMINUS_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_uint16
 GrB_Info GB_Adot2B__times_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_uint16
 GrB_Info GB_Adot3B__times_rminus_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_uint32.c b/Source/Generated/GB_AxB__times_rminus_uint32.c
index 32da486153..bc3cff56c3 100644
--- a/Source/Generated/GB_AxB__times_rminus_uint32.c
+++ b/Source/Generated/GB_AxB__times_rminus_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_uint32
 // A'*B function (dot2):     GB_Adot2B__times_rminus_uint32
 // A'*B function (dot3):     GB_Adot3B__times_rminus_uint32
-// A*B function (heap):      GB_AheapB__times_rminus_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where the monoid "+" is TIMES: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where the monoid "+" is TIMES: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where the monoid "+" is TIMES: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_RMINUS_UINT32 || GxB_NO_TIMES_RMINUS_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_uint32
 GrB_Info GB_Adot2B__times_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_uint32
 GrB_Info GB_Adot3B__times_rminus_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_uint64.c b/Source/Generated/GB_AxB__times_rminus_uint64.c
index 797a999b85..11d80b50e2 100644
--- a/Source/Generated/GB_AxB__times_rminus_uint64.c
+++ b/Source/Generated/GB_AxB__times_rminus_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_uint64
 // A'*B function (dot2):     GB_Adot2B__times_rminus_uint64
 // A'*B function (dot3):     GB_Adot3B__times_rminus_uint64
-// A*B function (heap):      GB_AheapB__times_rminus_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where the monoid "+" is TIMES: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where the monoid "+" is TIMES: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where the monoid "+" is TIMES: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_RMINUS_UINT64 || GxB_NO_TIMES_RMINUS_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_uint64
 GrB_Info GB_Adot2B__times_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_uint64
 GrB_Info GB_Adot3B__times_rminus_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_rminus_uint8.c b/Source/Generated/GB_AxB__times_rminus_uint8.c
index bee5eb73cc..886740e9da 100644
--- a/Source/Generated/GB_AxB__times_rminus_uint8.c
+++ b/Source/Generated/GB_AxB__times_rminus_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_rminus_uint8
 // A'*B function (dot2):     GB_Adot2B__times_rminus_uint8
 // A'*B function (dot3):     GB_Adot3B__times_rminus_uint8
-// A*B function (heap):      GB_AheapB__times_rminus_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_rminus_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_rminus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (bkj - aik)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (bkj - aik) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (y - x) ;
+#define GB_MULT(z, x, y) \
+    z = (y - x)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (y - x) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (y - x) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where the monoid "+" is TIMES: Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where the monoid "+" is TIMES: x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where the monoid "+" is TIMES: Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_RMINUS || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_RMINUS_UINT8 || GxB_NO_TIMES_RMINUS_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_rminus_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_rminus_uint8
 GrB_Info GB_Adot2B__times_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_rminus_uint8
 GrB_Info GB_Adot3B__times_rminus_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_rminus_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_rminus_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_rminus_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_fp32.c b/Source/Generated/GB_AxB__times_second_fp32.c
index 5bb2b34452..92db786a2d 100644
--- a/Source/Generated/GB_AxB__times_second_fp32.c
+++ b/Source/Generated/GB_AxB__times_second_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_fp32
 // A'*B function (dot2):     GB_Adot2B__times_second_fp32
 // A'*B function (dot3):     GB_Adot3B__times_second_fp32
-// A*B function (heap):      GB_AheapB__times_second_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid's "add" function, which is TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update; the "add" operator is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_SECOND_FP32 || GxB_NO_TIMES_SECOND_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_fp32
 GrB_Info GB_Adot2B__times_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_fp32
 GrB_Info GB_Adot3B__times_second_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_fp32
+GrB_Info GB_Asaxpy3B__times_second_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_fp64.c b/Source/Generated/GB_AxB__times_second_fp64.c
index 8c8aeab5fc..a8961c6aa0 100644
--- a/Source/Generated/GB_AxB__times_second_fp64.c
+++ b/Source/Generated/GB_AxB__times_second_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_fp64
 // A'*B function (dot2):     GB_Adot2B__times_second_fp64
 // A'*B function (dot3):     GB_Adot3B__times_second_fp64
-// A*B function (heap):      GB_AheapB__times_second_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid's "add" function, which is TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update; the "add" operator is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_SECOND_FP64 || GxB_NO_TIMES_SECOND_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_fp64
 GrB_Info GB_Adot2B__times_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_fp64
 GrB_Info GB_Adot3B__times_second_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_fp64
+GrB_Info GB_Asaxpy3B__times_second_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_int16.c b/Source/Generated/GB_AxB__times_second_int16.c
index ccbcada9a0..272582271c 100644
--- a/Source/Generated/GB_AxB__times_second_int16.c
+++ b/Source/Generated/GB_AxB__times_second_int16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_int16
 // A'*B function (dot2):     GB_Adot2B__times_second_int16
 // A'*B function (dot3):     GB_Adot3B__times_second_int16
-// A*B function (heap):      GB_AheapB__times_second_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid's "add" function, which is TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update; the "add" operator is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_SECOND_INT16 || GxB_NO_TIMES_SECOND_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_int16
 GrB_Info GB_Adot2B__times_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_int16
 GrB_Info GB_Adot3B__times_second_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_int16
+GrB_Info GB_Asaxpy3B__times_second_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_int32.c b/Source/Generated/GB_AxB__times_second_int32.c
index b8e94a41fd..add7334ec5 100644
--- a/Source/Generated/GB_AxB__times_second_int32.c
+++ b/Source/Generated/GB_AxB__times_second_int32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_int32
 // A'*B function (dot2):     GB_Adot2B__times_second_int32
 // A'*B function (dot3):     GB_Adot3B__times_second_int32
-// A*B function (heap):      GB_AheapB__times_second_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t  (monoid update; the "add" operator of this semiring is TIMES)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x * y  (the monoid's "add" function, which is TIMES for this semiring)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t  (monoid update; the "add" operator is TIMES)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_SECOND_INT32 || GxB_NO_TIMES_SECOND_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_int32
 GrB_Info GB_Adot2B__times_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_int32
 GrB_Info GB_Adot3B__times_second_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_int32
+GrB_Info GB_Asaxpy3B__times_second_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_int64.c b/Source/Generated/GB_AxB__times_second_int64.c
index 98cc7900b6..f176a733c9 100644
--- a/Source/Generated/GB_AxB__times_second_int64.c
+++ b/Source/Generated/GB_AxB__times_second_int64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_int64
 // A'*B function (dot2):     GB_Adot2B__times_second_int64
 // A'*B function (dot3):     GB_Adot3B__times_second_int64
-// A*B function (heap):      GB_AheapB__times_second_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_SECOND_INT64 || GxB_NO_TIMES_SECOND_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_int64
 GrB_Info GB_Adot2B__times_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_int64
 GrB_Info GB_Adot3B__times_second_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_int64     // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: no hard-coded kernel
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the template
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_int64
+GrB_Info GB_Asaxpy3B__times_second_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_int8.c b/Source/Generated/GB_AxB__times_second_int8.c
index f0fcc23d3e..a9e7d7af64 100644
--- a/Source/Generated/GB_AxB__times_second_int8.c
+++ b/Source/Generated/GB_AxB__times_second_int8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_int8
 // A'*B function (dot2):     GB_Adot2B__times_second_int8
 // A'*B function (dot3):     GB_Adot3B__times_second_int8
-// A*B function (heap):      GB_AheapB__times_second_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_SECOND_INT8 || GxB_NO_TIMES_SECOND_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_int8
 GrB_Info GB_Adot2B__times_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_int8
 GrB_Info GB_Adot3B__times_second_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_int8      // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: no hard-coded kernel
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the template
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_int8
+GrB_Info GB_Asaxpy3B__times_second_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_uint16.c b/Source/Generated/GB_AxB__times_second_uint16.c
index 5a0ab586b1..88614bb3e7 100644
--- a/Source/Generated/GB_AxB__times_second_uint16.c
+++ b/Source/Generated/GB_AxB__times_second_uint16.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_uint16
 // A'*B function (dot2):     GB_Adot2B__times_second_uint16
 // A'*B function (dot3):     GB_Adot3B__times_second_uint16
-// A*B function (heap):      GB_AheapB__times_second_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_SECOND_UINT16 || GxB_NO_TIMES_SECOND_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_uint16
 GrB_Info GB_Adot2B__times_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_uint16
 GrB_Info GB_Adot3B__times_second_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_uint16    // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: no hard-coded kernel
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the template
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_uint16
+GrB_Info GB_Asaxpy3B__times_second_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_uint32.c b/Source/Generated/GB_AxB__times_second_uint32.c
index 6145d00695..412f765c55 100644
--- a/Source/Generated/GB_AxB__times_second_uint32.c
+++ b/Source/Generated/GB_AxB__times_second_uint32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_uint32
 // A'*B function (dot2):     GB_Adot2B__times_second_uint32
 // A'*B function (dot3):     GB_Adot3B__times_second_uint32
-// A*B function (heap):      GB_AheapB__times_second_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_SECOND_UINT32 || GxB_NO_TIMES_SECOND_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_uint32
 GrB_Info GB_Adot2B__times_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_uint32
 GrB_Info GB_Adot3B__times_second_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_uint32    // C+=A'*B, dense dot product
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // semiring disabled: no hard-coded kernel
+    #else
+    #include "GB_AxB_dot4_template.c"   // body comes from the template
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_uint32
+GrB_Info GB_Asaxpy3B__times_second_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_uint64.c b/Source/Generated/GB_AxB__times_second_uint64.c
index b6e321c5dc..bc1e87de28 100644
--- a/Source/Generated/GB_AxB__times_second_uint64.c
+++ b/Source/Generated/GB_AxB__times_second_uint64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_uint64
 // A'*B function (dot2):     GB_Adot2B__times_second_uint64
 // A'*B function (dot3):     GB_Adot3B__times_second_uint64
-// A*B function (heap):      GB_AheapB__times_second_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// monoid update of C(i,j) with t (the monoid is TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid function applied to x and y (the monoid is TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // monoid update of Hx [i] with t (the monoid is TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_SECOND_UINT64 || GxB_NO_TIMES_SECOND_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_uint64
 GrB_Info GB_Adot2B__times_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_uint64
 GrB_Info GB_Adot3B__times_second_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_uint64
+(
+    GrB_Matrix C,                       // C+=A'*B: C is both input and output
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // set by the GxB_NO_* controls above
+    return (GrB_NO_VALUE) ;             // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // included template is the function body
+    return (GrB_SUCCESS) ;              // reached if the template does not return
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_uint64
+GrB_Info GB_Asaxpy3B__times_second_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_second_uint8.c b/Source/Generated/GB_AxB__times_second_uint8.c
index e75633594a..2e13ed0e58 100644
--- a/Source/Generated/GB_AxB__times_second_uint8.c
+++ b/Source/Generated/GB_AxB__times_second_uint8.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_second_uint8
 // A'*B function (dot2):     GB_Adot2B__times_second_uint8
 // A'*B function (dot3):     GB_Adot3B__times_second_uint8
-// A*B function (heap):      GB_AheapB__times_second_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_second_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_second_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -33,6 +33,9 @@
 
 // Multiply: z = bkj
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= bkj
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = y ;
+#define GB_MULT(z, x, y) \
+    z = y
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= y ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= y
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// monoid update of C(i,j) with t (the monoid is TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid function applied to x and y (the monoid is TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // monoid update of Hx [i] with t (the monoid is TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_SECOND || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_SECOND_UINT8 || GxB_NO_TIMES_SECOND_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_second_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_second_uint8
 GrB_Info GB_Adot2B__times_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_second_uint8
 GrB_Info GB_Adot3B__times_second_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_second_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_second_uint8
+(
+    GrB_Matrix C,                       // C+=A'*B: C is both input and output
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // set by the GxB_NO_* controls above
+    return (GrB_NO_VALUE) ;             // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // included template is the function body
+    return (GrB_SUCCESS) ;              // reached if the template does not return
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_second_uint8
+GrB_Info GB_Asaxpy3B__times_second_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_fp32.c b/Source/Generated/GB_AxB__times_times_fp32.c
index 93418d9382..bd3fd9f4f5 100644
--- a/Source/Generated/GB_AxB__times_times_fp32.c
+++ b/Source/Generated/GB_AxB__times_times_fp32.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_fp32
 // A'*B function (dot2):     GB_Adot2B__times_times_fp32
 // A'*B function (dot3):     GB_Adot3B__times_times_fp32
-// A*B function (heap):      GB_AheapB__times_times_fp32
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_fp32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_fp32
 
 // C type:   float
 // A type:   float
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik * bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x * y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x * y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    float cij ;
+#define GB_CIJ_DECLARE(cij) \
+    float cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// monoid update of C(i,j) with t (the monoid is TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid function applied to x and y (the monoid is TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // monoid update of Hx [i] with t (the monoid is TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(float))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FP32 || GxB_NO_TIMES_FP32 || GxB_NO_TIMES_TIMES_FP32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_fp32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    float *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    float *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_times_fp32
 GrB_Info GB_Adot2B__times_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_fp32
 GrB_Info GB_Adot3B__times_times_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_fp32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_times_fp32
+(
+    GrB_Matrix C,                       // C+=A'*B: C is both input and output
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE                      // set by the GxB_NO_* controls above
+    return (GrB_NO_VALUE) ;             // semiring disabled: use the generic case
+    #else
+    #include "GB_AxB_dot4_template.c"   // included template is the function body
+    return (GrB_SUCCESS) ;              // reached if the template does not return
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_times_fp32
+GrB_Info GB_Asaxpy3B__times_times_fp32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    float *GB_RESTRICT Cx = C->x ;
-    float cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_fp64.c b/Source/Generated/GB_AxB__times_times_fp64.c
index 590b935b4b..dc047eb244 100644
--- a/Source/Generated/GB_AxB__times_times_fp64.c
+++ b/Source/Generated/GB_AxB__times_times_fp64.c
@@ -1,9 +1,8 @@
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,18 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_fp64
 // A'*B function (dot2):     GB_Adot2B__times_times_fp64
 // A'*B function (dot3):     GB_Adot3B__times_times_fp64
-// A*B function (heap):      GB_AheapB__times_times_fp64
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_fp64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_fp64
 
 // C type:   double
 // A type:   double
@@ -33,6 +33,9 @@
 
 // Multiply: z = (aik * bkj)
 // Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  cij *= (aik * bkj)
 // Identity: 1
 // Terminal: ;
@@ -57,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    z *= (x * y) ;
-
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
+#define GB_MULTADD(z, x, y) \
+    z *= (x * y)
 
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -75,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     GB_PRAGMA_SIMD
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    double cij ;
+#define GB_CIJ_DECLARE(cij) \
+    double cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// monoid update of C(i,j) with t (the monoid is TIMES): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid function applied to x and y (the monoid is TIMES): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // monoid update of Hx [i] with t (the monoid is TIMES): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(double))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FP64 || GxB_NO_TIMES_FP64 || GxB_NO_TIMES_TIMES_FP64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_fp64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    double *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    double *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -126,7 +177,7 @@ GrB_Info GB_AgusB__times_times_fp64
 GrB_Info GB_Adot2B__times_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -152,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_fp64
 GrB_Info GB_Adot3B__times_times_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -169,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_fp64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B__times_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB__times_times_fp64
+GrB_Info GB_Asaxpy3B__times_times_fp64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    double *GB_RESTRICT Cx = C->x ;
-    double cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_int16.c b/Source/Generated/GB_AxB__times_times_int16.c
index bfd6f79fb6..462123a928 100644
--- a/Source/Generated/GB_AxB__times_times_int16.c
+++ b/Source/Generated/GB_AxB__times_times_int16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_int16
 // A'*B function (dot2):     GB_Adot2B__times_times_int16
 // A'*B function (dot3):     GB_Adot3B__times_times_int16
-// A*B function (heap):      GB_AheapB__times_times_int16
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_int16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_int16
 
 // C type:   int16_t
 // A type:   int16_t
 // B type:   int16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int16_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int16_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int16_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int16_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT16 || GxB_NO_TIMES_INT16 || GxB_NO_TIMES_TIMES_INT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_int16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_int16
 GrB_Info GB_Adot2B__times_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_int16
 GrB_Info GB_Adot3B__times_times_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_int16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_int16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_int16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int16_t *GB_RESTRICT Cx = C->x ;
-    int16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_int32.c b/Source/Generated/GB_AxB__times_times_int32.c
index 611169bc79..e8f6e45d1a 100644
--- a/Source/Generated/GB_AxB__times_times_int32.c
+++ b/Source/Generated/GB_AxB__times_times_int32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_int32
 // A'*B function (dot2):     GB_Adot2B__times_times_int32
 // A'*B function (dot3):     GB_Adot3B__times_times_int32
-// A*B function (heap):      GB_AheapB__times_times_int32
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_int32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_int32
 
 // C type:   int32_t
 // A type:   int32_t
 // B type:   int32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int32_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int32_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int32_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int32_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT32 || GxB_NO_TIMES_INT32 || GxB_NO_TIMES_TIMES_INT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_int32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_int32
 GrB_Info GB_Adot2B__times_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_int32
 GrB_Info GB_Adot3B__times_times_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_int32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_int32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_int32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int32_t *GB_RESTRICT Cx = C->x ;
-    int32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_int64.c b/Source/Generated/GB_AxB__times_times_int64.c
index 2b1ad37854..366cfd1c9b 100644
--- a/Source/Generated/GB_AxB__times_times_int64.c
+++ b/Source/Generated/GB_AxB__times_times_int64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_int64
 // A'*B function (dot2):     GB_Adot2B__times_times_int64
 // A'*B function (dot3):     GB_Adot3B__times_times_int64
-// A*B function (heap):      GB_AheapB__times_times_int64
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_int64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_int64
 
 // C type:   int64_t
 // A type:   int64_t
 // B type:   int64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int64_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int64_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int64_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int64_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT64 || GxB_NO_TIMES_INT64 || GxB_NO_TIMES_TIMES_INT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_int64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_int64
 GrB_Info GB_Adot2B__times_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_int64
 GrB_Info GB_Adot3B__times_times_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_int64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_int64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_int64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int64_t *GB_RESTRICT Cx = C->x ;
-    int64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_int8.c b/Source/Generated/GB_AxB__times_times_int8.c
index edc7205b89..6d6a9b56c7 100644
--- a/Source/Generated/GB_AxB__times_times_int8.c
+++ b/Source/Generated/GB_AxB__times_times_int8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_int8
 // A'*B function (dot2):     GB_Adot2B__times_times_int8
 // A'*B function (dot3):     GB_Adot3B__times_times_int8
-// A*B function (heap):      GB_AheapB__times_times_int8
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_int8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_int8
 
 // C type:   int8_t
 // A type:   int8_t
 // B type:   int8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  int8_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    int8_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    int8_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    int8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    int8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+=" is the monoid update (TIMES here): Cx [p] *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" is the monoid operator (TIMES here): x * y
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    int8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+=" is the monoid update (TIMES here): Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (int8_t)): copies len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(int8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT8 || GxB_NO_TIMES_INT8 || GxB_NO_TIMES_TIMES_INT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_int8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    int8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_int8
 GrB_Info GB_Adot2B__times_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_int8
 GrB_Info GB_Adot3B__times_times_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_int8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_int8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_int8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    int8_t *GB_RESTRICT Cx = C->x ;
-    int8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_uint16.c b/Source/Generated/GB_AxB__times_times_uint16.c
index d2efe7a555..69826f830d 100644
--- a/Source/Generated/GB_AxB__times_times_uint16.c
+++ b/Source/Generated/GB_AxB__times_times_uint16.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_uint16
 // A'*B function (dot2):     GB_Adot2B__times_times_uint16
 // A'*B function (dot3):     GB_Adot3B__times_times_uint16
-// A*B function (heap):      GB_AheapB__times_times_uint16
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_uint16
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
 // B type:   uint16_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint16_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint16_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint16_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint16_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint16_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function, which is x * y for the TIMES monoid
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint16_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint16_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint16_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16 || GxB_NO_TIMES_TIMES_UINT16)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_uint16
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint16_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_uint16
 GrB_Info GB_Adot2B__times_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_uint16
 GrB_Info GB_Adot3B__times_times_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_uint16
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_uint16
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_uint16
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint16_t *GB_RESTRICT Cx = C->x ;
-    uint16_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_uint32.c b/Source/Generated/GB_AxB__times_times_uint32.c
index 911bee423d..a623bbf7ef 100644
--- a/Source/Generated/GB_AxB__times_times_uint32.c
+++ b/Source/Generated/GB_AxB__times_times_uint32.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_uint32
 // A'*B function (dot2):     GB_Adot2B__times_times_uint32
 // A'*B function (dot3):     GB_Adot3B__times_times_uint32
-// A*B function (heap):      GB_AheapB__times_times_uint32
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_uint32
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
 // B type:   uint32_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint32_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint32_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint32_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint32_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint32_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function, which is x * y for the TIMES monoid
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint32_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffffffffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint32_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint32_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32 || GxB_NO_TIMES_TIMES_UINT32)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_uint32
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint32_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_uint32
 GrB_Info GB_Adot2B__times_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_uint32
 GrB_Info GB_Adot3B__times_times_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_uint32
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_uint32
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_uint32
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint32_t *GB_RESTRICT Cx = C->x ;
-    uint32_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_uint64.c b/Source/Generated/GB_AxB__times_times_uint64.c
index 66a00a40bc..a93711fbe6 100644
--- a/Source/Generated/GB_AxB__times_times_uint64.c
+++ b/Source/Generated/GB_AxB__times_times_uint64.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_uint64
 // A'*B function (dot2):     GB_Adot2B__times_times_uint64
 // A'*B function (dot3):     GB_Adot3B__times_times_uint64
-// A*B function (heap):      GB_AheapB__times_times_uint64
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_uint64
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
 // B type:   uint64_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint64_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint64_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint64_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint64_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint64_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) *= t
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// monoid "add" function, which is x * y for the TIMES monoid
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint64_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers (0 here: uint64_t)
+#define GB_CTYPE_BITS \
+    0
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] *= t
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len * sizeof (uint64_t)): copy len entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint64_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64 || GxB_NO_TIMES_TIMES_UINT64)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_uint64
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint64_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_uint64
 GrB_Info GB_Adot2B__times_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_uint64
 GrB_Info GB_Adot3B__times_times_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_uint64
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_uint64
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_uint64
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint64_t *GB_RESTRICT Cx = C->x ;
-    uint64_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_AxB__times_times_uint8.c b/Source/Generated/GB_AxB__times_times_uint8.c
index 830be44c43..40d833f7a3 100644
--- a/Source/Generated/GB_AxB__times_times_uint8.c
+++ b/Source/Generated/GB_AxB__times_times_uint8.c
@@ -1,10 +1,8 @@
-
-
 //------------------------------------------------------------------------------
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,25 +13,29 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB__times_times_uint8
 // A'*B function (dot2):     GB_Adot2B__times_times_uint8
 // A'*B function (dot3):     GB_Adot3B__times_times_uint8
-// A*B function (heap):      GB_AheapB__times_times_uint8
+// C+=A'*B function (dot4):  GB_Adot4B__times_times_uint8
+// A*B function (saxpy3):    GB_Asaxpy3B__times_times_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
 // B type:   uint8_t
 
 // Multiply: z = (aik * bkj)
-// Add:      cij *= x_op_y
+// Add:      cij *= z
+//           'any' monoid?  0
+//           atomic?        1
+//           OpenMP atomic? 1
 // MultAdd:  uint8_t x_op_y = (aik * bkj) ; cij *= x_op_y
 // Identity: 1
 // Terminal: if (cij == 0) break ;
@@ -58,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    z = (x * y) ;
+#define GB_MULT(z, x, y) \
+    z = (x * y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    uint8_t x_op_y = (x * y) ; z *= x_op_y ;
+#define GB_MULTADD(z, x, y) \
+    uint8_t x_op_y = (x * y) ; z *= x_op_y
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     1
 
@@ -76,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     if (cij == 0) break ;
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
     ;
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    uint8_t cij ;
+#define GB_CIJ_DECLARE(cij) \
+    uint8_t cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t, where "+" denotes the monoid (TIMES here, so Cx [p] *= t)
+#define GB_CIJ_UPDATE(p,t) \
+    Cx [p] *= t
+
+// x + y, where "+" denotes the monoid's operator (TIMES here, so x * y)
+#define GB_ADD_FUNCTION(x,y) \
+    x * y
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    uint8_t
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    0xffL
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    0
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    0
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    1
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    1
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    0
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    0
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t, where "+" denotes the monoid (TIMES here: Hx [i] *= t)
+    #define GB_HX_UPDATE(i,t) \
+        Hx [i] *= t
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len) where len counts uint8_t entries
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(uint8_t))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8 || GxB_NO_TIMES_TIMES_UINT8)
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB__times_times_uint8
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    uint8_t *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -127,7 +177,7 @@ GrB_Info GB_AgusB__times_times_uint8
 GrB_Info GB_Adot2B__times_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -153,7 +203,7 @@ GrB_Info GB_Adot2B__times_times_uint8
 GrB_Info GB_Adot3B__times_times_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -170,33 +220,51 @@ GrB_Info GB_Adot3B__times_times_uint8
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+GrB_Info GB_Adot4B__times_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
 
-GrB_Info GB_AheapB__times_times_uint8
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
+//------------------------------------------------------------------------------
+
+#include "GB_AxB_saxpy3_template.h"
+
+GrB_Info GB_Asaxpy3B__times_times_uint8
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    uint8_t *GB_RESTRICT Cx = C->x ;
-    uint8_t cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generated/GB_binop__div_fp32.c b/Source/Generated/GB_binop__div_fp32.c
index 908b1a3ea7..45fca430bd 100644
--- a/Source/Generated/GB_binop__div_fp32.c
+++ b/Source/Generated/GB_binop__div_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_fp32
-// A.*B function (eWiseMult):  GB_AemultB__div_fp32
-// A*D function (colscale):    GB_AxD__div_fp32
-// D*A function (rowscale):    GB_DxB__div_fp32
+// A+B function (eWiseAdd):         GB_AaddB__div_fp32
+// A.*B function (eWiseMult):       GB_AemultB__div_fp32
+// A*D function (colscale):         GB_AxD__div_fp32
+// D*A function (rowscale):         GB_DxB__div_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__div_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__div_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x / y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_FP32 || GxB_NO_DIV_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_fp64.c b/Source/Generated/GB_binop__div_fp64.c
index 50b0befa02..b4f0b8d92d 100644
--- a/Source/Generated/GB_binop__div_fp64.c
+++ b/Source/Generated/GB_binop__div_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_fp64
-// A.*B function (eWiseMult):  GB_AemultB__div_fp64
-// A*D function (colscale):    GB_AxD__div_fp64
-// D*A function (rowscale):    GB_DxB__div_fp64
+// A+B function (eWiseAdd):         GB_AaddB__div_fp64
+// A.*B function (eWiseMult):       GB_AemultB__div_fp64
+// A*D function (colscale):         GB_AxD__div_fp64
+// D*A function (rowscale):         GB_DxB__div_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__div_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__div_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x / y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_FP64 || GxB_NO_DIV_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_int16.c b/Source/Generated/GB_binop__div_int16.c
index b71108de73..b0e66418ee 100644
--- a/Source/Generated/GB_binop__div_int16.c
+++ b/Source/Generated/GB_binop__div_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_int16
-// A.*B function (eWiseMult):  GB_AemultB__div_int16
-// A*D function (colscale):    GB_AxD__div_int16
-// D*A function (rowscale):    GB_DxB__div_int16
+// A+B function (eWiseAdd):         GB_AaddB__div_int16
+// A.*B function (eWiseMult):       GB_AemultB__div_int16
+// A*D function (colscale):         GB_AxD__div_int16
+// D*A function (rowscale):         GB_DxB__div_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__div_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__div_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (x, y, 16) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_INT16 || GxB_NO_DIV_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_int32.c b/Source/Generated/GB_binop__div_int32.c
index 6ec699c320..a8443b0a4a 100644
--- a/Source/Generated/GB_binop__div_int32.c
+++ b/Source/Generated/GB_binop__div_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_int32
-// A.*B function (eWiseMult):  GB_AemultB__div_int32
-// A*D function (colscale):    GB_AxD__div_int32
-// D*A function (rowscale):    GB_DxB__div_int32
+// A+B function (eWiseAdd):         GB_AaddB__div_int32
+// A.*B function (eWiseMult):       GB_AemultB__div_int32
+// A*D function (colscale):         GB_AxD__div_int32
+// D*A function (rowscale):         GB_DxB__div_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__div_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__div_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (x, y, 32) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_INT32 || GxB_NO_DIV_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_int64.c b/Source/Generated/GB_binop__div_int64.c
index 13fdcd3060..19bb7168f7 100644
--- a/Source/Generated/GB_binop__div_int64.c
+++ b/Source/Generated/GB_binop__div_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_int64
-// A.*B function (eWiseMult):  GB_AemultB__div_int64
-// A*D function (colscale):    GB_AxD__div_int64
-// D*A function (rowscale):    GB_DxB__div_int64
+// A+B function (eWiseAdd):         GB_AaddB__div_int64
+// A.*B function (eWiseMult):       GB_AemultB__div_int64
+// A*D function (colscale):         GB_AxD__div_int64
+// D*A function (rowscale):         GB_DxB__div_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__div_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__div_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (x, y, 64) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_INT64 || GxB_NO_DIV_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_int8.c b/Source/Generated/GB_binop__div_int8.c
index 7e89feaec6..2beda26a86 100644
--- a/Source/Generated/GB_binop__div_int8.c
+++ b/Source/Generated/GB_binop__div_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_int8
-// A.*B function (eWiseMult):  GB_AemultB__div_int8
-// A*D function (colscale):    GB_AxD__div_int8
-// D*A function (rowscale):    GB_DxB__div_int8
+// A+B function (eWiseAdd):         GB_AaddB__div_int8
+// A.*B function (eWiseMult):       GB_AemultB__div_int8
+// A*D function (colscale):         GB_AxD__div_int8
+// D*A function (rowscale):         GB_DxB__div_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__div_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__div_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (x, y, 8) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_INT8 || GxB_NO_DIV_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Hard-coded DIV_INT8 instance: GB_BINOP above is z = GB_IDIV_SIGNED (x, y, 8).
+// NOTE(review): presumably "+=" and "+" in the title both denote that operator.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_int8
+(
+    GrB_Matrix C,               // non-const handle; presumably updated in place
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // returns void and has no GB_DISABLE guard, unlike the kernels below
+    #include "GB_dense_ewise3_accum_template.c"     // supplies the whole body
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int8
+(
+    GrB_Matrix C,               // non-const handle; presumably the result
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // NOTE(review): "+" in the title presumably stands for GB_BINOP (DIV_INT8)
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // supplies the kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_int8
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GrB_Matrix A,         // sparse per the title; const input
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slicing arrays that
+    const int64_t *GB_RESTRICT klast_slice,     // are presumably consumed by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,           // presumably the number of slices/tasks
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // the braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_int8
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GB_void *p_ywork,     // the scalar, read below through an int8_t *
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // NOTE(review): the cast below drops const; ywork is only a by-value copy
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): appears unreachable; the block above ends with a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_uint16.c b/Source/Generated/GB_binop__div_uint16.c
index eb7d94b936..9193d99bad 100644
--- a/Source/Generated/GB_binop__div_uint16.c
+++ b/Source/Generated/GB_binop__div_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_uint16
-// A.*B function (eWiseMult):  GB_AemultB__div_uint16
-// A*D function (colscale):    GB_AxD__div_uint16
-// D*A function (rowscale):    GB_DxB__div_uint16
+// A+B function (eWiseAdd):         GB_AaddB__div_uint16
+// A.*B function (eWiseMult):       GB_AemultB__div_uint16
+// A*D function (colscale):         GB_AxD__div_uint16
+// D*A function (rowscale):         GB_DxB__div_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__div_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__div_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (x, y, 16) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_UINT16 || GxB_NO_DIV_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Hard-coded DIV_UINT16 instance: GB_BINOP is z = GB_IDIV_UNSIGNED (x, y, 16).
+// NOTE(review): presumably "+=" and "+" in the title both denote that operator.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_uint16
+(
+    GrB_Matrix C,               // non-const handle; presumably updated in place
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // returns void and has no GB_DISABLE guard, unlike the kernels below
+    #include "GB_dense_ewise3_accum_template.c"     // supplies the whole body
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint16
+(
+    GrB_Matrix C,               // non-const handle; presumably the result
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // NOTE(review): "+" in the title presumably stands for GB_BINOP (DIV_UINT16)
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // supplies the kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_uint16
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GrB_Matrix A,         // sparse per the title; const input
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slicing arrays that
+    const int64_t *GB_RESTRICT klast_slice,     // are presumably consumed by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,           // presumably the number of slices/tasks
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // the braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_uint16
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GB_void *p_ywork,     // the scalar, read below through a uint16_t *
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // NOTE(review): the cast below drops const; ywork is only a by-value copy
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): appears unreachable; the block above ends with a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_uint32.c b/Source/Generated/GB_binop__div_uint32.c
index 82a35bad35..754fdace10 100644
--- a/Source/Generated/GB_binop__div_uint32.c
+++ b/Source/Generated/GB_binop__div_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_uint32
-// A.*B function (eWiseMult):  GB_AemultB__div_uint32
-// A*D function (colscale):    GB_AxD__div_uint32
-// D*A function (rowscale):    GB_DxB__div_uint32
+// A+B function (eWiseAdd):         GB_AaddB__div_uint32
+// A.*B function (eWiseMult):       GB_AemultB__div_uint32
+// A*D function (colscale):         GB_AxD__div_uint32
+// D*A function (rowscale):         GB_DxB__div_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__div_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__div_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (x, y, 32) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_UINT32 || GxB_NO_DIV_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Hard-coded DIV_UINT32 instance: GB_BINOP is z = GB_IDIV_UNSIGNED (x, y, 32).
+// NOTE(review): presumably "+=" and "+" in the title both denote that operator.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_uint32
+(
+    GrB_Matrix C,               // non-const handle; presumably updated in place
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // returns void and has no GB_DISABLE guard, unlike the kernels below
+    #include "GB_dense_ewise3_accum_template.c"     // supplies the whole body
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint32
+(
+    GrB_Matrix C,               // non-const handle; presumably the result
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // NOTE(review): "+" in the title presumably stands for GB_BINOP (DIV_UINT32)
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // supplies the kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_uint32
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GrB_Matrix A,         // sparse per the title; const input
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slicing arrays that
+    const int64_t *GB_RESTRICT klast_slice,     // are presumably consumed by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,           // presumably the number of slices/tasks
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // the braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_uint32
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GB_void *p_ywork,     // the scalar, read below through a uint32_t *
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // NOTE(review): the cast below drops const; ywork is only a by-value copy
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): appears unreachable; the block above ends with a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_uint64.c b/Source/Generated/GB_binop__div_uint64.c
index e2bfeca915..5da73b4a35 100644
--- a/Source/Generated/GB_binop__div_uint64.c
+++ b/Source/Generated/GB_binop__div_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_uint64
-// A.*B function (eWiseMult):  GB_AemultB__div_uint64
-// A*D function (colscale):    GB_AxD__div_uint64
-// D*A function (rowscale):    GB_DxB__div_uint64
+// A+B function (eWiseAdd):         GB_AaddB__div_uint64
+// A.*B function (eWiseMult):       GB_AemultB__div_uint64
+// A*D function (colscale):         GB_AxD__div_uint64
+// D*A function (rowscale):         GB_DxB__div_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__div_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__div_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (x, y, 64) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_UINT64 || GxB_NO_DIV_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Hard-coded DIV_UINT64 instance: GB_BINOP is z = GB_IDIV_UNSIGNED (x, y, 64).
+// NOTE(review): presumably "+=" and "+" in the title both denote that operator.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_uint64
+(
+    GrB_Matrix C,               // non-const handle; presumably updated in place
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // returns void and has no GB_DISABLE guard, unlike the kernels below
+    #include "GB_dense_ewise3_accum_template.c"     // supplies the whole body
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint64
+(
+    GrB_Matrix C,               // non-const handle; presumably the result
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // NOTE(review): "+" in the title presumably stands for GB_BINOP (DIV_UINT64)
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // supplies the kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_uint64
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GrB_Matrix A,         // sparse per the title; const input
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slicing arrays that
+    const int64_t *GB_RESTRICT klast_slice,     // are presumably consumed by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,           // presumably the number of slices/tasks
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // the braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_uint64
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GB_void *p_ywork,     // the scalar, read below through a uint64_t *
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // NOTE(review): the cast below drops const; ywork is only a by-value copy
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): appears unreachable; the block above ends with a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__div_uint8.c b/Source/Generated/GB_binop__div_uint8.c
index 4a9db928db..bcfabbcfbb 100644
--- a/Source/Generated/GB_binop__div_uint8.c
+++ b/Source/Generated/GB_binop__div_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__div_uint8
-// A.*B function (eWiseMult):  GB_AemultB__div_uint8
-// A*D function (colscale):    GB_AxD__div_uint8
-// D*A function (rowscale):    GB_DxB__div_uint8
+// A+B function (eWiseAdd):         GB_AaddB__div_uint8
+// A.*B function (eWiseMult):       GB_AemultB__div_uint8
+// A*D function (colscale):         GB_AxD__div_uint8
+// D*A function (rowscale):         GB_DxB__div_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__div_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__div_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__div_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__div_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (x, y, 8) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_DIV || GxB_NO_UINT8 || GxB_NO_DIV_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Hard-coded DIV_UINT8 instance: GB_BINOP is z = GB_IDIV_UNSIGNED (x, y, 8).
+// NOTE(review): presumably "+=" and "+" in the title both denote that operator.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__div_uint8
+(
+    GrB_Matrix C,               // non-const handle; presumably updated in place
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // returns void and has no GB_DISABLE guard, unlike the kernels below
+    #include "GB_dense_ewise3_accum_template.c"     // supplies the whole body
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint8
+(
+    GrB_Matrix C,               // non-const handle; presumably the result
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // NOTE(review): "+" in the title presumably stands for GB_BINOP (DIV_UINT8)
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // supplies the kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__div_uint8
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GrB_Matrix A,         // sparse per the title; const input
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slicing arrays that
+    const int64_t *GB_RESTRICT klast_slice,     // are presumably consumed by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,           // presumably the number of slices/tasks
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // the braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__div_uint8
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GB_void *p_ywork,     // the scalar, read below through a uint8_t *
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // NOTE(review): the cast below drops const; ywork is only a by-value copy
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): appears unreachable; the block above ends with a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__div_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__div_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_bool.c b/Source/Generated/GB_binop__eq_bool.c
index 6757475ca5..067e95831a 100644
--- a/Source/Generated/GB_binop__eq_bool.c
+++ b/Source/Generated/GB_binop__eq_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_bool
-// A.*B function (eWiseMult):  GB_AemultB__eq_bool
-// A*D function (colscale):    GB_AxD__eq_bool
-// D*A function (rowscale):    GB_DxB__eq_bool
+// A+B function (eWiseAdd):         GB_AaddB__eq_bool
+// A.*B function (eWiseMult):       GB_AemultB__eq_bool
+// A*D function (colscale):         GB_AxD__eq_bool
+// D*A function (rowscale):         GB_DxB__eq_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_BOOL || GxB_NO_EQ_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: this file lists the C+=A+B function as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// EQ is not in that list; "(none)" below is a placeholder, not a real name.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_bool
+(
+    GrB_Matrix C,               // non-const handle; presumably the result
+    const GrB_Matrix A,         // const input
+    const GrB_Matrix B,         // const input
+    const int nthreads          // presumably read by the included template
+)
+{   // NOTE(review): "+" in the title presumably stands for GB_BINOP (x == y)
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // supplies the kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_bool
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GrB_Matrix A,         // sparse per the title; const input
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slicing arrays that
+    const int64_t *GB_RESTRICT klast_slice,     // are presumably consumed by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,           // presumably the number of slices/tasks
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // the braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_bool
+(
+    GrB_Matrix C,               // dense per the title; non-const handle
+    const GB_void *p_ywork,     // the scalar, read below through a bool *
+    const int nthreads          // presumably read by the included template
+)
+{
+    #if GB_DISABLE              // true if any listed GxB_NO_* control is set
+    return (GrB_NO_VALUE) ;     // kernel disabled: nothing is computed
+    #else
+    // NOTE(review): the cast below drops const; ywork is only a by-value copy
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): appears unreachable; the block above ends with a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_fp32.c b/Source/Generated/GB_binop__eq_fp32.c
index b32efc55f2..52be1af5b3 100644
--- a/Source/Generated/GB_binop__eq_fp32.c
+++ b/Source/Generated/GB_binop__eq_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_fp32
-// A.*B function (eWiseMult):  GB_AemultB__eq_fp32
-// A*D function (colscale):    GB_AxD__eq_fp32
-// D*A function (rowscale):    GB_DxB__eq_fp32
+// A+B function (eWiseAdd):         GB_AaddB__eq_fp32
+// A.*B function (eWiseMult):       GB_AemultB__eq_fp32
+// A*D function (colscale):         GB_AxD__eq_fp32
+// D*A function (rowscale):         GB_DxB__eq_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_fp32
 
 // C type:   bool
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag: nonzero when the op is SECOND (0 here: this file's op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: nonzero when the op is plus_fp32 or plus_fp64 (0 here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: nonzero when the op is minus_fp32 or minus_fp64 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)      // placeholder text; presumably means "no gateway" -- confirm
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_FP32 || GxB_NO_EQ_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: "(none)" is a placeholder, so no C += A+B kernel exists here.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// This file's op is EQ, which is not in that list.
+void (none)
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_fp32     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads; presumably used by the template
+)
+{ 
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else                       // kernel body is textually included below
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_fp32     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT klast_slice,     // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT pstart_slice,    // task-slicing array -- TODO confirm layout
+    const int ntasks,                           // number of tasks
+    const int nthreads                          // number of threads
+)
+{
+    #if GB_DISABLE          // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                   // kernel body is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_fp32     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar x (only read in the disabled body)
+    const int nthreads          // number of threads
+)
+{
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // kernel body is compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_fp64.c b/Source/Generated/GB_binop__eq_fp64.c
index e3d6c66e9b..11718e0726 100644
--- a/Source/Generated/GB_binop__eq_fp64.c
+++ b/Source/Generated/GB_binop__eq_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_fp64
-// A.*B function (eWiseMult):  GB_AemultB__eq_fp64
-// A*D function (colscale):    GB_AxD__eq_fp64
-// D*A function (rowscale):    GB_DxB__eq_fp64
+// A+B function (eWiseAdd):         GB_AaddB__eq_fp64
+// A.*B function (eWiseMult):       GB_AemultB__eq_fp64
+// A*D function (colscale):         GB_AxD__eq_fp64
+// D*A function (rowscale):         GB_DxB__eq_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_fp64
 
 // C type:   bool
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag: nonzero when the op is SECOND (0 here: this file's op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: nonzero when the op is plus_fp32 or plus_fp64 (0 here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: nonzero when the op is minus_fp32 or minus_fp64 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)      // placeholder text; presumably means "no gateway" -- confirm
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_FP64 || GxB_NO_EQ_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: "(none)" is a placeholder, so no C += A+B kernel exists here.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// This file's op is EQ, which is not in that list.
+void (none)
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_fp64     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads; presumably used by the template
+)
+{ 
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else                       // kernel body is textually included below
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_fp64     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT klast_slice,     // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT pstart_slice,    // task-slicing array -- TODO confirm layout
+    const int ntasks,                           // number of tasks
+    const int nthreads                          // number of threads
+)
+{
+    #if GB_DISABLE          // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                   // kernel body is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_fp64     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar x (only read in the disabled body)
+    const int nthreads          // number of threads
+)
+{
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // kernel body is compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_int16.c b/Source/Generated/GB_binop__eq_int16.c
index a4d4cfc122..c28e760c8e 100644
--- a/Source/Generated/GB_binop__eq_int16.c
+++ b/Source/Generated/GB_binop__eq_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_int16
-// A.*B function (eWiseMult):  GB_AemultB__eq_int16
-// A*D function (colscale):    GB_AxD__eq_int16
-// D*A function (rowscale):    GB_DxB__eq_int16
+// A+B function (eWiseAdd):         GB_AaddB__eq_int16
+// A.*B function (eWiseMult):       GB_AemultB__eq_int16
+// A*D function (colscale):         GB_AxD__eq_int16
+// D*A function (rowscale):         GB_DxB__eq_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag: nonzero when the op is SECOND (0 here: this file's op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: nonzero when the op is plus_fp32 or plus_fp64 (0 here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: nonzero when the op is minus_fp32 or minus_fp64 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)      // placeholder text; presumably means "no gateway" -- confirm
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT16 || GxB_NO_EQ_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: "(none)" is a placeholder, so no C += A+B kernel exists here.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// This file's op is EQ, which is not in that list.
+void (none)
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int16    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads; presumably used by the template
+)
+{ 
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else                       // kernel body is textually included below
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_int16    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT klast_slice,     // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT pstart_slice,    // task-slicing array -- TODO confirm layout
+    const int ntasks,                           // number of tasks
+    const int nthreads                          // number of threads
+)
+{
+    #if GB_DISABLE          // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                   // kernel body is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_int16    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar x (only read in the disabled body)
+    const int nthreads          // number of threads
+)
+{
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // kernel body is compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_int32.c b/Source/Generated/GB_binop__eq_int32.c
index b1b091a661..9df70dff30 100644
--- a/Source/Generated/GB_binop__eq_int32.c
+++ b/Source/Generated/GB_binop__eq_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_int32
-// A.*B function (eWiseMult):  GB_AemultB__eq_int32
-// A*D function (colscale):    GB_AxD__eq_int32
-// D*A function (rowscale):    GB_DxB__eq_int32
+// A+B function (eWiseAdd):         GB_AaddB__eq_int32
+// A.*B function (eWiseMult):       GB_AemultB__eq_int32
+// A*D function (colscale):         GB_AxD__eq_int32
+// D*A function (rowscale):         GB_DxB__eq_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag: nonzero when the op is SECOND (0 here: this file's op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: nonzero when the op is plus_fp32 or plus_fp64 (0 here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: nonzero when the op is minus_fp32 or minus_fp64 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)      // placeholder text; presumably means "no gateway" -- confirm
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT32 || GxB_NO_EQ_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: "(none)" is a placeholder, so no C += A+B kernel exists here.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// This file's op is EQ, which is not in that list.
+void (none)
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int32    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads; presumably used by the template
+)
+{ 
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else                       // kernel body is textually included below
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_int32    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT klast_slice,     // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT pstart_slice,    // task-slicing array -- TODO confirm layout
+    const int ntasks,                           // number of tasks
+    const int nthreads                          // number of threads
+)
+{
+    #if GB_DISABLE          // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                   // kernel body is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_int32    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar x (only read in the disabled body)
+    const int nthreads          // number of threads
+)
+{
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // kernel body is compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_int64.c b/Source/Generated/GB_binop__eq_int64.c
index 2652955590..cfdba7d596 100644
--- a/Source/Generated/GB_binop__eq_int64.c
+++ b/Source/Generated/GB_binop__eq_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_int64
-// A.*B function (eWiseMult):  GB_AemultB__eq_int64
-// A*D function (colscale):    GB_AxD__eq_int64
-// D*A function (rowscale):    GB_DxB__eq_int64
+// A+B function (eWiseAdd):         GB_AaddB__eq_int64
+// A.*B function (eWiseMult):       GB_AemultB__eq_int64
+// A*D function (colscale):         GB_AxD__eq_int64
+// D*A function (rowscale):         GB_DxB__eq_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag: nonzero when the op is SECOND (0 here: this file's op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: nonzero when the op is plus_fp32 or plus_fp64 (0 here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: nonzero when the op is minus_fp32 or minus_fp64 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)      // placeholder text; presumably means "no gateway" -- confirm
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT64 || GxB_NO_EQ_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: "(none)" is a placeholder, so no C += A+B kernel exists here.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// This file's op is EQ, which is not in that list.
+void (none)
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int64    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads; presumably used by the template
+)
+{ 
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else                       // kernel body is textually included below
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_int64    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT klast_slice,     // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT pstart_slice,    // task-slicing array -- TODO confirm layout
+    const int ntasks,                           // number of tasks
+    const int nthreads                          // number of threads
+)
+{
+    #if GB_DISABLE          // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                   // kernel body is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_int64    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar x (only read in the disabled body)
+    const int nthreads          // number of threads
+)
+{
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // kernel body is compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_int8.c b/Source/Generated/GB_binop__eq_int8.c
index fb5cea1077..b7da7a02a3 100644
--- a/Source/Generated/GB_binop__eq_int8.c
+++ b/Source/Generated/GB_binop__eq_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_int8
-// A.*B function (eWiseMult):  GB_AemultB__eq_int8
-// A*D function (colscale):    GB_AxD__eq_int8
-// D*A function (rowscale):    GB_DxB__eq_int8
+// A+B function (eWiseAdd):         GB_AaddB__eq_int8
+// A.*B function (eWiseMult):       GB_AemultB__eq_int8
+// A*D function (colscale):         GB_AxD__eq_int8
+// D*A function (rowscale):         GB_DxB__eq_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag: nonzero when the op is SECOND (0 here: this file's op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: nonzero when the op is plus_fp32 or plus_fp64 (0 here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: nonzero when the op is minus_fp32 or minus_fp64 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)      // placeholder text; presumably means "no gateway" -- confirm
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_INT8 || GxB_NO_EQ_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: "(none)" is a placeholder, so no C += A+B kernel exists here.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// This file's op is EQ, which is not in that list.
+void (none)
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int8     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input matrix
+    const GrB_Matrix B,         // dense input matrix
+    const int nthreads          // number of threads; presumably used by the template
+)
+{ 
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else                       // kernel body is textually included below
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_int8     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT klast_slice,     // task-slicing array -- TODO confirm layout
+    const int64_t *GB_RESTRICT pstart_slice,    // task-slicing array -- TODO confirm layout
+    const int ntasks,                           // number of tasks
+    const int nthreads                          // number of threads
+)
+{
+    #if GB_DISABLE          // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                   // kernel body is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_int8     // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar x (only read in the disabled body)
+    const int nthreads          // number of threads
+)
+{
+    #if GB_DISABLE              // set when any GxB_NO_* control in GB_DISABLE is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // kernel body is compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with no work done -- confirm never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_uint16.c b/Source/Generated/GB_binop__eq_uint16.c
index 2413cb048c..91bbdb584f 100644
--- a/Source/Generated/GB_binop__eq_uint16.c
+++ b/Source/Generated/GB_binop__eq_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_uint16
-// A.*B function (eWiseMult):  GB_AemultB__eq_uint16
-// A*D function (colscale):    GB_AxD__eq_uint16
-// D*A function (rowscale):    GB_DxB__eq_uint16
+// A+B function (eWiseAdd):         GB_AaddB__eq_uint16
+// A.*B function (eWiseMult):       GB_AemultB__eq_uint16
+// A*D function (colscale):         GB_AxD__eq_uint16
+// D*A function (rowscale):         GB_DxB__eq_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar named t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag for: op is SECOND (0 here, since this op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for: op is plus_fp32 or plus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for: op is minus_fp32 or minus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) marks that there is no such routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT16 || GxB_NO_EQ_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C += A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; EQ is none of these.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint16    // C = A+B kernel, EQ on uint16, all dense
+(
+    GrB_Matrix C,           // result matrix
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // not referenced here directly; visible to the template
+)
+{ 
+    #if GB_DISABLE          // true if EQ, UINT16, or EQ_UINT16 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_uint16    // C += A slot for EQ on uint16
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GrB_Matrix A,     // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not read here;
+    const int64_t *GB_RESTRICT klast_slice,     // only the disabled template
+    const int64_t *GB_RESTRICT pstart_slice,    // below could reference them
+    const int ntasks,       // unused while the template is disabled
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT16, or EQ_UINT16 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // template is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_uint16    // C += x slot for EQ on uint16
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GB_void *p_ywork, // pointer to the scalar; read as bool in the disabled code
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT16, or EQ_UINT16 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // everything up to the matching #endif is compiled out
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_uint32.c b/Source/Generated/GB_binop__eq_uint32.c
index 508a60cd1d..83dda8307d 100644
--- a/Source/Generated/GB_binop__eq_uint32.c
+++ b/Source/Generated/GB_binop__eq_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_uint32
-// A.*B function (eWiseMult):  GB_AemultB__eq_uint32
-// A*D function (colscale):    GB_AxD__eq_uint32
-// D*A function (rowscale):    GB_DxB__eq_uint32
+// A+B function (eWiseAdd):         GB_AaddB__eq_uint32
+// A.*B function (eWiseMult):       GB_AemultB__eq_uint32
+// A*D function (colscale):         GB_AxD__eq_uint32
+// D*A function (rowscale):         GB_DxB__eq_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar named t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag for: op is SECOND (0 here, since this op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for: op is plus_fp32 or plus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for: op is minus_fp32 or minus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) marks that there is no such routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT32 || GxB_NO_EQ_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C += A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; EQ is none of these.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint32    // C = A+B kernel, EQ on uint32, all dense
+(
+    GrB_Matrix C,           // result matrix
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // not referenced here directly; visible to the template
+)
+{ 
+    #if GB_DISABLE          // true if EQ, UINT32, or EQ_UINT32 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_uint32    // C += A slot for EQ on uint32
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GrB_Matrix A,     // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not read here;
+    const int64_t *GB_RESTRICT klast_slice,     // only the disabled template
+    const int64_t *GB_RESTRICT pstart_slice,    // below could reference them
+    const int ntasks,       // unused while the template is disabled
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT32, or EQ_UINT32 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // template is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_uint32    // C += x slot for EQ on uint32
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GB_void *p_ywork, // pointer to the scalar; read as bool in the disabled code
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT32, or EQ_UINT32 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // everything up to the matching #endif is compiled out
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_uint64.c b/Source/Generated/GB_binop__eq_uint64.c
index 1b6a64ae47..a2e5f77d0f 100644
--- a/Source/Generated/GB_binop__eq_uint64.c
+++ b/Source/Generated/GB_binop__eq_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_uint64
-// A.*B function (eWiseMult):  GB_AemultB__eq_uint64
-// A*D function (colscale):    GB_AxD__eq_uint64
-// D*A function (rowscale):    GB_DxB__eq_uint64
+// A+B function (eWiseAdd):         GB_AaddB__eq_uint64
+// A.*B function (eWiseMult):       GB_AemultB__eq_uint64
+// A*D function (colscale):         GB_AxD__eq_uint64
+// D*A function (rowscale):         GB_DxB__eq_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare a scalar named t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag for: op is SECOND (0 here, since this op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for: op is plus_fp32 or plus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for: op is minus_fp32 or minus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) marks that there is no such routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT64 || GxB_NO_EQ_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C += A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; EQ is none of these.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint64    // C = A+B kernel, EQ on uint64, all dense
+(
+    GrB_Matrix C,           // result matrix
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // not referenced here directly; visible to the template
+)
+{ 
+    #if GB_DISABLE          // true if EQ, UINT64, or EQ_UINT64 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_uint64    // C += A slot for EQ on uint64
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GrB_Matrix A,     // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not read here;
+    const int64_t *GB_RESTRICT klast_slice,     // only the disabled template
+    const int64_t *GB_RESTRICT pstart_slice,    // below could reference them
+    const int ntasks,       // unused while the template is disabled
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT64, or EQ_UINT64 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // template is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_uint64    // C += x slot for EQ on uint64
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GB_void *p_ywork, // pointer to the scalar; read as bool in the disabled code
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT64, or EQ_UINT64 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // everything up to the matching #endif is compiled out
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__eq_uint8.c b/Source/Generated/GB_binop__eq_uint8.c
index 3e748b9548..25b2c598d0 100644
--- a/Source/Generated/GB_binop__eq_uint8.c
+++ b/Source/Generated/GB_binop__eq_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__eq_uint8
-// A.*B function (eWiseMult):  GB_AemultB__eq_uint8
-// A*D function (colscale):    GB_AxD__eq_uint8
-// D*A function (rowscale):    GB_DxB__eq_uint8
+// A+B function (eWiseAdd):         GB_AaddB__eq_uint8
+// A.*B function (eWiseMult):       GB_AemultB__eq_uint8
+// A*D function (colscale):         GB_AxD__eq_uint8
+// D*A function (rowscale):         GB_DxB__eq_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__eq_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__eq_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__eq_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare a scalar named t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// flag for: op is SECOND (0 here, since this op is EQ)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for: op is plus_fp32 or plus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for: op is minus_fp32 or minus_fp64 (0 here, since this op is EQ)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) marks that there is no such routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_UINT8 || GxB_NO_EQ_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C += A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; EQ is none of these.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint8     // C = A+B kernel, EQ on uint8, all dense
+(
+    GrB_Matrix C,           // result matrix
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // not referenced here directly; visible to the template
+)
+{ 
+    #if GB_DISABLE          // true if EQ, UINT8, or EQ_UINT8 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__eq_uint8     // C += A slot for EQ on uint8
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GrB_Matrix A,     // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not read here;
+    const int64_t *GB_RESTRICT klast_slice,     // only the disabled template
+    const int64_t *GB_RESTRICT pstart_slice,    // below could reference them
+    const int ntasks,       // unused while the template is disabled
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT8, or EQ_UINT8 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // template is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__eq_uint8     // C += x slot for EQ on uint8
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GB_void *p_ywork, // pointer to the scalar; read as bool in the disabled code
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if EQ, UINT8, or EQ_UINT8 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // everything up to the matching #endif is compiled out
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): C is untouched here; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__eq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__eq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_bool.c b/Source/Generated/GB_binop__first_bool.c
index 567ab3094c..08766a42b4 100644
--- a/Source/Generated/GB_binop__first_bool.c
+++ b/Source/Generated/GB_binop__first_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_bool
-// A.*B function (eWiseMult):  GB_AemultB__first_bool
-// A*D function (colscale):    GB_AxD__first_bool
-// D*A function (rowscale):    GB_DxB__first_bool
+// A+B function (eWiseAdd):         GB_AaddB__first_bool
+// A.*B function (eWiseMult):       GB_AemultB__first_bool
+// A*D function (colscale):         GB_AxD__first_bool
+// D*A function (rowscale):         GB_DxB__first_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__first_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__first_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// flag for: op is SECOND (0 here, since this op is FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for: op is plus_fp32 or plus_fp64 (0 here, since this op is FIRST)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for: op is minus_fp32 or minus_fp64 (0 here, since this op is FIRST)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) marks that there is no such routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_BOOL || GxB_NO_FIRST_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C += A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; FIRST is none of these.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_bool   // C = A+B kernel, FIRST on bool, all dense
+(
+    GrB_Matrix C,           // result matrix
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // not referenced here directly; visible to the template
+)
+{ 
+    #if GB_DISABLE          // true if FIRST, BOOL, or FIRST_BOOL is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_bool   // C += A slot for FIRST on bool
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GrB_Matrix A,     // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not read here;
+    const int64_t *GB_RESTRICT klast_slice,     // only the disabled template
+    const int64_t *GB_RESTRICT pstart_slice,    // below could reference them
+    const int ntasks,       // unused while the template is disabled
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if FIRST, BOOL, or FIRST_BOOL is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // template is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): no work done; presumably fine since FIRST (z = x) keeps C as is -- confirm
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_bool   // C += x slot for FIRST on bool
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GB_void *p_ywork, // pointer to the scalar; read as bool in the disabled code
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if FIRST, BOOL, or FIRST_BOOL is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // everything up to the matching #endif is compiled out
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): no work done; presumably fine since FIRST (z = x) keeps C as is -- confirm
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_fp32.c b/Source/Generated/GB_binop__first_fp32.c
index 5a0a0c638e..72912d35ac 100644
--- a/Source/Generated/GB_binop__first_fp32.c
+++ b/Source/Generated/GB_binop__first_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_fp32
-// A.*B function (eWiseMult):  GB_AemultB__first_fp32
-// A*D function (colscale):    GB_AxD__first_fp32
-// D*A function (rowscale):    GB_DxB__first_fp32
+// A+B function (eWiseAdd):         GB_AaddB__first_fp32
+// A.*B function (eWiseMult):       GB_AemultB__first_fp32
+// A*D function (colscale):         GB_AxD__first_fp32
+// D*A function (rowscale):         GB_DxB__first_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__first_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__first_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t of the same type as C (float for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// flag for: op is SECOND (0 here, since this op is FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for: op is plus_fp32 or plus_fp64 (0 here, since this op is FIRST)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for: op is minus_fp32 or minus_fp64 (0 here, since this op is FIRST)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) marks that there is no such routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_FP32 || GxB_NO_FIRST_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C += A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; FIRST is none of these.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_fp32   // C = A+B kernel, FIRST on fp32, all dense
+(
+    GrB_Matrix C,           // result matrix
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // not referenced here directly; visible to the template
+)
+{ 
+    #if GB_DISABLE          // true if FIRST, FP32, or FIRST_FP32 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body is this included template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_fp32   // C += A slot for FIRST on fp32
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GrB_Matrix A,     // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not read here;
+    const int64_t *GB_RESTRICT klast_slice,     // only the disabled template
+    const int64_t *GB_RESTRICT pstart_slice,    // below could reference them
+    const int ntasks,       // unused while the template is disabled
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if FIRST, FP32, or FIRST_FP32 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // template is compiled out for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): no work done; presumably fine since FIRST (z = x) keeps C as is -- confirm
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_fp32   // C += x slot for FIRST on fp32
+(
+    GrB_Matrix C,           // dense matrix being accumulated into
+    const GB_void *p_ywork, // pointer to the scalar; read as float in the disabled code
+    const int nthreads      // unused while the template is disabled
+)
+{
+    #if GB_DISABLE          // true if FIRST, FP32, or FIRST_FP32 is disabled
+    return (GrB_NO_VALUE) ; // report that no hard-coded kernel ran
+    #else
+    #if 0                   // everything up to the matching #endif is compiled out
+    { 
+        float ywork = (*((float *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): no work done; presumably fine since FIRST (z = x) keeps C as is -- confirm
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_fp64.c b/Source/Generated/GB_binop__first_fp64.c
index 47af94c9b3..30d92a796e 100644
--- a/Source/Generated/GB_binop__first_fp64.c
+++ b/Source/Generated/GB_binop__first_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_fp64
-// A.*B function (eWiseMult):  GB_AemultB__first_fp64
-// A*D function (colscale):    GB_AxD__first_fp64
-// D*A function (rowscale):    GB_DxB__first_fp64
+// A+B function (eWiseAdd):         GB_AaddB__first_fp64
+// A.*B function (eWiseMult):       GB_AemultB__first_fp64
+// A*D function (colscale):         GB_AxD__first_fp64
+// D*A function (rowscale):         GB_DxB__first_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__first_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__first_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (double)
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// 1 if the op is SECOND, 0 otherwise (0 here: this file is for FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) = absent
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_FP64 || GxB_NO_FIRST_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel is generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0 // compiled out: FIRST is not one of the ops listed below
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none) // placeholder where the kernel name would appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif // 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; the included template is the function body
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_fp64
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably read by the template; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) // no parameter is read here: the only code that could use them is #if 0
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar x, passed by pointer
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        double ywork = (*((double *) p_ywork)) ;    // read the scalar as double
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_int16.c b/Source/Generated/GB_binop__first_int16.c
index de07ed5c04..9c03d4d8c9 100644
--- a/Source/Generated/GB_binop__first_int16.c
+++ b/Source/Generated/GB_binop__first_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_int16
-// A.*B function (eWiseMult):  GB_AemultB__first_int16
-// A*D function (colscale):    GB_AxD__first_int16
-// D*A function (rowscale):    GB_DxB__first_int16
+// A+B function (eWiseAdd):         GB_AaddB__first_int16
+// A.*B function (eWiseMult):       GB_AemultB__first_int16
+// A*D function (colscale):         GB_AxD__first_int16
+// D*A function (rowscale):         GB_DxB__first_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__first_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__first_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (int16_t)
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// 1 if the op is SECOND, 0 otherwise (0 here: this file is for FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) = absent
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT16 || GxB_NO_FIRST_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel is generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0 // compiled out: FIRST is not one of the ops listed below
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none) // placeholder where the kernel name would appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif // 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; the included template is the function body
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int16
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably read by the template; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) // no parameter is read here: the only code that could use them is #if 0
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar x, passed by pointer
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;  // read the scalar as int16_t
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_int32.c b/Source/Generated/GB_binop__first_int32.c
index 72d9358da1..5c27eb5a0a 100644
--- a/Source/Generated/GB_binop__first_int32.c
+++ b/Source/Generated/GB_binop__first_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_int32
-// A.*B function (eWiseMult):  GB_AemultB__first_int32
-// A*D function (colscale):    GB_AxD__first_int32
-// D*A function (rowscale):    GB_DxB__first_int32
+// A+B function (eWiseAdd):         GB_AaddB__first_int32
+// A.*B function (eWiseMult):       GB_AemultB__first_int32
+// A*D function (colscale):         GB_AxD__first_int32
+// D*A function (rowscale):         GB_DxB__first_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__first_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__first_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (int32_t)
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// 1 if the op is SECOND, 0 otherwise (0 here: this file is for FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) = absent
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT32 || GxB_NO_FIRST_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel is generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0 // compiled out: FIRST is not one of the ops listed below
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none) // placeholder where the kernel name would appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif // 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; the included template is the function body
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int32
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably read by the template; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) // no parameter is read here: the only code that could use them is #if 0
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar x, passed by pointer
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;  // read the scalar as int32_t
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_int64.c b/Source/Generated/GB_binop__first_int64.c
index c58d9f5f37..d8a90579a6 100644
--- a/Source/Generated/GB_binop__first_int64.c
+++ b/Source/Generated/GB_binop__first_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_int64
-// A.*B function (eWiseMult):  GB_AemultB__first_int64
-// A*D function (colscale):    GB_AxD__first_int64
-// D*A function (rowscale):    GB_DxB__first_int64
+// A+B function (eWiseAdd):         GB_AaddB__first_int64
+// A.*B function (eWiseMult):       GB_AemultB__first_int64
+// A*D function (colscale):         GB_AxD__first_int64
+// D*A function (rowscale):         GB_DxB__first_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__first_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__first_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (int64_t)
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// 1 if the op is SECOND, 0 otherwise (0 here: this file is for FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) = absent
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT64 || GxB_NO_FIRST_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel is generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0 // compiled out: FIRST is not one of the ops listed below
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none) // placeholder where the kernel name would appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif // 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; the included template is the function body
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int64
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably read by the template; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) // no parameter is read here: the only code that could use them is #if 0
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar x, passed by pointer
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;  // read the scalar as int64_t
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_int8.c b/Source/Generated/GB_binop__first_int8.c
index 274be154bf..5e9a0c12ff 100644
--- a/Source/Generated/GB_binop__first_int8.c
+++ b/Source/Generated/GB_binop__first_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_int8
-// A.*B function (eWiseMult):  GB_AemultB__first_int8
-// A*D function (colscale):    GB_AxD__first_int8
-// D*A function (rowscale):    GB_DxB__first_int8
+// A+B function (eWiseAdd):         GB_AaddB__first_int8
+// A.*B function (eWiseMult):       GB_AemultB__first_int8
+// A*D function (colscale):         GB_AxD__first_int8
+// D*A function (rowscale):         GB_DxB__first_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__first_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__first_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (int8_t)
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// 1 if the op is SECOND, 0 otherwise (0 here: this file is for FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) = absent
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT8 || GxB_NO_FIRST_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel is generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0 // compiled out: FIRST is not one of the ops listed below
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none) // placeholder where the kernel name would appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif // 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; the included template is the function body
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int8
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably read by the template; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) // no parameter is read here: the only code that could use them is #if 0
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar x, passed by pointer
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;    // read the scalar as int8_t
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_uint16.c b/Source/Generated/GB_binop__first_uint16.c
index 2573f64316..9659fbfa0a 100644
--- a/Source/Generated/GB_binop__first_uint16.c
+++ b/Source/Generated/GB_binop__first_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_uint16
-// A.*B function (eWiseMult):  GB_AemultB__first_uint16
-// A*D function (colscale):    GB_AxD__first_uint16
-// D*A function (rowscale):    GB_DxB__first_uint16
+// A+B function (eWiseAdd):         GB_AaddB__first_uint16
+// A.*B function (eWiseMult):       GB_AemultB__first_uint16
+// A*D function (colscale):         GB_AxD__first_uint16
+// D*A function (rowscale):         GB_DxB__first_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__first_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__first_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (uint16_t)
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// 1 if the op is SECOND, 0 otherwise (0 here: this file is for FIRST)
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) = absent
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT16 || GxB_NO_FIRST_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel is generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0 // compiled out: FIRST is not one of the ops listed below
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none) // placeholder where the kernel name would appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif // 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; the included template is the function body
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint16
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably read by the template; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) // no parameter is read here: the only code that could use them is #if 0
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (kernel is off here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar x, passed by pointer
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out
+    #else
+    #if 0 // template disabled for this operator
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;    // read the scalar as uint16_t
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif // 0
+    return (GrB_SUCCESS) ;      // succeeds without modifying C
+    #endif // GB_DISABLE
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_uint32.c b/Source/Generated/GB_binop__first_uint32.c
index 11fc3942e5..f8dab360cc 100644
--- a/Source/Generated/GB_binop__first_uint32.c
+++ b/Source/Generated/GB_binop__first_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_uint32
-// A.*B function (eWiseMult):  GB_AemultB__first_uint32
-// A*D function (colscale):    GB_AxD__first_uint32
-// D*A function (rowscale):    GB_DxB__first_uint32
+// A+B function (eWiseAdd):         GB_AaddB__first_uint32
+// A.*B function (eWiseMult):       GB_AemultB__first_uint32
+// A*D function (colscale):         GB_AxD__first_uint32
+// D*A function (rowscale):         GB_DxB__first_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__first_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__first_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (uint32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// GB_OP_IS_SECOND flags the SECOND operator; it is 0 here since this op is FIRST
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL flags plus_fp32 or plus_fp64; it is 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL flags minus_fp32 or minus_fp64; it is 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) here means no gateway
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT32 || GxB_NO_FIRST_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (placeholder, never compiled in this file)
+//------------------------------------------------------------------------------
+
+#if 0                           // excluded: the file header lists this kernel as (none)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; FIRST is not one of them.
+
+void (none)                     // placeholder name emitted when no kernel is generated
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; computed by the included noaccum template
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint32
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; available to the template
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE, compute nothing
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template excluded, so no argument is read in this build
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // scalar load and template are both excluded from the build
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_uint64.c b/Source/Generated/GB_binop__first_uint64.c
index 4f555b79cb..a634d713f7 100644
--- a/Source/Generated/GB_binop__first_uint64.c
+++ b/Source/Generated/GB_binop__first_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_uint64
-// A.*B function (eWiseMult):  GB_AemultB__first_uint64
-// A*D function (colscale):    GB_AxD__first_uint64
-// D*A function (rowscale):    GB_DxB__first_uint64
+// A+B function (eWiseAdd):         GB_AaddB__first_uint64
+// A.*B function (eWiseMult):       GB_AemultB__first_uint64
+// A*D function (colscale):         GB_AxD__first_uint64
+// D*A function (rowscale):         GB_DxB__first_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__first_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__first_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (uint64_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// GB_OP_IS_SECOND flags the SECOND operator; it is 0 here since this op is FIRST
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL flags plus_fp32 or plus_fp64; it is 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL flags minus_fp32 or minus_fp64; it is 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) here means no gateway
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT64 || GxB_NO_FIRST_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (placeholder, never compiled in this file)
+//------------------------------------------------------------------------------
+
+#if 0                           // excluded: the file header lists this kernel as (none)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; FIRST is not one of them.
+
+void (none)                     // placeholder name emitted when no kernel is generated
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; computed by the included noaccum template
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint64
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; available to the template
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE, compute nothing
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template excluded, so no argument is read in this build
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // scalar load and template are both excluded from the build
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__first_uint8.c b/Source/Generated/GB_binop__first_uint8.c
index 59d66105af..4f64b8e43e 100644
--- a/Source/Generated/GB_binop__first_uint8.c
+++ b/Source/Generated/GB_binop__first_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__first_uint8
-// A.*B function (eWiseMult):  GB_AemultB__first_uint8
-// A*D function (colscale):    GB_AxD__first_uint8
-// D*A function (rowscale):    GB_DxB__first_uint8
+// A+B function (eWiseAdd):         GB_AaddB__first_uint8
+// A.*B function (eWiseMult):       GB_AemultB__first_uint8
+// A*D function (colscale):         GB_AxD__first_uint8
+// D*A function (rowscale):         GB_DxB__first_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__first_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__first_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__first_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     ;
 
+// declare a scalar named t with the same type as C (uint8_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = x ;
 
+// GB_OP_IS_SECOND flags the SECOND operator; it is 0 here since this op is FIRST
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL flags plus_fp32 or plus_fp64; it is 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL flags minus_fp32 or minus_fp64; it is 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) here means no gateway
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT8 || GxB_NO_FIRST_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (placeholder, never compiled in this file)
+//------------------------------------------------------------------------------
+
+#if 0                           // excluded: the file header lists this kernel as (none)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; FIRST is not one of them.
+
+void (none)                     // placeholder name emitted when no kernel is generated
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; computed by the included noaccum template
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint8
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; available to the template
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE, compute nothing
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template excluded, so no argument is read in this build
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__first_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // scalar load and template are both excluded from the build
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__first_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__first_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_bool.c b/Source/Generated/GB_binop__ge_bool.c
index 444b200539..1de69fb71d 100644
--- a/Source/Generated/GB_binop__ge_bool.c
+++ b/Source/Generated/GB_binop__ge_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_bool
-// A.*B function (eWiseMult):  GB_AemultB__ge_bool
-// A*D function (colscale):    GB_AxD__ge_bool
-// D*A function (rowscale):    GB_DxB__ge_bool
+// A+B function (eWiseAdd):         GB_AaddB__ge_bool
+// A.*B function (eWiseMult):       GB_AemultB__ge_bool
+// A*D function (colscale):         GB_AxD__ge_bool
+// D*A function (rowscale):         GB_DxB__ge_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// GB_OP_IS_SECOND flags the SECOND operator; it is 0 here since this op is GE
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL flags plus_fp32 or plus_fp64; it is 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL flags minus_fp32 or minus_fp64; it is 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) here means no gateway
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_BOOL || GxB_NO_GE_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (placeholder, never compiled in this file)
+//------------------------------------------------------------------------------
+
+#if 0                           // excluded: the file header lists this kernel as (none)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; GE is not one of them.
+
+void (none)                     // placeholder name emitted when no kernel is generated
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; computed by the included noaccum template
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_bool
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; available to the template
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE, compute nothing
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (template is active)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {                           // braces give the included template its own scope
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (template is active)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as bool below
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {                           // braces scope ywork to the included template
+        bool ywork = (*((bool *) p_ywork)) ;    // load the scalar; NOTE(review): the cast drops const
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached by falling out of the block above, which ends in a return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_fp32.c b/Source/Generated/GB_binop__ge_fp32.c
index dd94f3c19d..668fbf87e3 100644
--- a/Source/Generated/GB_binop__ge_fp32.c
+++ b/Source/Generated/GB_binop__ge_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_fp32
-// A.*B function (eWiseMult):  GB_AemultB__ge_fp32
-// A*D function (colscale):    GB_AxD__ge_fp32
-// D*A function (rowscale):    GB_DxB__ge_fp32
+// A+B function (eWiseAdd):         GB_AaddB__ge_fp32
+// A.*B function (eWiseMult):       GB_AemultB__ge_fp32
+// A*D function (colscale):         GB_AxD__ge_fp32
+// D*A function (rowscale):         GB_DxB__ge_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_fp32
 
 // C type:   bool
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// GB_OP_IS_SECOND flags the SECOND operator; it is 0 here since this op is GE
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL flags plus_fp32 or plus_fp64; it is 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL flags minus_fp32 or minus_fp64; it is 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) here means no gateway
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_FP32 || GxB_NO_GE_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (placeholder, never compiled in this file)
+//------------------------------------------------------------------------------
+
+#if 0                           // excluded: the file header lists this kernel as (none)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; GE is not one of them.
+
+void (none)                     // placeholder name emitted when no kernel is generated
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; computed by the included noaccum template
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_fp32
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; available to the template
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE, compute nothing
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template excluded, so no argument is read in this build
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // scalar load and template are both excluded from the build
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_fp64.c b/Source/Generated/GB_binop__ge_fp64.c
index 63a5670382..957df2a5c6 100644
--- a/Source/Generated/GB_binop__ge_fp64.c
+++ b/Source/Generated/GB_binop__ge_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_fp64
-// A.*B function (eWiseMult):  GB_AemultB__ge_fp64
-// A*D function (colscale):    GB_AxD__ge_fp64
-// D*A function (rowscale):    GB_DxB__ge_fp64
+// A+B function (eWiseAdd):         GB_AaddB__ge_fp64
+// A.*B function (eWiseMult):       GB_AemultB__ge_fp64
+// A*D function (colscale):         GB_AxD__ge_fp64
+// D*A function (rowscale):         GB_DxB__ge_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_fp64
 
 // C type:   bool
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// GB_OP_IS_SECOND flags the SECOND operator; it is 0 here since this op is GE
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL flags plus_fp32 or plus_fp64; it is 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL flags minus_fp32 or minus_fp64; it is 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; (none) here means no gateway
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_FP64 || GxB_NO_GE_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (placeholder, never compiled in this file)
+//------------------------------------------------------------------------------
+
+#if 0                           // excluded: the file header lists this kernel as (none)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV; GE is not one of them.
+
+void (none)                     // placeholder name emitted when no kernel is generated
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; computed by the included noaccum template
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_fp64
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; available to the template
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE, compute nothing
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template excluded, so no argument is read in this build
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out here)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // kernel disabled: report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // scalar load and template are both excluded from the build
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without touching C; confirm callers never pick this kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_int16.c b/Source/Generated/GB_binop__ge_int16.c
index efaaf64ae5..3f0a8c8a7b 100644
--- a/Source/Generated/GB_binop__ge_int16.c
+++ b/Source/Generated/GB_binop__ge_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_int16
-// A.*B function (eWiseMult):  GB_AemultB__ge_int16
-// A*D function (colscale):    GB_AxD__ge_int16
-// D*A function (rowscale):    GB_DxB__ge_int16
+// A+B function (eWiseAdd):         GB_AaddB__ge_int16
+// A.*B function (eWiseMult):       GB_AemultB__ge_int16
+// A*D function (colscale):         GB_AxD__ge_int16
+// D*A function (rowscale):         GB_DxB__ge_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (bool here); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// flag: is the op SECOND?  0 in this file (GB_BINOP above is z = (x >= y))
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists; (none) presumably marks "no routine"
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_INT16 || GxB_NO_GE_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // disabled: nothing down to the matching #endif is compiled
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name: the list at the top gives (none) for C+=A+B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int16    // C = A+B, all 3 matrices dense
+(
+    GrB_Matrix C,               // result matrix (the C of C = A+B)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;      // enabled build: reported after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_int16 // C += A stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // matrix accumulated into
+    const GrB_Matrix A,                         // matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: kernel is compiled out
+    const int ntasks,                           // unused: kernel is compiled out
+    const int nthreads                          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_int16 // C += x stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar; unused: kernel compiled out
+    const int nthreads          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_int32.c b/Source/Generated/GB_binop__ge_int32.c
index 436cd9a076..d7e28336b0 100644
--- a/Source/Generated/GB_binop__ge_int32.c
+++ b/Source/Generated/GB_binop__ge_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_int32
-// A.*B function (eWiseMult):  GB_AemultB__ge_int32
-// A*D function (colscale):    GB_AxD__ge_int32
-// D*A function (rowscale):    GB_DxB__ge_int32
+// A+B function (eWiseAdd):         GB_AaddB__ge_int32
+// A.*B function (eWiseMult):       GB_AemultB__ge_int32
+// A*D function (colscale):         GB_AxD__ge_int32
+// D*A function (rowscale):         GB_DxB__ge_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (bool here); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// flag: is the op SECOND?  0 in this file (GB_BINOP above is z = (x >= y))
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists; (none) presumably marks "no routine"
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_INT32 || GxB_NO_GE_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // disabled: nothing down to the matching #endif is compiled
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name: the list at the top gives (none) for C+=A+B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int32    // C = A+B, all 3 matrices dense
+(
+    GrB_Matrix C,               // result matrix (the C of C = A+B)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;      // enabled build: reported after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_int32 // C += A stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // matrix accumulated into
+    const GrB_Matrix A,                         // matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: kernel is compiled out
+    const int ntasks,                           // unused: kernel is compiled out
+    const int nthreads                          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_int32 // C += x stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar; unused: kernel compiled out
+    const int nthreads          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_int64.c b/Source/Generated/GB_binop__ge_int64.c
index 51e1bdd2ae..c546a75d5d 100644
--- a/Source/Generated/GB_binop__ge_int64.c
+++ b/Source/Generated/GB_binop__ge_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_int64
-// A.*B function (eWiseMult):  GB_AemultB__ge_int64
-// A*D function (colscale):    GB_AxD__ge_int64
-// D*A function (rowscale):    GB_DxB__ge_int64
+// A+B function (eWiseAdd):         GB_AaddB__ge_int64
+// A.*B function (eWiseMult):       GB_AemultB__ge_int64
+// A*D function (colscale):         GB_AxD__ge_int64
+// D*A function (rowscale):         GB_DxB__ge_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (bool here); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// flag: is the op SECOND?  0 in this file (GB_BINOP above is z = (x >= y))
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists; (none) presumably marks "no routine"
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_INT64 || GxB_NO_GE_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // disabled: nothing down to the matching #endif is compiled
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name: the list at the top gives (none) for C+=A+B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int64    // C = A+B, all 3 matrices dense
+(
+    GrB_Matrix C,               // result matrix (the C of C = A+B)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;      // enabled build: reported after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_int64 // C += A stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // matrix accumulated into
+    const GrB_Matrix A,                         // matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: kernel is compiled out
+    const int ntasks,                           // unused: kernel is compiled out
+    const int nthreads                          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_int64 // C += x stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar; unused: kernel compiled out
+    const int nthreads          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_int8.c b/Source/Generated/GB_binop__ge_int8.c
index f34a481e9e..3dd4a2eb1a 100644
--- a/Source/Generated/GB_binop__ge_int8.c
+++ b/Source/Generated/GB_binop__ge_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_int8
-// A.*B function (eWiseMult):  GB_AemultB__ge_int8
-// A*D function (colscale):    GB_AxD__ge_int8
-// D*A function (rowscale):    GB_DxB__ge_int8
+// A+B function (eWiseAdd):         GB_AaddB__ge_int8
+// A.*B function (eWiseMult):       GB_AemultB__ge_int8
+// A*D function (colscale):         GB_AxD__ge_int8
+// D*A function (rowscale):         GB_DxB__ge_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (bool here); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// flag: is the op SECOND?  0 in this file (GB_BINOP above is z = (x >= y))
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists; (none) presumably marks "no routine"
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_INT8 || GxB_NO_GE_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // disabled: nothing down to the matching #endif is compiled
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name: the list at the top gives (none) for C+=A+B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int8     // C = A+B, all 3 matrices dense
+(
+    GrB_Matrix C,               // result matrix (the C of C = A+B)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;      // enabled build: reported after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_int8  // C += A stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // matrix accumulated into
+    const GrB_Matrix A,                         // matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: kernel is compiled out
+    const int ntasks,                           // unused: kernel is compiled out
+    const int nthreads                          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_int8  // C += x stub: always returns GrB_NO_VALUE
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar; unused: kernel compiled out
+    const int nthreads          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_uint16.c b/Source/Generated/GB_binop__ge_uint16.c
index 9e131b2999..349a1fca0b 100644
--- a/Source/Generated/GB_binop__ge_uint16.c
+++ b/Source/Generated/GB_binop__ge_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_uint16
-// A.*B function (eWiseMult):  GB_AemultB__ge_uint16
-// A*D function (colscale):    GB_AxD__ge_uint16
-// D*A function (rowscale):    GB_DxB__ge_uint16
+// A+B function (eWiseAdd):         GB_AaddB__ge_uint16
+// A.*B function (eWiseMult):       GB_AemultB__ge_uint16
+// A*D function (colscale):         GB_AxD__ge_uint16
+// D*A function (rowscale):         GB_DxB__ge_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (bool here); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// flag: is the op SECOND?  0 in this file (GB_BINOP above is z = (x >= y))
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists; (none) presumably marks "no routine"
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_UINT16 || GxB_NO_GE_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // disabled: nothing down to the matching #endif is compiled
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name: the list at the top gives (none) for C+=A+B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint16   // C = A+B, all 3 matrices dense
+(
+    GrB_Matrix C,               // result matrix (the C of C = A+B)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;      // enabled build: reported after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_uint16    // C += A stub: always GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // matrix accumulated into
+    const GrB_Matrix A,                         // matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: kernel is compiled out
+    const int ntasks,                           // unused: kernel is compiled out
+    const int nthreads                          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_uint16    // C += x stub: always GrB_NO_VALUE
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar; unused: kernel compiled out
+    const int nthreads          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_uint32.c b/Source/Generated/GB_binop__ge_uint32.c
index 5bd2a041be..4cdf78f29b 100644
--- a/Source/Generated/GB_binop__ge_uint32.c
+++ b/Source/Generated/GB_binop__ge_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_uint32
-// A.*B function (eWiseMult):  GB_AemultB__ge_uint32
-// A*D function (colscale):    GB_AxD__ge_uint32
-// D*A function (rowscale):    GB_DxB__ge_uint32
+// A+B function (eWiseAdd):         GB_AaddB__ge_uint32
+// A.*B function (eWiseMult):       GB_AemultB__ge_uint32
+// A*D function (colscale):         GB_AxD__ge_uint32
+// D*A function (rowscale):         GB_DxB__ge_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (bool here); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// flag: is the op SECOND?  0 in this file (GB_BINOP above is z = (x >= y))
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists; (none) presumably marks "no routine"
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_UINT32 || GxB_NO_GE_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // disabled: nothing down to the matching #endif is compiled
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name: the list at the top gives (none) for C+=A+B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint32   // C = A+B, all 3 matrices dense
+(
+    GrB_Matrix C,               // result matrix (the C of C = A+B)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not used directly here; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // function body is this template
+    return (GrB_SUCCESS) ;      // enabled build: reported after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_uint32    // C += A stub: always GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // matrix accumulated into
+    const GrB_Matrix A,                         // matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: kernel is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: kernel is compiled out
+    const int ntasks,                           // unused: kernel is compiled out
+    const int nthreads                          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_uint32    // C += x stub: always GrB_NO_VALUE
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // pointer to the scalar; unused: kernel compiled out
+    const int nthreads          // unused: kernel is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // operator/type disabled via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // kernel is #if 0'd out: no work was done
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_uint64.c b/Source/Generated/GB_binop__ge_uint64.c
index 17e7209b34..27fab25ce6 100644
--- a/Source/Generated/GB_binop__ge_uint64.c
+++ b/Source/Generated/GB_binop__ge_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_uint64
-// A.*B function (eWiseMult):  GB_AemultB__ge_uint64
-// A*D function (colscale):    GB_AxD__ge_uint64
-// D*A function (rowscale):    GB_DxB__ge_uint64
+// A+B function (eWiseAdd):         GB_AaddB__ge_uint64
+// A.*B function (eWiseMult):       GB_AemultB__ge_uint64
+// A*D function (colscale):         GB_AxD__ge_uint64
+// D*A function (rowscale):         GB_DxB__ge_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool here)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any for this operator and type; here (none):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_UINT64 || GxB_NO_GE_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not built for this operator: see #if 0)
+//------------------------------------------------------------------------------
+
+#if 0                          // everything down to #endif is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                    // "(none)" stands in for the function name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif                         // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, via GB_dense_ewise3_noaccum_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint64
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // template supplies the body
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_uint64
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GrB_Matrix A,         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: template is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: template is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: template is compiled out
+    const int ntasks,           // unused: template is compiled out
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_uint64
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GB_void *p_ywork,     // pointer to the scalar; unused here
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ge_uint8.c b/Source/Generated/GB_binop__ge_uint8.c
index 5538584e6e..5c37710c26 100644
--- a/Source/Generated/GB_binop__ge_uint8.c
+++ b/Source/Generated/GB_binop__ge_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ge_uint8
-// A.*B function (eWiseMult):  GB_AemultB__ge_uint8
-// A*D function (colscale):    GB_AxD__ge_uint8
-// D*A function (rowscale):    GB_DxB__ge_uint8
+// A+B function (eWiseAdd):         GB_AaddB__ge_uint8
+// A.*B function (eWiseMult):       GB_AemultB__ge_uint8
+// A*D function (colscale):         GB_AxD__ge_uint8
+// D*A function (rowscale):         GB_DxB__ge_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__ge_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__ge_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ge_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool here)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any for this operator and type; here (none):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GE || GxB_NO_UINT8 || GxB_NO_GE_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not built for this operator: see #if 0)
+//------------------------------------------------------------------------------
+
+#if 0                          // everything down to #endif is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                    // "(none)" stands in for the function name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif                         // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, via GB_dense_ewise3_noaccum_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint8
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // template supplies the body
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ge_uint8
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GrB_Matrix A,         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: template is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: template is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: template is compiled out
+    const int ntasks,           // unused: template is compiled out
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ge_uint8
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GB_void *p_ywork,     // pointer to the scalar; unused here
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_bool.c b/Source/Generated/GB_binop__gt_bool.c
index cf97f593ff..3da6b3372b 100644
--- a/Source/Generated/GB_binop__gt_bool.c
+++ b/Source/Generated/GB_binop__gt_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_bool
-// A.*B function (eWiseMult):  GB_AemultB__gt_bool
-// A*D function (colscale):    GB_AxD__gt_bool
-// D*A function (rowscale):    GB_DxB__gt_bool
+// A+B function (eWiseAdd):         GB_AaddB__gt_bool
+// A.*B function (eWiseMult):       GB_AemultB__gt_bool
+// A*D function (colscale):         GB_AxD__gt_bool
+// D*A function (rowscale):         GB_DxB__gt_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool here)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any for this operator and type; here (none):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_BOOL || GxB_NO_GT_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not built for this operator: see #if 0)
+//------------------------------------------------------------------------------
+
+#if 0                          // everything down to #endif is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                    // "(none)" stands in for the function name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif                         // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, via GB_dense_ewise3_noaccum_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_bool
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // template supplies the body
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (template enabled)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_bool
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GrB_Matrix A,         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): presumably read
+    const int64_t *GB_RESTRICT klast_slice,     // by the included template as
+    const int64_t *GB_RESTRICT pstart_slice,    // per-task slice bounds; confirm
+    const int ntasks,           // NOTE(review): presumably the task count; confirm
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // template supplies the body
+    }
+    
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (template enabled)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_bool
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GB_void *p_ywork,     // pointer to the scalar; read as a bool below
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;            // load the scalar as bool
+        #include "GB_dense_subassign_22_template.c"     // template supplies the body
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached by fall-through: the block above returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_fp32.c b/Source/Generated/GB_binop__gt_fp32.c
index 26c4c87560..db4216dc99 100644
--- a/Source/Generated/GB_binop__gt_fp32.c
+++ b/Source/Generated/GB_binop__gt_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_fp32
-// A.*B function (eWiseMult):  GB_AemultB__gt_fp32
-// A*D function (colscale):    GB_AxD__gt_fp32
-// D*A function (rowscale):    GB_DxB__gt_fp32
+// A+B function (eWiseAdd):         GB_AaddB__gt_fp32
+// A.*B function (eWiseMult):       GB_AemultB__gt_fp32
+// A*D function (colscale):         GB_AxD__gt_fp32
+// D*A function (rowscale):         GB_DxB__gt_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_fp32
 
 // C type:   bool
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool here)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any for this operator and type; here (none):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_FP32 || GxB_NO_GT_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not built for this operator: see #if 0)
+//------------------------------------------------------------------------------
+
+#if 0                          // everything down to #endif is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                    // "(none)" stands in for the function name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif                         // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, via GB_dense_ewise3_noaccum_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_fp32
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // template supplies the body
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_fp32
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GrB_Matrix A,         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: template is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: template is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: template is compiled out
+    const int ntasks,           // unused: template is compiled out
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_fp32
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GB_void *p_ywork,     // pointer to the scalar; unused here
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_fp64.c b/Source/Generated/GB_binop__gt_fp64.c
index a209d7e44f..d9639698b8 100644
--- a/Source/Generated/GB_binop__gt_fp64.c
+++ b/Source/Generated/GB_binop__gt_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_fp64
-// A.*B function (eWiseMult):  GB_AemultB__gt_fp64
-// A*D function (colscale):    GB_AxD__gt_fp64
-// D*A function (rowscale):    GB_DxB__gt_fp64
+// A+B function (eWiseAdd):         GB_AaddB__gt_fp64
+// A.*B function (eWiseMult):       GB_AemultB__gt_fp64
+// A*D function (colscale):         GB_AxD__gt_fp64
+// D*A function (rowscale):         GB_DxB__gt_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_fp64
 
 // C type:   bool
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool here)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any for this operator and type; here (none):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_FP64 || GxB_NO_GT_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not built for this operator: see #if 0)
+//------------------------------------------------------------------------------
+
+#if 0                          // everything down to #endif is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                    // "(none)" stands in for the function name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif                         // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, via GB_dense_ewise3_noaccum_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_fp64
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // template supplies the body
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_fp64
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GrB_Matrix A,         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: template is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: template is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: template is compiled out
+    const int ntasks,           // unused: template is compiled out
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_fp64
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GB_void *p_ywork,     // pointer to the scalar; unused here
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_int16.c b/Source/Generated/GB_binop__gt_int16.c
index 46ef181d0d..174bf7c005 100644
--- a/Source/Generated/GB_binop__gt_int16.c
+++ b/Source/Generated/GB_binop__gt_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_int16
-// A.*B function (eWiseMult):  GB_AemultB__gt_int16
-// A*D function (colscale):    GB_AxD__gt_int16
-// D*A function (rowscale):    GB_DxB__gt_int16
+// A+B function (eWiseAdd):         GB_AaddB__gt_int16
+// A.*B function (eWiseMult):       GB_AemultB__gt_int16
+// A*D function (colscale):         GB_AxD__gt_int16
+// D*A function (rowscale):         GB_DxB__gt_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool here)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any for this operator and type; here (none):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_INT16 || GxB_NO_GT_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not built for this operator: see #if 0)
+//------------------------------------------------------------------------------
+
+#if 0                          // everything down to #endif is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                    // "(none)" stands in for the function name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif                         // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, via GB_dense_ewise3_noaccum_template.c
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int16
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // NOTE(review): presumably the thread count; confirm
+)
+{ 
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // template supplies the body
+    return (GrB_SUCCESS) ;      // reached after the included template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_int16
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GrB_Matrix A,         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused: template is compiled out
+    const int64_t *GB_RESTRICT klast_slice,     // unused: template is compiled out
+    const int64_t *GB_RESTRICT pstart_slice,    // unused: template is compiled out
+    const int ntasks,           // unused: template is compiled out
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (compiled out: no-op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_int16
+(
+    GrB_Matrix C,               // dense matrix to accumulate into
+    const GB_void *p_ywork,     // pointer to the scalar; unused here
+    const int nthreads          // unused: template is compiled out
+)
+{
+    #if GB_DISABLE              // kernel disabled: nothing is computed
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // the accumulate template is never compiled here
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // C is left untouched, yet success is reported
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_int32.c b/Source/Generated/GB_binop__gt_int32.c
index 736f83ddc4..6e681757a9 100644
--- a/Source/Generated/GB_binop__gt_int32.c
+++ b/Source/Generated/GB_binop__gt_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_int32
-// A.*B function (eWiseMult):  GB_AemultB__gt_int32
-// A*D function (colscale):    GB_AxD__gt_int32
-// D*A function (rowscale):    GB_DxB__gt_int32
+// A+B function (eWiseAdd):         GB_AaddB__gt_int32
+// A.*B function (eWiseMult):       GB_AemultB__gt_int32
+// A*D function (colscale):         GB_AxD__gt_int32
+// D*A function (rowscale):         GB_DxB__gt_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_INT32 || GxB_NO_GT_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_int64.c b/Source/Generated/GB_binop__gt_int64.c
index 265230bbbc..6665c917fc 100644
--- a/Source/Generated/GB_binop__gt_int64.c
+++ b/Source/Generated/GB_binop__gt_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_int64
-// A.*B function (eWiseMult):  GB_AemultB__gt_int64
-// A*D function (colscale):    GB_AxD__gt_int64
-// D*A function (rowscale):    GB_DxB__gt_int64
+// A+B function (eWiseAdd):         GB_AaddB__gt_int64
+// A.*B function (eWiseMult):       GB_AemultB__gt_int64
+// A*D function (colscale):         GB_AxD__gt_int64
+// D*A function (rowscale):         GB_DxB__gt_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_INT64 || GxB_NO_GT_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_int8.c b/Source/Generated/GB_binop__gt_int8.c
index f8c3919c2a..a2610f620b 100644
--- a/Source/Generated/GB_binop__gt_int8.c
+++ b/Source/Generated/GB_binop__gt_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_int8
-// A.*B function (eWiseMult):  GB_AemultB__gt_int8
-// A*D function (colscale):    GB_AxD__gt_int8
-// D*A function (rowscale):    GB_DxB__gt_int8
+// A+B function (eWiseAdd):         GB_AaddB__gt_int8
+// A.*B function (eWiseMult):       GB_AemultB__gt_int8
+// A*D function (colscale):         GB_AxD__gt_int8
+// D*A function (rowscale):         GB_DxB__gt_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_INT8 || GxB_NO_GT_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_uint16.c b/Source/Generated/GB_binop__gt_uint16.c
index 86dd39f869..24b337e70c 100644
--- a/Source/Generated/GB_binop__gt_uint16.c
+++ b/Source/Generated/GB_binop__gt_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_uint16
-// A.*B function (eWiseMult):  GB_AemultB__gt_uint16
-// A*D function (colscale):    GB_AxD__gt_uint16
-// D*A function (rowscale):    GB_DxB__gt_uint16
+// A+B function (eWiseAdd):         GB_AaddB__gt_uint16
+// A.*B function (eWiseMult):       GB_AemultB__gt_uint16
+// A*D function (colscale):         GB_AxD__gt_uint16
+// D*A function (rowscale):         GB_DxB__gt_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_UINT16 || GxB_NO_GT_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_uint32.c b/Source/Generated/GB_binop__gt_uint32.c
index e6837cdb07..3de879c3bf 100644
--- a/Source/Generated/GB_binop__gt_uint32.c
+++ b/Source/Generated/GB_binop__gt_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_uint32
-// A.*B function (eWiseMult):  GB_AemultB__gt_uint32
-// A*D function (colscale):    GB_AxD__gt_uint32
-// D*A function (rowscale):    GB_DxB__gt_uint32
+// A+B function (eWiseAdd):         GB_AaddB__gt_uint32
+// A.*B function (eWiseMult):       GB_AemultB__gt_uint32
+// A*D function (colscale):         GB_AxD__gt_uint32
+// D*A function (rowscale):         GB_DxB__gt_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_UINT32 || GxB_NO_GT_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_uint64.c b/Source/Generated/GB_binop__gt_uint64.c
index 6465e1f3c6..04f786613f 100644
--- a/Source/Generated/GB_binop__gt_uint64.c
+++ b/Source/Generated/GB_binop__gt_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_uint64
-// A.*B function (eWiseMult):  GB_AemultB__gt_uint64
-// A*D function (colscale):    GB_AxD__gt_uint64
-// D*A function (rowscale):    GB_DxB__gt_uint64
+// A+B function (eWiseAdd):         GB_AaddB__gt_uint64
+// A.*B function (eWiseMult):       GB_AemultB__gt_uint64
+// A*D function (colscale):         GB_AxD__gt_uint64
+// D*A function (rowscale):         GB_DxB__gt_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_UINT64 || GxB_NO_GT_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__gt_uint8.c b/Source/Generated/GB_binop__gt_uint8.c
index 5dfc8b2757..98e3f39b5f 100644
--- a/Source/Generated/GB_binop__gt_uint8.c
+++ b/Source/Generated/GB_binop__gt_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__gt_uint8
-// A.*B function (eWiseMult):  GB_AemultB__gt_uint8
-// A*D function (colscale):    GB_AxD__gt_uint8
-// D*A function (rowscale):    GB_DxB__gt_uint8
+// A+B function (eWiseAdd):         GB_AaddB__gt_uint8
+// A.*B function (eWiseMult):       GB_AemultB__gt_uint8
+// A*D function (colscale):         GB_AxD__gt_uint8
+// D*A function (rowscale):         GB_DxB__gt_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__gt_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__gt_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__gt_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// flag: is the op SECOND?  0 (false) here, since this file's op is GT
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 (false) here: op is GT on uint8_t
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 (false) here: op is GT on uint8_t
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine; "(none)" is a placeholder: none is named here
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_GT || GxB_NO_UINT8 || GxB_NO_GT_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C += A+B kernel is generated for GT_UINT8
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (not GT).
+
+void (none)     // placeholder standing where the kernel's name would be
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the compiled-out C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint8    // C = A+B, all 3 matrices dense
+(
+    GrB_Matrix C,           // the C of C = A+B; C type is bool for this op
+    const GrB_Matrix A,     // first operand; A type is uint8_t for this op
+    const GrB_Matrix B,     // second operand; entries are read as uint8_t (GB_GETB)
+    const int nthreads      // presumably # of threads for the template; TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GT, UINT8, or GT_UINT8 disabled via GxB_NO_*
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // all work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__gt_uint8     // C += A stub: template is compiled out
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // kfirst/klast/pstart: presumably
+    const int64_t *GB_RESTRICT klast_slice,     // the per-task slicing of A from
+    const int64_t *GB_RESTRICT pstart_slice,    // GB_ek_slice; TODO confirm
+    const int ntasks,       // unused while the template is compiled out
+    const int nthreads      // unused while the template is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GT, UINT8, or GT_UINT8 disabled via GxB_NO_*
+    #else
+    #if 0   // never compiled; presumably since GT maps uint8_t to bool -- confirm
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): success is reported with C untouched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__gt_uint8     // C += x stub: template is compiled out
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // the scalar x of C += x, passed by pointer
+    const int nthreads          // unused while the template is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GT, UINT8, or GT_UINT8 disabled via GxB_NO_*
+    #else
+    #if 0   // never compiled; presumably since GT maps uint8_t to bool -- confirm
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read the scalar as bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;  // NOTE(review): success is reported with C untouched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__gt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__gt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__include.h b/Source/Generated/GB_binop__include.h
index 9895cb4575..08b0818939 100644
--- a/Source/Generated/GB_binop__include.h
+++ b/Source/Generated/GB_binop__include.h
@@ -2,11 +2,50 @@
 // GB_binop__include.h: definitions for GB_binop__*.c
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // This file has been automatically generated from Generator/GB_binop.h
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_int8
 (
     GrB_Matrix C,
@@ -31,6 +70,7 @@ GrB_Info GB_AaddB__first_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -46,6 +86,7 @@ GrB_Info GB_AemultB__first_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -56,6 +97,45 @@ GrB_Info GB_AemultB__first_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_int16
 (
     GrB_Matrix C,
@@ -80,6 +160,7 @@ GrB_Info GB_AaddB__first_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -95,6 +176,7 @@ GrB_Info GB_AemultB__first_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -105,6 +187,45 @@ GrB_Info GB_AemultB__first_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_int32
 (
     GrB_Matrix C,
@@ -129,6 +250,7 @@ GrB_Info GB_AaddB__first_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -144,6 +266,7 @@ GrB_Info GB_AemultB__first_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -154,6 +277,45 @@ GrB_Info GB_AemultB__first_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_int64
 (
     GrB_Matrix C,
@@ -178,6 +340,7 @@ GrB_Info GB_AaddB__first_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -193,6 +356,7 @@ GrB_Info GB_AemultB__first_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -203,6 +367,45 @@ GrB_Info GB_AemultB__first_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_uint8
 (
     GrB_Matrix C,
@@ -227,6 +430,7 @@ GrB_Info GB_AaddB__first_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -242,6 +446,7 @@ GrB_Info GB_AemultB__first_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -252,6 +457,45 @@ GrB_Info GB_AemultB__first_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_uint16
 (
     GrB_Matrix C,
@@ -276,6 +520,7 @@ GrB_Info GB_AaddB__first_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -291,6 +536,7 @@ GrB_Info GB_AemultB__first_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -301,6 +547,45 @@ GrB_Info GB_AemultB__first_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_uint32
 (
     GrB_Matrix C,
@@ -325,6 +610,7 @@ GrB_Info GB_AaddB__first_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -340,6 +626,7 @@ GrB_Info GB_AemultB__first_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -350,6 +637,45 @@ GrB_Info GB_AemultB__first_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_uint64
 (
     GrB_Matrix C,
@@ -374,6 +700,7 @@ GrB_Info GB_AaddB__first_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -389,6 +716,7 @@ GrB_Info GB_AemultB__first_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -399,6 +727,45 @@ GrB_Info GB_AemultB__first_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_fp32
 (
     GrB_Matrix C,
@@ -423,6 +790,7 @@ GrB_Info GB_AaddB__first_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -438,6 +806,7 @@ GrB_Info GB_AemultB__first_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -448,6 +817,45 @@ GrB_Info GB_AemultB__first_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_fp64
 (
     GrB_Matrix C,
@@ -472,6 +880,7 @@ GrB_Info GB_AaddB__first_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -487,6 +896,7 @@ GrB_Info GB_AemultB__first_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -497,6 +907,45 @@ GrB_Info GB_AemultB__first_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__first_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__first_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__first_bool
 (
     GrB_Matrix C,
@@ -521,6 +970,7 @@ GrB_Info GB_AaddB__first_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -536,6 +986,7 @@ GrB_Info GB_AemultB__first_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -546,6 +997,45 @@ GrB_Info GB_AemultB__first_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_int8
 (
     GrB_Matrix C,
@@ -570,6 +1060,7 @@ GrB_Info GB_AaddB__second_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -585,6 +1076,7 @@ GrB_Info GB_AemultB__second_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -595,6 +1087,45 @@ GrB_Info GB_AemultB__second_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_int16
 (
     GrB_Matrix C,
@@ -619,6 +1150,7 @@ GrB_Info GB_AaddB__second_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -634,6 +1166,7 @@ GrB_Info GB_AemultB__second_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -644,6 +1177,45 @@ GrB_Info GB_AemultB__second_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_int32
 (
     GrB_Matrix C,
@@ -668,6 +1240,7 @@ GrB_Info GB_AaddB__second_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -683,6 +1256,7 @@ GrB_Info GB_AemultB__second_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -693,6 +1267,45 @@ GrB_Info GB_AemultB__second_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_int64
 (
     GrB_Matrix C,
@@ -717,6 +1330,7 @@ GrB_Info GB_AaddB__second_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -732,6 +1346,7 @@ GrB_Info GB_AemultB__second_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -742,6 +1357,45 @@ GrB_Info GB_AemultB__second_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_uint8
 (
     GrB_Matrix C,
@@ -766,6 +1420,7 @@ GrB_Info GB_AaddB__second_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -781,6 +1436,7 @@ GrB_Info GB_AemultB__second_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -791,6 +1447,45 @@ GrB_Info GB_AemultB__second_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_uint16
 (
     GrB_Matrix C,
@@ -815,6 +1510,7 @@ GrB_Info GB_AaddB__second_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -830,6 +1526,7 @@ GrB_Info GB_AemultB__second_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -840,6 +1537,45 @@ GrB_Info GB_AemultB__second_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_uint32
 (
     GrB_Matrix C,
@@ -864,6 +1600,7 @@ GrB_Info GB_AaddB__second_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -879,6 +1616,7 @@ GrB_Info GB_AemultB__second_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -889,6 +1627,45 @@ GrB_Info GB_AemultB__second_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_uint64
 (
     GrB_Matrix C,
@@ -913,6 +1690,7 @@ GrB_Info GB_AaddB__second_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -928,6 +1706,7 @@ GrB_Info GB_AemultB__second_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -938,6 +1717,45 @@ GrB_Info GB_AemultB__second_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_fp32
 (
     GrB_Matrix C,
@@ -962,6 +1780,7 @@ GrB_Info GB_AaddB__second_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -977,6 +1796,7 @@ GrB_Info GB_AemultB__second_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -987,6 +1807,45 @@ GrB_Info GB_AemultB__second_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__second_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__second_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__second_fp64
 (
     GrB_Matrix C,
@@ -1011,6 +1870,7 @@ GrB_Info GB_AaddB__second_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1026,6 +1886,7 @@ GrB_Info GB_AemultB__second_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1036,56 +1897,46 @@ GrB_Info GB_AemultB__second_fp64
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__second_bool
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__second_bool
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__second_bool
+GrB_Info GB_Cdense_accumA__second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__second_bool
+GrB_Info GB_Cdense_accumX__second_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_int8
+GrB_Info GB_AxD__second_bool
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1097,7 +1948,7 @@ GrB_Info GB_AxD__min_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_int8
+GrB_Info GB_DxB__second_bool
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1105,10 +1956,11 @@ GrB_Info GB_DxB__min_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_int8
+GrB_Info GB_AaddB__second_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1120,10 +1972,11 @@ GrB_Info GB_AaddB__min_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_int8
+GrB_Info GB_AemultB__second_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1134,56 +1987,46 @@ GrB_Info GB_AemultB__min_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_int16
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_int16
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_int16
+GrB_Info GB_Cdense_accumA__pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_int16
+GrB_Info GB_Cdense_accumX__pair_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_int32
+GrB_Info GB_AxD__pair_int8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1195,7 +2038,7 @@ GrB_Info GB_AxD__min_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_int32
+GrB_Info GB_DxB__pair_int8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1203,10 +2046,11 @@ GrB_Info GB_DxB__min_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_int32
+GrB_Info GB_AaddB__pair_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1218,10 +2062,11 @@ GrB_Info GB_AaddB__min_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_int32
+GrB_Info GB_AemultB__pair_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1232,56 +2077,46 @@ GrB_Info GB_AemultB__min_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_int64
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_int64
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_int64
+GrB_Info GB_Cdense_accumA__pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_int64
+GrB_Info GB_Cdense_accumX__pair_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_uint8
+GrB_Info GB_AxD__pair_int16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1293,7 +2128,7 @@ GrB_Info GB_AxD__min_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_uint8
+GrB_Info GB_DxB__pair_int16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1301,10 +2136,11 @@ GrB_Info GB_DxB__min_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_uint8
+GrB_Info GB_AaddB__pair_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1316,10 +2152,11 @@ GrB_Info GB_AaddB__min_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_uint8
+GrB_Info GB_AemultB__pair_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1330,56 +2167,46 @@ GrB_Info GB_AemultB__min_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_uint16
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_uint16
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_uint16
+GrB_Info GB_Cdense_accumA__pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_uint16
+GrB_Info GB_Cdense_accumX__pair_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_uint32
+GrB_Info GB_AxD__pair_int32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1391,7 +2218,7 @@ GrB_Info GB_AxD__min_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_uint32
+GrB_Info GB_DxB__pair_int32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1399,10 +2226,11 @@ GrB_Info GB_DxB__min_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_uint32
+GrB_Info GB_AaddB__pair_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1414,10 +2242,11 @@ GrB_Info GB_AaddB__min_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_uint32
+GrB_Info GB_AemultB__pair_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1428,56 +2257,46 @@ GrB_Info GB_AemultB__min_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_uint64
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_uint64
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_uint64
+GrB_Info GB_Cdense_accumA__pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_uint64
+GrB_Info GB_Cdense_accumX__pair_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_fp32
+GrB_Info GB_AxD__pair_int64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1489,7 +2308,7 @@ GrB_Info GB_AxD__min_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_fp32
+GrB_Info GB_DxB__pair_int64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1497,10 +2316,11 @@ GrB_Info GB_DxB__min_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_fp32
+GrB_Info GB_AaddB__pair_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1512,10 +2332,11 @@ GrB_Info GB_AaddB__min_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_fp32
+GrB_Info GB_AemultB__pair_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1526,56 +2347,46 @@ GrB_Info GB_AemultB__min_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__min_fp64
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__min_fp64
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__min_fp64
+GrB_Info GB_Cdense_accumA__pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__min_fp64
+GrB_Info GB_Cdense_accumX__pair_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_int8
+GrB_Info GB_AxD__pair_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1587,7 +2398,7 @@ GrB_Info GB_AxD__max_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_int8
+GrB_Info GB_DxB__pair_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1595,10 +2406,11 @@ GrB_Info GB_DxB__max_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_int8
+GrB_Info GB_AaddB__pair_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1610,10 +2422,11 @@ GrB_Info GB_AaddB__max_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_int8
+GrB_Info GB_AemultB__pair_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1624,56 +2437,46 @@ GrB_Info GB_AemultB__max_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_int16
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_int16
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_int16
+GrB_Info GB_Cdense_accumA__pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_int16
+GrB_Info GB_Cdense_accumX__pair_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_int32
+GrB_Info GB_AxD__pair_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1685,7 +2488,7 @@ GrB_Info GB_AxD__max_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_int32
+GrB_Info GB_DxB__pair_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1693,10 +2496,11 @@ GrB_Info GB_DxB__max_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_int32
+GrB_Info GB_AaddB__pair_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1708,10 +2512,11 @@ GrB_Info GB_AaddB__max_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_int32
+GrB_Info GB_AemultB__pair_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1722,56 +2527,46 @@ GrB_Info GB_AemultB__max_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_int64
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_int64
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_int64
+GrB_Info GB_Cdense_accumA__pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_int64
+GrB_Info GB_Cdense_accumX__pair_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_uint8
+GrB_Info GB_AxD__pair_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1783,7 +2578,7 @@ GrB_Info GB_AxD__max_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_uint8
+GrB_Info GB_DxB__pair_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1791,10 +2586,11 @@ GrB_Info GB_DxB__max_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_uint8
+GrB_Info GB_AaddB__pair_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1806,10 +2602,11 @@ GrB_Info GB_AaddB__max_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_uint8
+GrB_Info GB_AemultB__pair_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1820,56 +2617,46 @@ GrB_Info GB_AemultB__max_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_uint16
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_uint16
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_uint16
+GrB_Info GB_Cdense_accumA__pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_uint16
+GrB_Info GB_Cdense_accumX__pair_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_uint32
+GrB_Info GB_AxD__pair_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1881,7 +2668,7 @@ GrB_Info GB_AxD__max_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_uint32
+GrB_Info GB_DxB__pair_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1889,10 +2676,11 @@ GrB_Info GB_DxB__max_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_uint32
+GrB_Info GB_AaddB__pair_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -1904,10 +2692,11 @@ GrB_Info GB_AaddB__max_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_uint32
+GrB_Info GB_AemultB__pair_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -1918,56 +2707,46 @@ GrB_Info GB_AemultB__max_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_uint64
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_uint64
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_uint64
+GrB_Info GB_Cdense_accumA__pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_uint64
+GrB_Info GB_Cdense_accumX__pair_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_fp32
+GrB_Info GB_AxD__pair_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -1979,7 +2758,7 @@ GrB_Info GB_AxD__max_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_fp32
+GrB_Info GB_DxB__pair_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -1987,10 +2766,11 @@ GrB_Info GB_DxB__max_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_fp32
+GrB_Info GB_AaddB__pair_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2002,10 +2782,11 @@ GrB_Info GB_AaddB__max_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_fp32
+GrB_Info GB_AemultB__pair_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2016,56 +2797,46 @@ GrB_Info GB_AemultB__max_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__max_fp64
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__max_fp64
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__max_fp64
+GrB_Info GB_Cdense_accumA__pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__max_fp64
+GrB_Info GB_Cdense_accumX__pair_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_int8
+GrB_Info GB_AxD__pair_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2077,7 +2848,7 @@ GrB_Info GB_AxD__plus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_int8
+GrB_Info GB_DxB__pair_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2085,10 +2856,11 @@ GrB_Info GB_DxB__plus_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_int8
+GrB_Info GB_AaddB__pair_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2100,10 +2872,11 @@ GrB_Info GB_AaddB__plus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_int8
+GrB_Info GB_AemultB__pair_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2114,56 +2887,46 @@ GrB_Info GB_AemultB__plus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_int16
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_int16
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_int16
+GrB_Info GB_Cdense_accumA__pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_int16
+GrB_Info GB_Cdense_accumX__pair_bool
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_int32
+GrB_Info GB_AxD__pair_bool
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2175,7 +2938,7 @@ GrB_Info GB_AxD__plus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_int32
+GrB_Info GB_DxB__pair_bool
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2183,10 +2946,11 @@ GrB_Info GB_DxB__plus_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_int32
+GrB_Info GB_AaddB__pair_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2198,10 +2962,11 @@ GrB_Info GB_AaddB__plus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_int32
+GrB_Info GB_AemultB__pair_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2212,7 +2977,46 @@ GrB_Info GB_AemultB__plus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_int64
+
+
+
+void GB_Cdense_ewise3_accum__min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__min_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__min_int8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2224,7 +3028,7 @@ GrB_Info GB_AxD__plus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_int64
+GrB_Info GB_DxB__min_int8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2232,10 +3036,11 @@ GrB_Info GB_DxB__plus_int64
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_int64
+GrB_Info GB_AaddB__min_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2247,10 +3052,11 @@ GrB_Info GB_AaddB__plus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_int64
+GrB_Info GB_AemultB__min_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2261,7 +3067,46 @@ GrB_Info GB_AemultB__plus_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_uint8
+
+
+
+void GB_Cdense_ewise3_accum__min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__min_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__min_int16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2273,7 +3118,7 @@ GrB_Info GB_AxD__plus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_uint8
+GrB_Info GB_DxB__min_int16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2281,10 +3126,11 @@ GrB_Info GB_DxB__plus_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_uint8
+GrB_Info GB_AaddB__min_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2296,10 +3142,11 @@ GrB_Info GB_AaddB__plus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_uint8
+GrB_Info GB_AemultB__min_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2310,56 +3157,46 @@ GrB_Info GB_AemultB__plus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_uint16
+
+
+
+void GB_Cdense_ewise3_accum__min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_uint16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_uint16
+GrB_Info GB_Cdense_accumA__min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_uint16
+GrB_Info GB_Cdense_accumX__min_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_uint32
+GrB_Info GB_AxD__min_int32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2371,7 +3208,7 @@ GrB_Info GB_AxD__plus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_uint32
+GrB_Info GB_DxB__min_int32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2379,10 +3216,11 @@ GrB_Info GB_DxB__plus_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_uint32
+GrB_Info GB_AaddB__min_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2394,10 +3232,11 @@ GrB_Info GB_AaddB__plus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_uint32
+GrB_Info GB_AemultB__min_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2408,56 +3247,46 @@ GrB_Info GB_AemultB__plus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_uint64
+
+
+
+void GB_Cdense_ewise3_accum__min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_uint64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_uint64
+GrB_Info GB_Cdense_accumA__min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_uint64
+GrB_Info GB_Cdense_accumX__min_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_fp32
+GrB_Info GB_AxD__min_int64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2469,7 +3298,7 @@ GrB_Info GB_AxD__plus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_fp32
+GrB_Info GB_DxB__min_int64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2477,10 +3306,11 @@ GrB_Info GB_DxB__plus_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_fp32
+GrB_Info GB_AaddB__min_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2492,10 +3322,11 @@ GrB_Info GB_AaddB__plus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_fp32
+GrB_Info GB_AemultB__min_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2506,56 +3337,46 @@ GrB_Info GB_AemultB__plus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__plus_fp64
+
+
+
+void GB_Cdense_ewise3_accum__min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__plus_fp64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__plus_fp64
+GrB_Info GB_Cdense_accumA__min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__plus_fp64
+GrB_Info GB_Cdense_accumX__min_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_int8
+GrB_Info GB_AxD__min_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2567,7 +3388,7 @@ GrB_Info GB_AxD__minus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_int8
+GrB_Info GB_DxB__min_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2575,10 +3396,11 @@ GrB_Info GB_DxB__minus_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_int8
+GrB_Info GB_AaddB__min_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2590,10 +3412,11 @@ GrB_Info GB_AaddB__minus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_int8
+GrB_Info GB_AemultB__min_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2604,56 +3427,46 @@ GrB_Info GB_AemultB__minus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_int16
+
+
+
+void GB_Cdense_ewise3_accum__min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_int16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_int16
+GrB_Info GB_Cdense_accumA__min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_int16
+GrB_Info GB_Cdense_accumX__min_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_int32
+GrB_Info GB_AxD__min_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2665,7 +3478,7 @@ GrB_Info GB_AxD__minus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_int32
+GrB_Info GB_DxB__min_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2673,10 +3486,11 @@ GrB_Info GB_DxB__minus_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_int32
+GrB_Info GB_AaddB__min_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2688,10 +3502,11 @@ GrB_Info GB_AaddB__minus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_int32
+GrB_Info GB_AemultB__min_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2702,56 +3517,46 @@ GrB_Info GB_AemultB__minus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_int64
+
+
+
+void GB_Cdense_ewise3_accum__min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_int64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_int64
+GrB_Info GB_Cdense_accumA__min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_int64
+GrB_Info GB_Cdense_accumX__min_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_uint8
+GrB_Info GB_AxD__min_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2763,7 +3568,7 @@ GrB_Info GB_AxD__minus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_uint8
+GrB_Info GB_DxB__min_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2771,10 +3576,11 @@ GrB_Info GB_DxB__minus_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_uint8
+GrB_Info GB_AaddB__min_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2786,10 +3592,11 @@ GrB_Info GB_AaddB__minus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_uint8
+GrB_Info GB_AemultB__min_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2800,56 +3607,46 @@ GrB_Info GB_AemultB__minus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_uint16
+
+
+
+void GB_Cdense_ewise3_accum__min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_uint16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_uint16
+GrB_Info GB_Cdense_accumA__min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_uint16
+GrB_Info GB_Cdense_accumX__min_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_uint32
+GrB_Info GB_AxD__min_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2861,7 +3658,7 @@ GrB_Info GB_AxD__minus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_uint32
+GrB_Info GB_DxB__min_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2869,10 +3666,11 @@ GrB_Info GB_DxB__minus_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_uint32
+GrB_Info GB_AaddB__min_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2884,10 +3682,11 @@ GrB_Info GB_AaddB__minus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_uint32
+GrB_Info GB_AemultB__min_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2898,56 +3697,46 @@ GrB_Info GB_AemultB__minus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_uint64
+
+
+
+void GB_Cdense_ewise3_accum__min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_uint64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_uint64
+GrB_Info GB_Cdense_accumA__min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_uint64
+GrB_Info GB_Cdense_accumX__min_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_fp32
+GrB_Info GB_AxD__min_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -2959,7 +3748,7 @@ GrB_Info GB_AxD__minus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_fp32
+GrB_Info GB_DxB__min_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -2967,10 +3756,11 @@ GrB_Info GB_DxB__minus_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_fp32
+GrB_Info GB_AaddB__min_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -2982,10 +3772,11 @@ GrB_Info GB_AaddB__minus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_fp32
+GrB_Info GB_AemultB__min_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -2996,56 +3787,46 @@ GrB_Info GB_AemultB__minus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__minus_fp64
+
+
+
+void GB_Cdense_ewise3_accum__min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__minus_fp64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__minus_fp64
+GrB_Info GB_Cdense_accumA__min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__minus_fp64
+GrB_Info GB_Cdense_accumX__min_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_int8
+GrB_Info GB_AxD__min_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3057,7 +3838,7 @@ GrB_Info GB_AxD__rminus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_int8
+GrB_Info GB_DxB__min_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3065,10 +3846,11 @@ GrB_Info GB_DxB__rminus_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_int8
+GrB_Info GB_AaddB__min_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3080,10 +3862,11 @@ GrB_Info GB_AaddB__rminus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_int8
+GrB_Info GB_AemultB__min_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3094,56 +3877,46 @@ GrB_Info GB_AemultB__rminus_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_int16
+
+
+
+void GB_Cdense_ewise3_accum__max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_int16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_int16
+GrB_Info GB_Cdense_accumA__max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_int16
+GrB_Info GB_Cdense_accumX__max_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_int32
+GrB_Info GB_AxD__max_int8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3155,7 +3928,7 @@ GrB_Info GB_AxD__rminus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_int32
+GrB_Info GB_DxB__max_int8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3163,10 +3936,11 @@ GrB_Info GB_DxB__rminus_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_int32
+GrB_Info GB_AaddB__max_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3178,10 +3952,11 @@ GrB_Info GB_AaddB__rminus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_int32
+GrB_Info GB_AemultB__max_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3192,56 +3967,46 @@ GrB_Info GB_AemultB__rminus_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_int64
+
+
+
+void GB_Cdense_ewise3_accum__max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_int64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_int64
+GrB_Info GB_Cdense_accumA__max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_int64
+GrB_Info GB_Cdense_accumX__max_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_uint8
+GrB_Info GB_AxD__max_int16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3253,7 +4018,7 @@ GrB_Info GB_AxD__rminus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_uint8
+GrB_Info GB_DxB__max_int16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3261,10 +4026,11 @@ GrB_Info GB_DxB__rminus_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_uint8
+GrB_Info GB_AaddB__max_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3276,10 +4042,11 @@ GrB_Info GB_AaddB__rminus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_uint8
+GrB_Info GB_AemultB__max_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3290,56 +4057,46 @@ GrB_Info GB_AemultB__rminus_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_uint16
+
+
+
+void GB_Cdense_ewise3_accum__max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_uint16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_uint16
+GrB_Info GB_Cdense_accumA__max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_uint16
+GrB_Info GB_Cdense_accumX__max_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_uint32
+GrB_Info GB_AxD__max_int32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3351,7 +4108,7 @@ GrB_Info GB_AxD__rminus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_uint32
+GrB_Info GB_DxB__max_int32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3359,10 +4116,11 @@ GrB_Info GB_DxB__rminus_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_uint32
+GrB_Info GB_AaddB__max_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3374,10 +4132,11 @@ GrB_Info GB_AaddB__rminus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_uint32
+GrB_Info GB_AemultB__max_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3388,56 +4147,46 @@ GrB_Info GB_AemultB__rminus_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_uint64
+
+
+
+void GB_Cdense_ewise3_accum__max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_uint64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_uint64
+GrB_Info GB_Cdense_accumA__max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_uint64
+GrB_Info GB_Cdense_accumX__max_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_fp32
+GrB_Info GB_AxD__max_int64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3449,7 +4198,7 @@ GrB_Info GB_AxD__rminus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_fp32
+GrB_Info GB_DxB__max_int64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3457,10 +4206,11 @@ GrB_Info GB_DxB__rminus_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_fp32
+GrB_Info GB_AaddB__max_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3472,10 +4222,11 @@ GrB_Info GB_AaddB__rminus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_fp32
+GrB_Info GB_AemultB__max_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3486,56 +4237,46 @@ GrB_Info GB_AemultB__rminus_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rminus_fp64
+
+
+
+void GB_Cdense_ewise3_accum__max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rminus_fp64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rminus_fp64
+GrB_Info GB_Cdense_accumA__max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rminus_fp64
+GrB_Info GB_Cdense_accumX__max_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_int8
+GrB_Info GB_AxD__max_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3547,7 +4288,7 @@ GrB_Info GB_AxD__times_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_int8
+GrB_Info GB_DxB__max_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3555,10 +4296,11 @@ GrB_Info GB_DxB__times_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_int8
+GrB_Info GB_AaddB__max_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3570,10 +4312,11 @@ GrB_Info GB_AaddB__times_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_int8
+GrB_Info GB_AemultB__max_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3584,56 +4327,46 @@ GrB_Info GB_AemultB__times_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_int16
+
+
+
+void GB_Cdense_ewise3_accum__max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_int16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_int16
+GrB_Info GB_Cdense_accumA__max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_int16
+GrB_Info GB_Cdense_accumX__max_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_int32
+GrB_Info GB_AxD__max_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3645,7 +4378,7 @@ GrB_Info GB_AxD__times_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_int32
+GrB_Info GB_DxB__max_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3653,10 +4386,11 @@ GrB_Info GB_DxB__times_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_int32
+GrB_Info GB_AaddB__max_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3668,10 +4402,11 @@ GrB_Info GB_AaddB__times_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_int32
+GrB_Info GB_AemultB__max_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3682,7 +4417,46 @@ GrB_Info GB_AemultB__times_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_int64
+
+
+
+void GB_Cdense_ewise3_accum__max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__max_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__max_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3694,7 +4468,7 @@ GrB_Info GB_AxD__times_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_int64
+GrB_Info GB_DxB__max_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3702,10 +4476,11 @@ GrB_Info GB_DxB__times_int64
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_int64
+GrB_Info GB_AaddB__max_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3717,10 +4492,11 @@ GrB_Info GB_AaddB__times_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_int64
+GrB_Info GB_AemultB__max_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3731,7 +4507,46 @@ GrB_Info GB_AemultB__times_int64
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_uint8
+
+
+
+void GB_Cdense_ewise3_accum__max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__max_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__max_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3743,7 +4558,7 @@ GrB_Info GB_AxD__times_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_uint8
+GrB_Info GB_DxB__max_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3751,10 +4566,11 @@ GrB_Info GB_DxB__times_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_uint8
+GrB_Info GB_AaddB__max_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3766,10 +4582,11 @@ GrB_Info GB_AaddB__times_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_uint8
+GrB_Info GB_AemultB__max_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3780,8 +4597,4277 @@ GrB_Info GB_AemultB__times_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_uint16
-(
+
+
+
+void GB_Cdense_ewise3_accum__max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__max_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__max_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__max_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __plus_uint32.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__plus_uint32  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint32  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_uint32  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_uint32  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_uint32  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_uint32  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_uint32  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_uint32  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __plus_uint64.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__plus_uint64  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint64  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_uint64  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_uint64  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_uint64  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_uint64  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_uint64  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_uint64  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __plus_fp32.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__plus_fp32  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_fp32  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_fp32  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_fp32  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_fp32  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_fp32  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_fp32  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_fp32  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __plus_fp64.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__plus_fp64  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_fp64  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__plus_fp64  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__plus_fp64  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__plus_fp64  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__plus_fp64  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__plus_fp64  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__plus_fp64  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_int8.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_int8  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int8  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_int8  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_int8  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_int8  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_int8  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_int8  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_int8  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_int16.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_int16  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int16  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_int16  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_int16  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_int16  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_int16  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_int16  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_int16  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_int32.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_int32  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int32  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_int32  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_int32  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_int32  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_int32  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_int32  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_int32  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_int64.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_int64  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int64  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_int64  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_int64  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_int64  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_int64  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_int64  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_int64  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_uint8.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_uint8  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint8  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_uint8  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_uint8  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_uint8  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_uint8  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_uint8  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_uint8  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_uint16.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_uint16  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint16  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_uint16  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_uint16  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_uint16  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_uint16  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_uint16  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_uint16  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_uint32.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_uint32  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint32  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_uint32  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_uint32  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_uint32  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_uint32  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_uint32  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_uint32  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_uint64.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_uint64  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint64  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_uint64  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_uint64  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_uint64  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_uint64  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_uint64  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_uint64  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_fp32.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_fp32  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_fp32  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_fp32  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_fp32  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_fp32  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_fp32  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_fp32  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_fp32  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __minus_fp64.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__minus_fp64  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_fp64  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__minus_fp64  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__minus_fp64  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__minus_fp64  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__minus_fp64  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__minus_fp64  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__minus_fp64  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __rminus_int8.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__rminus_int8  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int8  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__rminus_int8  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__rminus_int8  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__rminus_int8  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__rminus_int8  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__rminus_int8  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__rminus_int8  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __rminus_int16.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__rminus_int16  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int16  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__rminus_int16  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__rminus_int16  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__rminus_int16  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__rminus_int16  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__rminus_int16  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__rminus_int16  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes for the kernels suffixed __rminus_int32.  Declarations only; no
+// bodies here.  NOTE(review): suffix presumably encodes operator and type.
+void GB_Cdense_ewise3_accum__rminus_int32  // returns void; siblings return GrB_Info
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int32  // same parameters as _accum above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__rminus_int32  // C, A, three slice arrays, task/thread counts
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__rminus_int32  // what p_ywork points to is not visible here
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__rminus_int32  // presumably C=A*D with D diagonal -- TODO confirm
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__rminus_int32  // nthreads: plain int here, const int in siblings
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__rminus_int32  // takes a Ch_is_Mh flag after A and B
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__rminus_int32  // GB_AaddB parameter list without Ch_is_Mh
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __rminus_int64 suffix; declarations only.
+// NOTE(review): "rminus" presumably means z = y-x on int64_t -- confirm.
+void GB_Cdense_ewise3_accum__rminus_int64   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__rminus_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __rminus_uint8 suffix; declarations only.
+// NOTE(review): "rminus" presumably means z = y-x on uint8_t -- confirm.
+void GB_Cdense_ewise3_accum__rminus_uint8   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__rminus_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__rminus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __rminus_uint16 suffix; declarations only.
+// NOTE(review): "rminus" presumably means z = y-x on uint16_t -- confirm.
+void GB_Cdense_ewise3_accum__rminus_uint16   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__rminus_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __rminus_uint32 suffix; declarations only.
+// NOTE(review): "rminus" presumably means z = y-x on uint32_t -- confirm.
+void GB_Cdense_ewise3_accum__rminus_uint32   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__rminus_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __rminus_uint64 suffix; declarations only.
+// NOTE(review): "rminus" presumably means z = y-x on uint64_t -- confirm.
+void GB_Cdense_ewise3_accum__rminus_uint64   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__rminus_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__rminus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __rminus_fp32 suffix; declarations only.
+// NOTE(review): "rminus" presumably means z = y-x on float -- confirm.
+void GB_Cdense_ewise3_accum__rminus_fp32   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__rminus_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __rminus_fp64 suffix; declarations only.
+// NOTE(review): "rminus" presumably means z = y-x on double -- confirm.
+void GB_Cdense_ewise3_accum__rminus_fp64   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__rminus_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_int8 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on int8_t -- confirm.
+void GB_Cdense_ewise3_accum__times_int8   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_int16 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on int16_t -- confirm.
+void GB_Cdense_ewise3_accum__times_int16   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_int32 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on int32_t -- confirm.
+void GB_Cdense_ewise3_accum__times_int32   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_int64 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on int64_t -- confirm.
+void GB_Cdense_ewise3_accum__times_int64   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_uint8 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on uint8_t -- confirm.
+void GB_Cdense_ewise3_accum__times_uint8   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_uint16 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on uint16_t -- confirm.
+void GB_Cdense_ewise3_accum__times_uint16   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_uint32 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on uint32_t -- confirm.
+void GB_Cdense_ewise3_accum__times_uint32   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_uint64 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on uint64_t -- confirm.
+void GB_Cdense_ewise3_accum__times_uint64   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+// Prototypes that share the __times_fp32 suffix; declarations only.
+// NOTE(review): "times" presumably means z = x*y on float -- confirm.
+void GB_Cdense_ewise3_accum__times_fp32   // returns void, not GrB_Info
+(
+    GrB_Matrix C,   // the only matrix argument not declared const
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+// NOTE(review): presumably accumulate op(A,B) into C (above) vs. assign
+// C = op(A,B) (below), all dense; only the one below returns GrB_Info.
+GrB_Info GB_Cdense_ewise3_noaccum__times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += A into a dense C, split over ntasks tasks.
+GrB_Info GB_Cdense_accumA__times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C += scalar, with the scalar behind p_ywork.
+GrB_Info GB_Cdense_accumX__times_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = A*D with D diagonal (column scaling).
+GrB_Info GB_AxD__times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably C = D*B with D diagonal (row scaling).
+GrB_Info GB_DxB__times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads    // plain int here; the sibling prototypes use const int
+) ;
+// NOTE(review): presumably element-wise add of A and B, optional mask M.
+GrB_Info GB_AaddB__times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,    // flag with no counterpart in GB_AemultB
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+// NOTE(review): presumably element-wise multiply of A and B, optional mask M.
+GrB_Info GB_AemultB__times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__times_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_uint16
+(
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3792,7 +8878,7 @@ GrB_Info GB_AxD__times_uint16
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_uint16
+GrB_Info GB_DxB__div_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3800,36 +8886,257 @@ GrB_Info GB_DxB__times_uint16
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_uint16
+GrB_Info GB_AaddB__div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__div_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_DxB__div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+) ;
+
+GrB_Info GB_AaddB__div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_AemultB__div_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+
+
+void GB_Cdense_ewise3_accum__div_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
     const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_uint16
+GrB_Info GB_Cdense_accumA__div_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_uint32
+GrB_Info GB_Cdense_accumX__div_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
+GrB_Info GB_AxD__div_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3841,7 +9148,7 @@ GrB_Info GB_AxD__times_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_uint32
+GrB_Info GB_DxB__div_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3849,10 +9156,11 @@ GrB_Info GB_DxB__times_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_uint32
+GrB_Info GB_AaddB__div_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3864,10 +9172,11 @@ GrB_Info GB_AaddB__times_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_uint32
+GrB_Info GB_AemultB__div_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3878,56 +9187,46 @@ GrB_Info GB_AemultB__times_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_uint64
+
+
+
+void GB_Cdense_ewise3_accum__div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_uint64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_uint64
+GrB_Info GB_Cdense_accumA__div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_uint64
+GrB_Info GB_Cdense_accumX__div_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_fp32
+GrB_Info GB_AxD__div_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -3939,7 +9238,7 @@ GrB_Info GB_AxD__times_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_fp32
+GrB_Info GB_DxB__div_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -3947,10 +9246,11 @@ GrB_Info GB_DxB__times_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_fp32
+GrB_Info GB_AaddB__div_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -3962,10 +9262,11 @@ GrB_Info GB_AaddB__times_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_fp32
+GrB_Info GB_AemultB__div_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -3976,56 +9277,46 @@ GrB_Info GB_AemultB__times_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__times_fp64
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__times_fp64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__times_fp64
+GrB_Info GB_Cdense_accumA__rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__times_fp64
+GrB_Info GB_Cdense_accumX__rdiv_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_int8
+GrB_Info GB_AxD__rdiv_int8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4037,7 +9328,7 @@ GrB_Info GB_AxD__div_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_int8
+GrB_Info GB_DxB__rdiv_int8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4045,10 +9336,11 @@ GrB_Info GB_DxB__div_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_int8
+GrB_Info GB_AaddB__rdiv_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4060,10 +9352,11 @@ GrB_Info GB_AaddB__div_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_int8
+GrB_Info GB_AemultB__rdiv_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4074,56 +9367,46 @@ GrB_Info GB_AemultB__div_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_int16
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_int16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_int16
+GrB_Info GB_Cdense_accumA__rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_int16
+GrB_Info GB_Cdense_accumX__rdiv_int16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_int32
+GrB_Info GB_AxD__rdiv_int16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4135,7 +9418,7 @@ GrB_Info GB_AxD__div_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_int32
+GrB_Info GB_DxB__rdiv_int16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4143,10 +9426,11 @@ GrB_Info GB_DxB__div_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_int32
+GrB_Info GB_AaddB__rdiv_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4158,10 +9442,11 @@ GrB_Info GB_AaddB__div_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_int32
+GrB_Info GB_AemultB__rdiv_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4172,56 +9457,46 @@ GrB_Info GB_AemultB__div_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_int64
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_int64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_int64
+GrB_Info GB_Cdense_accumA__rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_int64
+GrB_Info GB_Cdense_accumX__rdiv_int32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_uint8
+GrB_Info GB_AxD__rdiv_int32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4233,7 +9508,7 @@ GrB_Info GB_AxD__div_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_uint8
+GrB_Info GB_DxB__rdiv_int32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4241,10 +9516,11 @@ GrB_Info GB_DxB__div_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_uint8
+GrB_Info GB_AaddB__rdiv_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4256,10 +9532,11 @@ GrB_Info GB_AaddB__div_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_uint8
+GrB_Info GB_AemultB__rdiv_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4270,56 +9547,46 @@ GrB_Info GB_AemultB__div_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_uint16
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_uint16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_uint16
+GrB_Info GB_Cdense_accumA__rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_uint16
+GrB_Info GB_Cdense_accumX__rdiv_int64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_uint32
+GrB_Info GB_AxD__rdiv_int64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4331,7 +9598,7 @@ GrB_Info GB_AxD__div_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_uint32
+GrB_Info GB_DxB__rdiv_int64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4339,10 +9606,11 @@ GrB_Info GB_DxB__div_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_uint32
+GrB_Info GB_AaddB__rdiv_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4354,10 +9622,11 @@ GrB_Info GB_AaddB__div_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_uint32
+GrB_Info GB_AemultB__rdiv_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4368,56 +9637,46 @@ GrB_Info GB_AemultB__div_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_uint64
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_uint64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_uint64
+GrB_Info GB_Cdense_accumA__rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_uint64
+GrB_Info GB_Cdense_accumX__rdiv_uint8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_fp32
+GrB_Info GB_AxD__rdiv_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4429,7 +9688,7 @@ GrB_Info GB_AxD__div_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_fp32
+GrB_Info GB_DxB__rdiv_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4437,10 +9696,11 @@ GrB_Info GB_DxB__div_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_fp32
+GrB_Info GB_AaddB__rdiv_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4452,10 +9712,11 @@ GrB_Info GB_AaddB__div_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_fp32
+GrB_Info GB_AemultB__rdiv_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4466,56 +9727,46 @@ GrB_Info GB_AemultB__div_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__div_fp64
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__div_fp64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__div_fp64
+GrB_Info GB_Cdense_accumA__rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__div_fp64
+GrB_Info GB_Cdense_accumX__rdiv_uint16
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_int8
+GrB_Info GB_AxD__rdiv_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4527,7 +9778,7 @@ GrB_Info GB_AxD__rdiv_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_int8
+GrB_Info GB_DxB__rdiv_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4535,10 +9786,11 @@ GrB_Info GB_DxB__rdiv_int8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_int8
+GrB_Info GB_AaddB__rdiv_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4550,10 +9802,11 @@ GrB_Info GB_AaddB__rdiv_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_int8
+GrB_Info GB_AemultB__rdiv_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4564,56 +9817,46 @@ GrB_Info GB_AemultB__rdiv_int8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_int16
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_int16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_int16
+GrB_Info GB_Cdense_accumA__rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_int16
+GrB_Info GB_Cdense_accumX__rdiv_uint32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_int32
+GrB_Info GB_AxD__rdiv_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4625,7 +9868,7 @@ GrB_Info GB_AxD__rdiv_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_int32
+GrB_Info GB_DxB__rdiv_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4633,10 +9876,11 @@ GrB_Info GB_DxB__rdiv_int32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_int32
+GrB_Info GB_AaddB__rdiv_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4648,10 +9892,11 @@ GrB_Info GB_AaddB__rdiv_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_int32
+GrB_Info GB_AemultB__rdiv_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4662,56 +9907,46 @@ GrB_Info GB_AemultB__rdiv_int32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_int64
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_int64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_int64
+GrB_Info GB_Cdense_accumA__rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_int64
+GrB_Info GB_Cdense_accumX__rdiv_uint64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_uint8
+GrB_Info GB_AxD__rdiv_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4723,7 +9958,7 @@ GrB_Info GB_AxD__rdiv_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_uint8
+GrB_Info GB_DxB__rdiv_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4731,10 +9966,11 @@ GrB_Info GB_DxB__rdiv_uint8
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_uint8
+GrB_Info GB_AaddB__rdiv_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4746,10 +9982,11 @@ GrB_Info GB_AaddB__rdiv_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_uint8
+GrB_Info GB_AemultB__rdiv_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4760,56 +9997,46 @@ GrB_Info GB_AemultB__rdiv_uint8
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_uint16
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_uint16
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_uint16
+GrB_Info GB_Cdense_accumA__rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_uint16
+GrB_Info GB_Cdense_accumX__rdiv_fp32
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_uint32
+GrB_Info GB_AxD__rdiv_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4821,7 +10048,7 @@ GrB_Info GB_AxD__rdiv_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_uint32
+GrB_Info GB_DxB__rdiv_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4829,10 +10056,11 @@ GrB_Info GB_DxB__rdiv_uint32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_uint32
+GrB_Info GB_AaddB__rdiv_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4844,10 +10072,11 @@ GrB_Info GB_AaddB__rdiv_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_uint32
+GrB_Info GB_AemultB__rdiv_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4858,56 +10087,46 @@ GrB_Info GB_AemultB__rdiv_uint32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_uint64
+
+
+
+void GB_Cdense_ewise3_accum__rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_uint64
+
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_uint64
+GrB_Info GB_Cdense_accumA__rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_uint64
+GrB_Info GB_Cdense_accumX__rdiv_fp64
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_fp32
+GrB_Info GB_AxD__rdiv_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix A, bool A_is_pattern,
@@ -4919,7 +10138,7 @@ GrB_Info GB_AxD__rdiv_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_fp32
+GrB_Info GB_DxB__rdiv_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix D, bool D_is_pattern,
@@ -4927,10 +10146,11 @@ GrB_Info GB_DxB__rdiv_fp32
     int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_fp32
+GrB_Info GB_AaddB__rdiv_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -4942,10 +10162,11 @@ GrB_Info GB_AaddB__rdiv_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_fp32
+GrB_Info GB_AemultB__rdiv_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -4956,52 +10177,42 @@ GrB_Info GB_AemultB__rdiv_fp32
     const int nthreads
 ) ;
 
-GrB_Info GB_AxD__rdiv_fp64
+
+#if 0
+
+void (none)
 (
     GrB_Matrix C,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix D, bool D_is_pattern,
-    const int64_t *GB_RESTRICT kfirst_slice,
-    const int64_t *GB_RESTRICT klast_slice,
-    const int64_t *GB_RESTRICT pstart_slice,
-    const int ntasks,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
     const int nthreads
 ) ;
 
-GrB_Info GB_DxB__rdiv_fp64
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix D, bool D_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    int nthreads
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
 ) ;
 
-GrB_Info GB_AaddB__rdiv_fp64
+GrB_Info GB_Cdense_accumA__iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
     const GrB_Matrix A,
-    const GrB_Matrix B,
-    const bool Ch_is_Mh,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
     const int ntasks,
     const int nthreads
 ) ;
 
-GrB_Info GB_AemultB__rdiv_fp64
+GrB_Info GB_Cdense_accumX__iseq_int8
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A,
-    const GrB_Matrix B,
-    const int64_t *GB_RESTRICT C_to_M,
-    const int64_t *GB_RESTRICT C_to_A,
-    const int64_t *GB_RESTRICT C_to_B,
-    const GB_task_struct *GB_RESTRICT TaskList,
-    const int ntasks,
+    const GB_void *p_ywork,
     const int nthreads
 ) ;
 
@@ -5029,6 +10240,7 @@ GrB_Info GB_AaddB__iseq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5044,6 +10256,7 @@ GrB_Info GB_AemultB__iseq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5054,6 +10267,45 @@ GrB_Info GB_AemultB__iseq_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_int16
 (
     GrB_Matrix C,
@@ -5078,6 +10330,7 @@ GrB_Info GB_AaddB__iseq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5093,6 +10346,7 @@ GrB_Info GB_AemultB__iseq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5103,6 +10357,45 @@ GrB_Info GB_AemultB__iseq_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_int32
 (
     GrB_Matrix C,
@@ -5127,6 +10420,7 @@ GrB_Info GB_AaddB__iseq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5142,6 +10436,7 @@ GrB_Info GB_AemultB__iseq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5152,6 +10447,45 @@ GrB_Info GB_AemultB__iseq_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_int64
 (
     GrB_Matrix C,
@@ -5176,6 +10510,7 @@ GrB_Info GB_AaddB__iseq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5191,6 +10526,7 @@ GrB_Info GB_AemultB__iseq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5201,6 +10537,45 @@ GrB_Info GB_AemultB__iseq_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_uint8
 (
     GrB_Matrix C,
@@ -5225,6 +10600,7 @@ GrB_Info GB_AaddB__iseq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5240,6 +10616,7 @@ GrB_Info GB_AemultB__iseq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5250,6 +10627,45 @@ GrB_Info GB_AemultB__iseq_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_uint16
 (
     GrB_Matrix C,
@@ -5274,6 +10690,7 @@ GrB_Info GB_AaddB__iseq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5289,6 +10706,7 @@ GrB_Info GB_AemultB__iseq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5299,6 +10717,45 @@ GrB_Info GB_AemultB__iseq_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_uint32
 (
     GrB_Matrix C,
@@ -5323,6 +10780,7 @@ GrB_Info GB_AaddB__iseq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5338,6 +10796,7 @@ GrB_Info GB_AemultB__iseq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5348,6 +10807,45 @@ GrB_Info GB_AemultB__iseq_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_uint64
 (
     GrB_Matrix C,
@@ -5372,6 +10870,7 @@ GrB_Info GB_AaddB__iseq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5387,6 +10886,7 @@ GrB_Info GB_AemultB__iseq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5397,6 +10897,45 @@ GrB_Info GB_AemultB__iseq_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_fp32
 (
     GrB_Matrix C,
@@ -5421,6 +10960,7 @@ GrB_Info GB_AaddB__iseq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5436,6 +10976,7 @@ GrB_Info GB_AemultB__iseq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5446,6 +10987,45 @@ GrB_Info GB_AemultB__iseq_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__iseq_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__iseq_fp64
 (
     GrB_Matrix C,
@@ -5470,6 +11050,7 @@ GrB_Info GB_AaddB__iseq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5485,6 +11066,7 @@ GrB_Info GB_AemultB__iseq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5495,6 +11077,45 @@ GrB_Info GB_AemultB__iseq_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_int8
 (
     GrB_Matrix C,
@@ -5519,6 +11140,7 @@ GrB_Info GB_AaddB__isne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5534,6 +11156,7 @@ GrB_Info GB_AemultB__isne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5544,6 +11167,45 @@ GrB_Info GB_AemultB__isne_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_int16
 (
     GrB_Matrix C,
@@ -5568,6 +11230,7 @@ GrB_Info GB_AaddB__isne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5583,6 +11246,7 @@ GrB_Info GB_AemultB__isne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5593,6 +11257,45 @@ GrB_Info GB_AemultB__isne_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_int32
 (
     GrB_Matrix C,
@@ -5617,6 +11320,7 @@ GrB_Info GB_AaddB__isne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5632,6 +11336,7 @@ GrB_Info GB_AemultB__isne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5642,6 +11347,45 @@ GrB_Info GB_AemultB__isne_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_int64
 (
     GrB_Matrix C,
@@ -5666,6 +11410,7 @@ GrB_Info GB_AaddB__isne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5681,6 +11426,7 @@ GrB_Info GB_AemultB__isne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5691,6 +11437,45 @@ GrB_Info GB_AemultB__isne_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_uint8
 (
     GrB_Matrix C,
@@ -5715,6 +11500,7 @@ GrB_Info GB_AaddB__isne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5730,6 +11516,7 @@ GrB_Info GB_AemultB__isne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5740,6 +11527,45 @@ GrB_Info GB_AemultB__isne_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_uint16
 (
     GrB_Matrix C,
@@ -5764,6 +11590,7 @@ GrB_Info GB_AaddB__isne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5779,6 +11606,7 @@ GrB_Info GB_AemultB__isne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5789,6 +11617,45 @@ GrB_Info GB_AemultB__isne_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_uint32
 (
     GrB_Matrix C,
@@ -5813,6 +11680,7 @@ GrB_Info GB_AaddB__isne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5828,6 +11696,7 @@ GrB_Info GB_AemultB__isne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5838,6 +11707,45 @@ GrB_Info GB_AemultB__isne_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_uint64
 (
     GrB_Matrix C,
@@ -5862,6 +11770,7 @@ GrB_Info GB_AaddB__isne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5877,6 +11786,7 @@ GrB_Info GB_AemultB__isne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5887,6 +11797,45 @@ GrB_Info GB_AemultB__isne_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_fp32
 (
     GrB_Matrix C,
@@ -5911,6 +11860,7 @@ GrB_Info GB_AaddB__isne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5926,6 +11876,7 @@ GrB_Info GB_AemultB__isne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5936,6 +11887,45 @@ GrB_Info GB_AemultB__isne_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isne_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isne_fp64
 (
     GrB_Matrix C,
@@ -5960,6 +11950,7 @@ GrB_Info GB_AaddB__isne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -5975,6 +11966,7 @@ GrB_Info GB_AemultB__isne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -5985,6 +11977,45 @@ GrB_Info GB_AemultB__isne_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_int8
 (
     GrB_Matrix C,
@@ -6009,6 +12040,7 @@ GrB_Info GB_AaddB__isgt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6024,6 +12056,7 @@ GrB_Info GB_AemultB__isgt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6034,6 +12067,45 @@ GrB_Info GB_AemultB__isgt_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_int16
 (
     GrB_Matrix C,
@@ -6058,6 +12130,7 @@ GrB_Info GB_AaddB__isgt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6073,6 +12146,7 @@ GrB_Info GB_AemultB__isgt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6083,6 +12157,45 @@ GrB_Info GB_AemultB__isgt_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_int32
 (
     GrB_Matrix C,
@@ -6107,6 +12220,7 @@ GrB_Info GB_AaddB__isgt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6122,6 +12236,7 @@ GrB_Info GB_AemultB__isgt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6132,6 +12247,45 @@ GrB_Info GB_AemultB__isgt_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_int64
 (
     GrB_Matrix C,
@@ -6156,6 +12310,7 @@ GrB_Info GB_AaddB__isgt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6171,6 +12326,7 @@ GrB_Info GB_AemultB__isgt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6181,6 +12337,45 @@ GrB_Info GB_AemultB__isgt_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_uint8
 (
     GrB_Matrix C,
@@ -6205,6 +12400,7 @@ GrB_Info GB_AaddB__isgt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6220,6 +12416,7 @@ GrB_Info GB_AemultB__isgt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6230,6 +12427,45 @@ GrB_Info GB_AemultB__isgt_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_uint16
 (
     GrB_Matrix C,
@@ -6254,6 +12490,7 @@ GrB_Info GB_AaddB__isgt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6269,6 +12506,7 @@ GrB_Info GB_AemultB__isgt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6279,6 +12517,45 @@ GrB_Info GB_AemultB__isgt_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_uint32
 (
     GrB_Matrix C,
@@ -6303,6 +12580,7 @@ GrB_Info GB_AaddB__isgt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6318,6 +12596,7 @@ GrB_Info GB_AemultB__isgt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6328,6 +12607,45 @@ GrB_Info GB_AemultB__isgt_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_uint64
 (
     GrB_Matrix C,
@@ -6352,6 +12670,7 @@ GrB_Info GB_AaddB__isgt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6367,6 +12686,7 @@ GrB_Info GB_AemultB__isgt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6377,6 +12697,45 @@ GrB_Info GB_AemultB__isgt_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_fp32
 (
     GrB_Matrix C,
@@ -6401,6 +12760,7 @@ GrB_Info GB_AaddB__isgt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6416,6 +12776,7 @@ GrB_Info GB_AemultB__isgt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6426,6 +12787,45 @@ GrB_Info GB_AemultB__isgt_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isgt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isgt_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isgt_fp64
 (
     GrB_Matrix C,
@@ -6450,6 +12850,7 @@ GrB_Info GB_AaddB__isgt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6465,6 +12866,7 @@ GrB_Info GB_AemultB__isgt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6475,6 +12877,45 @@ GrB_Info GB_AemultB__isgt_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_int8
 (
     GrB_Matrix C,
@@ -6499,6 +12940,7 @@ GrB_Info GB_AaddB__islt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6514,6 +12956,7 @@ GrB_Info GB_AemultB__islt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6524,6 +12967,45 @@ GrB_Info GB_AemultB__islt_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_int16
 (
     GrB_Matrix C,
@@ -6548,6 +13030,7 @@ GrB_Info GB_AaddB__islt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6563,6 +13046,7 @@ GrB_Info GB_AemultB__islt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6573,6 +13057,45 @@ GrB_Info GB_AemultB__islt_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_int32
 (
     GrB_Matrix C,
@@ -6597,6 +13120,7 @@ GrB_Info GB_AaddB__islt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6612,6 +13136,7 @@ GrB_Info GB_AemultB__islt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6622,6 +13147,45 @@ GrB_Info GB_AemultB__islt_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_int64
 (
     GrB_Matrix C,
@@ -6646,6 +13210,7 @@ GrB_Info GB_AaddB__islt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6661,6 +13226,7 @@ GrB_Info GB_AemultB__islt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6671,6 +13237,45 @@ GrB_Info GB_AemultB__islt_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_uint8
 (
     GrB_Matrix C,
@@ -6695,6 +13300,7 @@ GrB_Info GB_AaddB__islt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6710,6 +13316,7 @@ GrB_Info GB_AemultB__islt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6720,6 +13327,45 @@ GrB_Info GB_AemultB__islt_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_uint16
 (
     GrB_Matrix C,
@@ -6744,6 +13390,7 @@ GrB_Info GB_AaddB__islt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6759,6 +13406,7 @@ GrB_Info GB_AemultB__islt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6769,6 +13417,45 @@ GrB_Info GB_AemultB__islt_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_uint32
 (
     GrB_Matrix C,
@@ -6793,6 +13480,7 @@ GrB_Info GB_AaddB__islt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6808,6 +13496,7 @@ GrB_Info GB_AemultB__islt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6818,6 +13507,45 @@ GrB_Info GB_AemultB__islt_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_uint64
 (
     GrB_Matrix C,
@@ -6842,6 +13570,7 @@ GrB_Info GB_AaddB__islt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6857,6 +13586,7 @@ GrB_Info GB_AemultB__islt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6867,6 +13597,45 @@ GrB_Info GB_AemultB__islt_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_fp32
 (
     GrB_Matrix C,
@@ -6891,6 +13660,7 @@ GrB_Info GB_AaddB__islt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6906,6 +13676,7 @@ GrB_Info GB_AemultB__islt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6916,6 +13687,45 @@ GrB_Info GB_AemultB__islt_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__islt_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__islt_fp64
 (
     GrB_Matrix C,
@@ -6940,6 +13750,7 @@ GrB_Info GB_AaddB__islt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -6955,6 +13766,7 @@ GrB_Info GB_AemultB__islt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -6965,6 +13777,45 @@ GrB_Info GB_AemultB__islt_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_int8
 (
     GrB_Matrix C,
@@ -6989,6 +13840,7 @@ GrB_Info GB_AaddB__isge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7004,6 +13856,7 @@ GrB_Info GB_AemultB__isge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7014,6 +13867,45 @@ GrB_Info GB_AemultB__isge_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_int16
 (
     GrB_Matrix C,
@@ -7038,6 +13930,7 @@ GrB_Info GB_AaddB__isge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7053,6 +13946,7 @@ GrB_Info GB_AemultB__isge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7063,6 +13957,45 @@ GrB_Info GB_AemultB__isge_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_int32
 (
     GrB_Matrix C,
@@ -7087,6 +14020,7 @@ GrB_Info GB_AaddB__isge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7102,6 +14036,7 @@ GrB_Info GB_AemultB__isge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7112,6 +14047,45 @@ GrB_Info GB_AemultB__isge_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_int64
 (
     GrB_Matrix C,
@@ -7136,6 +14110,7 @@ GrB_Info GB_AaddB__isge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7151,6 +14126,7 @@ GrB_Info GB_AemultB__isge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7161,6 +14137,45 @@ GrB_Info GB_AemultB__isge_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_uint8
 (
     GrB_Matrix C,
@@ -7185,6 +14200,7 @@ GrB_Info GB_AaddB__isge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7200,6 +14216,7 @@ GrB_Info GB_AemultB__isge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7210,6 +14227,45 @@ GrB_Info GB_AemultB__isge_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_uint16
 (
     GrB_Matrix C,
@@ -7234,6 +14290,7 @@ GrB_Info GB_AaddB__isge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7249,6 +14306,7 @@ GrB_Info GB_AemultB__isge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7259,6 +14317,45 @@ GrB_Info GB_AemultB__isge_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_uint32
 (
     GrB_Matrix C,
@@ -7283,6 +14380,7 @@ GrB_Info GB_AaddB__isge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7298,6 +14396,7 @@ GrB_Info GB_AemultB__isge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7308,6 +14407,45 @@ GrB_Info GB_AemultB__isge_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_uint64
 (
     GrB_Matrix C,
@@ -7332,6 +14470,7 @@ GrB_Info GB_AaddB__isge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7347,6 +14486,7 @@ GrB_Info GB_AemultB__isge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7357,6 +14497,45 @@ GrB_Info GB_AemultB__isge_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_fp32
 (
     GrB_Matrix C,
@@ -7381,6 +14560,7 @@ GrB_Info GB_AaddB__isge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7396,6 +14576,7 @@ GrB_Info GB_AemultB__isge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7406,6 +14587,45 @@ GrB_Info GB_AemultB__isge_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isge_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isge_fp64
 (
     GrB_Matrix C,
@@ -7430,6 +14650,7 @@ GrB_Info GB_AaddB__isge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7445,6 +14666,7 @@ GrB_Info GB_AemultB__isge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7455,6 +14677,45 @@ GrB_Info GB_AemultB__isge_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_int8
 (
     GrB_Matrix C,
@@ -7479,6 +14740,7 @@ GrB_Info GB_AaddB__isle_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7494,6 +14756,7 @@ GrB_Info GB_AemultB__isle_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7504,6 +14767,45 @@ GrB_Info GB_AemultB__isle_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_int16
 (
     GrB_Matrix C,
@@ -7528,6 +14830,7 @@ GrB_Info GB_AaddB__isle_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7543,6 +14846,7 @@ GrB_Info GB_AemultB__isle_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7553,6 +14857,45 @@ GrB_Info GB_AemultB__isle_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_int32
 (
     GrB_Matrix C,
@@ -7577,6 +14920,7 @@ GrB_Info GB_AaddB__isle_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7592,6 +14936,7 @@ GrB_Info GB_AemultB__isle_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7602,6 +14947,45 @@ GrB_Info GB_AemultB__isle_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_int64
 (
     GrB_Matrix C,
@@ -7626,6 +15010,7 @@ GrB_Info GB_AaddB__isle_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7641,6 +15026,7 @@ GrB_Info GB_AemultB__isle_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7651,6 +15037,45 @@ GrB_Info GB_AemultB__isle_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_uint8
 (
     GrB_Matrix C,
@@ -7675,6 +15100,7 @@ GrB_Info GB_AaddB__isle_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7690,6 +15116,7 @@ GrB_Info GB_AemultB__isle_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7700,6 +15127,45 @@ GrB_Info GB_AemultB__isle_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_uint16
 (
     GrB_Matrix C,
@@ -7724,6 +15190,7 @@ GrB_Info GB_AaddB__isle_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7739,6 +15206,7 @@ GrB_Info GB_AemultB__isle_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7749,6 +15217,45 @@ GrB_Info GB_AemultB__isle_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_uint32
 (
     GrB_Matrix C,
@@ -7773,6 +15280,7 @@ GrB_Info GB_AaddB__isle_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7788,6 +15296,7 @@ GrB_Info GB_AemultB__isle_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7798,6 +15307,45 @@ GrB_Info GB_AemultB__isle_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_uint64
 (
     GrB_Matrix C,
@@ -7822,6 +15370,7 @@ GrB_Info GB_AaddB__isle_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7837,6 +15386,7 @@ GrB_Info GB_AemultB__isle_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7847,6 +15397,45 @@ GrB_Info GB_AemultB__isle_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_fp32
 (
     GrB_Matrix C,
@@ -7871,6 +15460,7 @@ GrB_Info GB_AaddB__isle_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7886,6 +15476,7 @@ GrB_Info GB_AemultB__isle_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7896,6 +15487,45 @@ GrB_Info GB_AemultB__isle_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__isle_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__isle_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__isle_fp64
 (
     GrB_Matrix C,
@@ -7920,6 +15550,7 @@ GrB_Info GB_AaddB__isle_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7935,6 +15566,7 @@ GrB_Info GB_AemultB__isle_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7945,6 +15577,45 @@ GrB_Info GB_AemultB__isle_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_int8
 (
     GrB_Matrix C,
@@ -7969,6 +15640,7 @@ GrB_Info GB_AaddB__eq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -7984,6 +15656,7 @@ GrB_Info GB_AemultB__eq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -7994,6 +15667,45 @@ GrB_Info GB_AemultB__eq_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_int16
 (
     GrB_Matrix C,
@@ -8018,6 +15730,7 @@ GrB_Info GB_AaddB__eq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8033,6 +15746,7 @@ GrB_Info GB_AemultB__eq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8043,6 +15757,45 @@ GrB_Info GB_AemultB__eq_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_int32
 (
     GrB_Matrix C,
@@ -8067,6 +15820,7 @@ GrB_Info GB_AaddB__eq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8082,6 +15836,7 @@ GrB_Info GB_AemultB__eq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8092,6 +15847,45 @@ GrB_Info GB_AemultB__eq_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_int64
 (
     GrB_Matrix C,
@@ -8116,6 +15910,7 @@ GrB_Info GB_AaddB__eq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8131,6 +15926,7 @@ GrB_Info GB_AemultB__eq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8141,6 +15937,45 @@ GrB_Info GB_AemultB__eq_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_uint8
 (
     GrB_Matrix C,
@@ -8165,6 +16000,7 @@ GrB_Info GB_AaddB__eq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8180,6 +16016,7 @@ GrB_Info GB_AemultB__eq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8190,6 +16027,45 @@ GrB_Info GB_AemultB__eq_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_uint16
 (
     GrB_Matrix C,
@@ -8214,6 +16090,7 @@ GrB_Info GB_AaddB__eq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8229,6 +16106,7 @@ GrB_Info GB_AemultB__eq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8239,6 +16117,45 @@ GrB_Info GB_AemultB__eq_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_uint32
 (
     GrB_Matrix C,
@@ -8263,6 +16180,7 @@ GrB_Info GB_AaddB__eq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8278,6 +16196,7 @@ GrB_Info GB_AemultB__eq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8288,6 +16207,45 @@ GrB_Info GB_AemultB__eq_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_uint64
 (
     GrB_Matrix C,
@@ -8312,6 +16270,7 @@ GrB_Info GB_AaddB__eq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8327,6 +16286,7 @@ GrB_Info GB_AemultB__eq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8337,6 +16297,45 @@ GrB_Info GB_AemultB__eq_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_fp32
 (
     GrB_Matrix C,
@@ -8361,6 +16360,7 @@ GrB_Info GB_AaddB__eq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8376,6 +16376,7 @@ GrB_Info GB_AemultB__eq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8386,6 +16387,45 @@ GrB_Info GB_AemultB__eq_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_fp64
 (
     GrB_Matrix C,
@@ -8410,6 +16450,7 @@ GrB_Info GB_AaddB__eq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8425,6 +16466,7 @@ GrB_Info GB_AemultB__eq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8435,6 +16477,45 @@ GrB_Info GB_AemultB__eq_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__eq_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__eq_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__eq_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__eq_bool
 (
     GrB_Matrix C,
@@ -8459,6 +16540,7 @@ GrB_Info GB_AaddB__eq_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8474,6 +16556,7 @@ GrB_Info GB_AemultB__eq_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8484,6 +16567,45 @@ GrB_Info GB_AemultB__eq_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_int8
 (
     GrB_Matrix C,
@@ -8508,6 +16630,7 @@ GrB_Info GB_AaddB__ne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8523,6 +16646,7 @@ GrB_Info GB_AemultB__ne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8533,6 +16657,45 @@ GrB_Info GB_AemultB__ne_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_int16
 (
     GrB_Matrix C,
@@ -8557,6 +16720,7 @@ GrB_Info GB_AaddB__ne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8572,6 +16736,7 @@ GrB_Info GB_AemultB__ne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8582,6 +16747,45 @@ GrB_Info GB_AemultB__ne_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_int32
 (
     GrB_Matrix C,
@@ -8606,6 +16810,7 @@ GrB_Info GB_AaddB__ne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8621,6 +16826,7 @@ GrB_Info GB_AemultB__ne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8631,6 +16837,45 @@ GrB_Info GB_AemultB__ne_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_int64
 (
     GrB_Matrix C,
@@ -8655,6 +16900,7 @@ GrB_Info GB_AaddB__ne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8670,6 +16916,7 @@ GrB_Info GB_AemultB__ne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8680,6 +16927,45 @@ GrB_Info GB_AemultB__ne_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_uint8
 (
     GrB_Matrix C,
@@ -8704,6 +16990,7 @@ GrB_Info GB_AaddB__ne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8719,6 +17006,7 @@ GrB_Info GB_AemultB__ne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8729,6 +17017,45 @@ GrB_Info GB_AemultB__ne_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_uint16
 (
     GrB_Matrix C,
@@ -8753,6 +17080,7 @@ GrB_Info GB_AaddB__ne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8768,6 +17096,7 @@ GrB_Info GB_AemultB__ne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8778,6 +17107,45 @@ GrB_Info GB_AemultB__ne_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_uint32
 (
     GrB_Matrix C,
@@ -8802,6 +17170,7 @@ GrB_Info GB_AaddB__ne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8817,6 +17186,7 @@ GrB_Info GB_AemultB__ne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8827,6 +17197,45 @@ GrB_Info GB_AemultB__ne_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_uint64
 (
     GrB_Matrix C,
@@ -8851,6 +17260,7 @@ GrB_Info GB_AaddB__ne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8866,6 +17276,7 @@ GrB_Info GB_AemultB__ne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8876,6 +17287,45 @@ GrB_Info GB_AemultB__ne_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_fp32
 (
     GrB_Matrix C,
@@ -8900,6 +17350,7 @@ GrB_Info GB_AaddB__ne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8915,6 +17366,7 @@ GrB_Info GB_AemultB__ne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8925,6 +17377,45 @@ GrB_Info GB_AemultB__ne_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ne_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ne_fp64
 (
     GrB_Matrix C,
@@ -8949,6 +17440,7 @@ GrB_Info GB_AaddB__ne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -8964,6 +17456,7 @@ GrB_Info GB_AemultB__ne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -8974,6 +17467,45 @@ GrB_Info GB_AemultB__ne_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_int8
 (
     GrB_Matrix C,
@@ -8998,6 +17530,7 @@ GrB_Info GB_AaddB__gt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9013,6 +17546,7 @@ GrB_Info GB_AemultB__gt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9023,6 +17557,45 @@ GrB_Info GB_AemultB__gt_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_int16
 (
     GrB_Matrix C,
@@ -9047,6 +17620,7 @@ GrB_Info GB_AaddB__gt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9062,6 +17636,7 @@ GrB_Info GB_AemultB__gt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9072,6 +17647,45 @@ GrB_Info GB_AemultB__gt_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_int32
 (
     GrB_Matrix C,
@@ -9096,6 +17710,7 @@ GrB_Info GB_AaddB__gt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9111,6 +17726,7 @@ GrB_Info GB_AemultB__gt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9121,6 +17737,45 @@ GrB_Info GB_AemultB__gt_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_int64
 (
     GrB_Matrix C,
@@ -9145,6 +17800,7 @@ GrB_Info GB_AaddB__gt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9160,6 +17816,7 @@ GrB_Info GB_AemultB__gt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9170,6 +17827,45 @@ GrB_Info GB_AemultB__gt_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_uint8
 (
     GrB_Matrix C,
@@ -9194,6 +17890,7 @@ GrB_Info GB_AaddB__gt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9209,6 +17906,7 @@ GrB_Info GB_AemultB__gt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9219,6 +17917,45 @@ GrB_Info GB_AemultB__gt_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_uint16
 (
     GrB_Matrix C,
@@ -9243,6 +17980,7 @@ GrB_Info GB_AaddB__gt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9258,6 +17996,7 @@ GrB_Info GB_AemultB__gt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9268,6 +18007,45 @@ GrB_Info GB_AemultB__gt_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_uint32
 (
     GrB_Matrix C,
@@ -9292,6 +18070,7 @@ GrB_Info GB_AaddB__gt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9307,6 +18086,7 @@ GrB_Info GB_AemultB__gt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9317,6 +18097,45 @@ GrB_Info GB_AemultB__gt_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_uint64
 (
     GrB_Matrix C,
@@ -9341,6 +18160,7 @@ GrB_Info GB_AaddB__gt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9356,6 +18176,7 @@ GrB_Info GB_AemultB__gt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9366,6 +18187,45 @@ GrB_Info GB_AemultB__gt_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_fp32
 (
     GrB_Matrix C,
@@ -9390,6 +18250,7 @@ GrB_Info GB_AaddB__gt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9405,6 +18266,7 @@ GrB_Info GB_AemultB__gt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9415,6 +18277,45 @@ GrB_Info GB_AemultB__gt_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_fp64
 (
     GrB_Matrix C,
@@ -9439,6 +18340,7 @@ GrB_Info GB_AaddB__gt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9454,6 +18356,7 @@ GrB_Info GB_AemultB__gt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9464,6 +18367,45 @@ GrB_Info GB_AemultB__gt_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__gt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__gt_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__gt_bool
 (
     GrB_Matrix C,
@@ -9488,6 +18430,7 @@ GrB_Info GB_AaddB__gt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9503,6 +18446,7 @@ GrB_Info GB_AemultB__gt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9513,6 +18457,45 @@ GrB_Info GB_AemultB__gt_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_int8
 (
     GrB_Matrix C,
@@ -9537,6 +18520,7 @@ GrB_Info GB_AaddB__lt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9552,6 +18536,7 @@ GrB_Info GB_AemultB__lt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9562,6 +18547,45 @@ GrB_Info GB_AemultB__lt_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_int16
 (
     GrB_Matrix C,
@@ -9586,6 +18610,7 @@ GrB_Info GB_AaddB__lt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9601,6 +18626,7 @@ GrB_Info GB_AemultB__lt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9611,6 +18637,45 @@ GrB_Info GB_AemultB__lt_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_int32
 (
     GrB_Matrix C,
@@ -9635,6 +18700,7 @@ GrB_Info GB_AaddB__lt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9650,6 +18716,7 @@ GrB_Info GB_AemultB__lt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9660,6 +18727,45 @@ GrB_Info GB_AemultB__lt_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_int64
 (
     GrB_Matrix C,
@@ -9684,6 +18790,7 @@ GrB_Info GB_AaddB__lt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9699,6 +18806,7 @@ GrB_Info GB_AemultB__lt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9709,6 +18817,45 @@ GrB_Info GB_AemultB__lt_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_uint8
 (
     GrB_Matrix C,
@@ -9733,6 +18880,7 @@ GrB_Info GB_AaddB__lt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9748,6 +18896,7 @@ GrB_Info GB_AemultB__lt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9758,6 +18907,45 @@ GrB_Info GB_AemultB__lt_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_uint16
 (
     GrB_Matrix C,
@@ -9782,6 +18970,7 @@ GrB_Info GB_AaddB__lt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9797,6 +18986,7 @@ GrB_Info GB_AemultB__lt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9807,6 +18997,45 @@ GrB_Info GB_AemultB__lt_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_uint32
 (
     GrB_Matrix C,
@@ -9831,6 +19060,7 @@ GrB_Info GB_AaddB__lt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9846,6 +19076,7 @@ GrB_Info GB_AemultB__lt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9856,6 +19087,45 @@ GrB_Info GB_AemultB__lt_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_uint64
 (
     GrB_Matrix C,
@@ -9880,6 +19150,7 @@ GrB_Info GB_AaddB__lt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9895,6 +19166,7 @@ GrB_Info GB_AemultB__lt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9905,6 +19177,45 @@ GrB_Info GB_AemultB__lt_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_fp32
 (
     GrB_Matrix C,
@@ -9929,6 +19240,7 @@ GrB_Info GB_AaddB__lt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9944,6 +19256,7 @@ GrB_Info GB_AemultB__lt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -9954,6 +19267,45 @@ GrB_Info GB_AemultB__lt_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_fp64
 (
     GrB_Matrix C,
@@ -9978,6 +19330,7 @@ GrB_Info GB_AaddB__lt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -9993,6 +19346,7 @@ GrB_Info GB_AemultB__lt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10003,6 +19357,45 @@ GrB_Info GB_AemultB__lt_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lt_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lt_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lt_bool
 (
     GrB_Matrix C,
@@ -10027,6 +19420,7 @@ GrB_Info GB_AaddB__lt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10042,6 +19436,7 @@ GrB_Info GB_AemultB__lt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10052,6 +19447,45 @@ GrB_Info GB_AemultB__lt_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_int8
 (
     GrB_Matrix C,
@@ -10076,6 +19510,7 @@ GrB_Info GB_AaddB__ge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10091,6 +19526,7 @@ GrB_Info GB_AemultB__ge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10101,6 +19537,45 @@ GrB_Info GB_AemultB__ge_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_int16
 (
     GrB_Matrix C,
@@ -10125,6 +19600,7 @@ GrB_Info GB_AaddB__ge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10140,6 +19616,7 @@ GrB_Info GB_AemultB__ge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10150,6 +19627,45 @@ GrB_Info GB_AemultB__ge_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_int32
 (
     GrB_Matrix C,
@@ -10174,6 +19690,7 @@ GrB_Info GB_AaddB__ge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10189,6 +19706,7 @@ GrB_Info GB_AemultB__ge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10199,6 +19717,45 @@ GrB_Info GB_AemultB__ge_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_int64
 (
     GrB_Matrix C,
@@ -10223,6 +19780,7 @@ GrB_Info GB_AaddB__ge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10238,6 +19796,7 @@ GrB_Info GB_AemultB__ge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10248,6 +19807,45 @@ GrB_Info GB_AemultB__ge_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_uint8
 (
     GrB_Matrix C,
@@ -10272,6 +19870,7 @@ GrB_Info GB_AaddB__ge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10287,6 +19886,7 @@ GrB_Info GB_AemultB__ge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10297,6 +19897,45 @@ GrB_Info GB_AemultB__ge_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_uint16
 (
     GrB_Matrix C,
@@ -10321,6 +19960,7 @@ GrB_Info GB_AaddB__ge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10336,6 +19976,7 @@ GrB_Info GB_AemultB__ge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10346,6 +19987,45 @@ GrB_Info GB_AemultB__ge_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_uint32
 (
     GrB_Matrix C,
@@ -10370,6 +20050,7 @@ GrB_Info GB_AaddB__ge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10385,6 +20066,7 @@ GrB_Info GB_AemultB__ge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10395,6 +20077,45 @@ GrB_Info GB_AemultB__ge_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_uint64
 (
     GrB_Matrix C,
@@ -10419,6 +20140,7 @@ GrB_Info GB_AaddB__ge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10434,6 +20156,7 @@ GrB_Info GB_AemultB__ge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10444,6 +20167,45 @@ GrB_Info GB_AemultB__ge_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_fp32
 (
     GrB_Matrix C,
@@ -10468,6 +20230,7 @@ GrB_Info GB_AaddB__ge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10483,6 +20246,7 @@ GrB_Info GB_AemultB__ge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10493,6 +20257,45 @@ GrB_Info GB_AemultB__ge_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_fp64
 (
     GrB_Matrix C,
@@ -10517,6 +20320,7 @@ GrB_Info GB_AaddB__ge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10532,6 +20336,7 @@ GrB_Info GB_AemultB__ge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10542,6 +20347,45 @@ GrB_Info GB_AemultB__ge_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__ge_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__ge_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__ge_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__ge_bool
 (
     GrB_Matrix C,
@@ -10566,6 +20410,7 @@ GrB_Info GB_AaddB__ge_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10581,6 +20426,7 @@ GrB_Info GB_AemultB__ge_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10591,6 +20437,45 @@ GrB_Info GB_AemultB__ge_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_int8
 (
     GrB_Matrix C,
@@ -10615,6 +20500,7 @@ GrB_Info GB_AaddB__le_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10630,6 +20516,7 @@ GrB_Info GB_AemultB__le_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10640,6 +20527,45 @@ GrB_Info GB_AemultB__le_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_int16
 (
     GrB_Matrix C,
@@ -10664,6 +20590,7 @@ GrB_Info GB_AaddB__le_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10679,6 +20606,7 @@ GrB_Info GB_AemultB__le_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10689,6 +20617,45 @@ GrB_Info GB_AemultB__le_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_int32
 (
     GrB_Matrix C,
@@ -10713,6 +20680,7 @@ GrB_Info GB_AaddB__le_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10728,6 +20696,7 @@ GrB_Info GB_AemultB__le_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10738,6 +20707,45 @@ GrB_Info GB_AemultB__le_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_int64
 (
     GrB_Matrix C,
@@ -10762,6 +20770,7 @@ GrB_Info GB_AaddB__le_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10777,6 +20786,7 @@ GrB_Info GB_AemultB__le_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10787,6 +20797,45 @@ GrB_Info GB_AemultB__le_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_uint8
 (
     GrB_Matrix C,
@@ -10811,6 +20860,7 @@ GrB_Info GB_AaddB__le_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10826,6 +20876,7 @@ GrB_Info GB_AemultB__le_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10836,6 +20887,45 @@ GrB_Info GB_AemultB__le_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_uint16
 (
     GrB_Matrix C,
@@ -10860,6 +20950,7 @@ GrB_Info GB_AaddB__le_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10875,6 +20966,7 @@ GrB_Info GB_AemultB__le_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10885,6 +20977,45 @@ GrB_Info GB_AemultB__le_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_uint32
 (
     GrB_Matrix C,
@@ -10909,6 +21040,7 @@ GrB_Info GB_AaddB__le_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10924,6 +21056,7 @@ GrB_Info GB_AemultB__le_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10934,6 +21067,45 @@ GrB_Info GB_AemultB__le_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_uint64
 (
     GrB_Matrix C,
@@ -10958,6 +21130,7 @@ GrB_Info GB_AaddB__le_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -10973,6 +21146,7 @@ GrB_Info GB_AemultB__le_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -10983,6 +21157,45 @@ GrB_Info GB_AemultB__le_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_fp32
 (
     GrB_Matrix C,
@@ -11007,6 +21220,7 @@ GrB_Info GB_AaddB__le_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11022,6 +21236,7 @@ GrB_Info GB_AemultB__le_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11032,6 +21247,45 @@ GrB_Info GB_AemultB__le_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_fp64
 (
     GrB_Matrix C,
@@ -11056,6 +21310,7 @@ GrB_Info GB_AaddB__le_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11071,6 +21326,7 @@ GrB_Info GB_AemultB__le_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11081,6 +21337,45 @@ GrB_Info GB_AemultB__le_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__le_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__le_bool
 (
     GrB_Matrix C,
@@ -11105,6 +21400,7 @@ GrB_Info GB_AaddB__le_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11120,6 +21416,7 @@ GrB_Info GB_AemultB__le_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11130,6 +21427,45 @@ GrB_Info GB_AemultB__le_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_int8
 (
     GrB_Matrix C,
@@ -11154,6 +21490,7 @@ GrB_Info GB_AaddB__lor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11169,6 +21506,7 @@ GrB_Info GB_AemultB__lor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11179,6 +21517,45 @@ GrB_Info GB_AemultB__lor_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_int16
 (
     GrB_Matrix C,
@@ -11203,6 +21580,7 @@ GrB_Info GB_AaddB__lor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11218,6 +21596,7 @@ GrB_Info GB_AemultB__lor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11228,6 +21607,45 @@ GrB_Info GB_AemultB__lor_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_int32
 (
     GrB_Matrix C,
@@ -11252,6 +21670,7 @@ GrB_Info GB_AaddB__lor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11267,6 +21686,7 @@ GrB_Info GB_AemultB__lor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11277,6 +21697,45 @@ GrB_Info GB_AemultB__lor_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_int64
 (
     GrB_Matrix C,
@@ -11301,6 +21760,7 @@ GrB_Info GB_AaddB__lor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11316,6 +21776,7 @@ GrB_Info GB_AemultB__lor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11326,6 +21787,45 @@ GrB_Info GB_AemultB__lor_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_uint8
 (
     GrB_Matrix C,
@@ -11350,6 +21850,7 @@ GrB_Info GB_AaddB__lor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11365,6 +21866,7 @@ GrB_Info GB_AemultB__lor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11375,6 +21877,45 @@ GrB_Info GB_AemultB__lor_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_uint16
 (
     GrB_Matrix C,
@@ -11399,6 +21940,7 @@ GrB_Info GB_AaddB__lor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11414,6 +21956,7 @@ GrB_Info GB_AemultB__lor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11424,6 +21967,45 @@ GrB_Info GB_AemultB__lor_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_uint32
 (
     GrB_Matrix C,
@@ -11448,6 +22030,7 @@ GrB_Info GB_AaddB__lor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11463,6 +22046,7 @@ GrB_Info GB_AemultB__lor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11473,6 +22057,45 @@ GrB_Info GB_AemultB__lor_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_uint64
 (
     GrB_Matrix C,
@@ -11497,6 +22120,7 @@ GrB_Info GB_AaddB__lor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11512,6 +22136,7 @@ GrB_Info GB_AemultB__lor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11522,6 +22147,45 @@ GrB_Info GB_AemultB__lor_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_fp32
 (
     GrB_Matrix C,
@@ -11546,6 +22210,7 @@ GrB_Info GB_AaddB__lor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11561,6 +22226,7 @@ GrB_Info GB_AemultB__lor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11571,6 +22237,45 @@ GrB_Info GB_AemultB__lor_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_fp64
 (
     GrB_Matrix C,
@@ -11595,6 +22300,7 @@ GrB_Info GB_AaddB__lor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11610,6 +22316,7 @@ GrB_Info GB_AemultB__lor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11620,6 +22327,45 @@ GrB_Info GB_AemultB__lor_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lor_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lor_bool
 (
     GrB_Matrix C,
@@ -11644,6 +22390,7 @@ GrB_Info GB_AaddB__lor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11659,6 +22406,7 @@ GrB_Info GB_AemultB__lor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11669,6 +22417,45 @@ GrB_Info GB_AemultB__lor_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_int8
 (
     GrB_Matrix C,
@@ -11693,6 +22480,7 @@ GrB_Info GB_AaddB__land_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11708,6 +22496,7 @@ GrB_Info GB_AemultB__land_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11718,6 +22507,45 @@ GrB_Info GB_AemultB__land_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_int16
 (
     GrB_Matrix C,
@@ -11742,6 +22570,7 @@ GrB_Info GB_AaddB__land_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11757,6 +22586,7 @@ GrB_Info GB_AemultB__land_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11767,6 +22597,45 @@ GrB_Info GB_AemultB__land_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_int32
 (
     GrB_Matrix C,
@@ -11791,6 +22660,7 @@ GrB_Info GB_AaddB__land_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11806,6 +22676,7 @@ GrB_Info GB_AemultB__land_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11816,6 +22687,45 @@ GrB_Info GB_AemultB__land_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_int64
 (
     GrB_Matrix C,
@@ -11840,6 +22750,7 @@ GrB_Info GB_AaddB__land_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11855,6 +22766,7 @@ GrB_Info GB_AemultB__land_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11865,6 +22777,45 @@ GrB_Info GB_AemultB__land_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_uint8
 (
     GrB_Matrix C,
@@ -11889,6 +22840,7 @@ GrB_Info GB_AaddB__land_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11904,6 +22856,7 @@ GrB_Info GB_AemultB__land_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11914,6 +22867,45 @@ GrB_Info GB_AemultB__land_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_uint16
 (
     GrB_Matrix C,
@@ -11938,6 +22930,7 @@ GrB_Info GB_AaddB__land_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -11953,6 +22946,7 @@ GrB_Info GB_AemultB__land_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -11963,6 +22957,45 @@ GrB_Info GB_AemultB__land_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_uint32
 (
     GrB_Matrix C,
@@ -11987,6 +23020,7 @@ GrB_Info GB_AaddB__land_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12002,6 +23036,7 @@ GrB_Info GB_AemultB__land_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12012,6 +23047,45 @@ GrB_Info GB_AemultB__land_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_uint64
 (
     GrB_Matrix C,
@@ -12036,6 +23110,7 @@ GrB_Info GB_AaddB__land_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12051,6 +23126,7 @@ GrB_Info GB_AemultB__land_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12061,6 +23137,45 @@ GrB_Info GB_AemultB__land_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_fp32
 (
     GrB_Matrix C,
@@ -12085,6 +23200,7 @@ GrB_Info GB_AaddB__land_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12100,6 +23216,7 @@ GrB_Info GB_AemultB__land_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12110,6 +23227,45 @@ GrB_Info GB_AemultB__land_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_fp64
 (
     GrB_Matrix C,
@@ -12134,6 +23290,7 @@ GrB_Info GB_AaddB__land_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12149,6 +23306,7 @@ GrB_Info GB_AemultB__land_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12159,6 +23317,45 @@ GrB_Info GB_AemultB__land_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__land_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__land_bool
 (
     GrB_Matrix C,
@@ -12183,6 +23380,7 @@ GrB_Info GB_AaddB__land_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12198,6 +23396,7 @@ GrB_Info GB_AemultB__land_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12208,6 +23407,45 @@ GrB_Info GB_AemultB__land_bool
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_int8
 (
     GrB_Matrix C,
@@ -12232,6 +23470,7 @@ GrB_Info GB_AaddB__lxor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12247,6 +23486,7 @@ GrB_Info GB_AemultB__lxor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12257,6 +23497,45 @@ GrB_Info GB_AemultB__lxor_int8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_int16
 (
     GrB_Matrix C,
@@ -12281,6 +23560,7 @@ GrB_Info GB_AaddB__lxor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12296,6 +23576,7 @@ GrB_Info GB_AemultB__lxor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12306,6 +23587,45 @@ GrB_Info GB_AemultB__lxor_int16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_int32
 (
     GrB_Matrix C,
@@ -12330,6 +23650,7 @@ GrB_Info GB_AaddB__lxor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12345,6 +23666,7 @@ GrB_Info GB_AemultB__lxor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12355,6 +23677,45 @@ GrB_Info GB_AemultB__lxor_int32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_int64
 (
     GrB_Matrix C,
@@ -12379,6 +23740,7 @@ GrB_Info GB_AaddB__lxor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12394,6 +23756,7 @@ GrB_Info GB_AemultB__lxor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12404,6 +23767,45 @@ GrB_Info GB_AemultB__lxor_int64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_uint8
 (
     GrB_Matrix C,
@@ -12428,6 +23830,7 @@ GrB_Info GB_AaddB__lxor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12443,6 +23846,7 @@ GrB_Info GB_AemultB__lxor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12453,6 +23857,45 @@ GrB_Info GB_AemultB__lxor_uint8
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_uint16
 (
     GrB_Matrix C,
@@ -12477,6 +23920,7 @@ GrB_Info GB_AaddB__lxor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12492,6 +23936,7 @@ GrB_Info GB_AemultB__lxor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12502,6 +23947,45 @@ GrB_Info GB_AemultB__lxor_uint16
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_uint32
 (
     GrB_Matrix C,
@@ -12526,6 +24010,7 @@ GrB_Info GB_AaddB__lxor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12541,6 +24026,7 @@ GrB_Info GB_AemultB__lxor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12551,6 +24037,45 @@ GrB_Info GB_AemultB__lxor_uint32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_uint64
 (
     GrB_Matrix C,
@@ -12575,6 +24100,7 @@ GrB_Info GB_AaddB__lxor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12590,6 +24116,7 @@ GrB_Info GB_AemultB__lxor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12600,6 +24127,45 @@ GrB_Info GB_AemultB__lxor_uint64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_fp32
 (
     GrB_Matrix C,
@@ -12624,6 +24190,7 @@ GrB_Info GB_AaddB__lxor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12639,6 +24206,7 @@ GrB_Info GB_AemultB__lxor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12649,6 +24217,45 @@ GrB_Info GB_AemultB__lxor_fp32
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_fp64
 (
     GrB_Matrix C,
@@ -12673,6 +24280,7 @@ GrB_Info GB_AaddB__lxor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12688,6 +24296,7 @@ GrB_Info GB_AemultB__lxor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
@@ -12698,6 +24307,45 @@ GrB_Info GB_AemultB__lxor_fp64
     const int nthreads
 ) ;
 
+
+#if 0
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+#endif
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA__lxor_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX__lxor_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD__lxor_bool
 (
     GrB_Matrix C,
@@ -12722,6 +24370,7 @@ GrB_Info GB_AaddB__lxor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -12737,6 +24386,7 @@ GrB_Info GB_AemultB__lxor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_fp32.c b/Source/Generated/GB_binop__iseq_fp32.c
index 5f2adb849d..42cc713dc3 100644
--- a/Source/Generated/GB_binop__iseq_fp32.c
+++ b/Source/Generated/GB_binop__iseq_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_fp32
-// A.*B function (eWiseMult):  GB_AemultB__iseq_fp32
-// A*D function (colscale):    GB_AxD__iseq_fp32
-// D*A function (rowscale):    GB_DxB__iseq_fp32
+// A+B function (eWiseAdd):         GB_AaddB__iseq_fp32
+// A.*B function (eWiseMult):       GB_AemultB__iseq_fp32
+// A*D function (colscale):         GB_AxD__iseq_fp32
+// D*A function (rowscale):         GB_DxB__iseq_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_FP32 || GxB_NO_ISEQ_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_fp64.c b/Source/Generated/GB_binop__iseq_fp64.c
index 39a731f183..9d80717301 100644
--- a/Source/Generated/GB_binop__iseq_fp64.c
+++ b/Source/Generated/GB_binop__iseq_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_fp64
-// A.*B function (eWiseMult):  GB_AemultB__iseq_fp64
-// A*D function (colscale):    GB_AxD__iseq_fp64
-// D*A function (rowscale):    GB_DxB__iseq_fp64
+// A+B function (eWiseAdd):         GB_AaddB__iseq_fp64
+// A.*B function (eWiseMult):       GB_AemultB__iseq_fp64
+// A*D function (colscale):         GB_AxD__iseq_fp64
+// D*A function (rowscale):         GB_DxB__iseq_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_FP64 || GxB_NO_ISEQ_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_int16.c b/Source/Generated/GB_binop__iseq_int16.c
index 3fc799ab5c..387cea7755 100644
--- a/Source/Generated/GB_binop__iseq_int16.c
+++ b/Source/Generated/GB_binop__iseq_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_int16
-// A.*B function (eWiseMult):  GB_AemultB__iseq_int16
-// A*D function (colscale):    GB_AxD__iseq_int16
-// D*A function (rowscale):    GB_DxB__iseq_int16
+// A+B function (eWiseAdd):         GB_AaddB__iseq_int16
+// A.*B function (eWiseMult):       GB_AemultB__iseq_int16
+// A*D function (colscale):         GB_AxD__iseq_int16
+// D*A function (rowscale):         GB_DxB__iseq_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0                       // false: this file's GB_BINOP is z = (x == y)
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0                       // false: the operator here is ISEQ, not PLUS
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0                       // false: the operator here is ISEQ, not MINUS
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)                  // placeholder: no gateway is named for ISEQ/int16
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_INT16 || GxB_NO_ISEQ_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the generator's placeholder, not a real name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int16   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,           // output of C = A+B; header says all 3 are dense
+    const GrB_Matrix A,     // first input operand
+    const GrB_Matrix B,     // second input operand
+    const int nthreads      // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out via the GxB_NO_* flags
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the textually included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_int16   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not used here, so
+    const int64_t *GB_RESTRICT klast_slice,     // presumably read by the template;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm their layout there
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // exit after the braced, included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_int16   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int16_t
+    const int nthreads          // presumably the template's thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        int16_t ywork = (*((const int16_t *) p_ywork)) ;    // keep const on cast
+        #include "GB_dense_subassign_22_template.c"
+        // fall through: the return after this block reports success
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_int32.c b/Source/Generated/GB_binop__iseq_int32.c
index eaaca41fca..0f71baa7c0 100644
--- a/Source/Generated/GB_binop__iseq_int32.c
+++ b/Source/Generated/GB_binop__iseq_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_int32
-// A.*B function (eWiseMult):  GB_AemultB__iseq_int32
-// A*D function (colscale):    GB_AxD__iseq_int32
-// D*A function (rowscale):    GB_DxB__iseq_int32
+// A+B function (eWiseAdd):         GB_AaddB__iseq_int32
+// A.*B function (eWiseMult):       GB_AemultB__iseq_int32
+// A*D function (colscale):         GB_AxD__iseq_int32
+// D*A function (rowscale):         GB_DxB__iseq_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0                       // false: this file's GB_BINOP is z = (x == y)
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0                       // false: the operator here is ISEQ, not PLUS
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0                       // false: the operator here is ISEQ, not MINUS
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)                  // placeholder: no gateway is named for ISEQ/int32
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_INT32 || GxB_NO_ISEQ_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the generator's placeholder, not a real name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int32   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,           // output of C = A+B; header says all 3 are dense
+    const GrB_Matrix A,     // first input operand
+    const GrB_Matrix B,     // second input operand
+    const int nthreads      // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out via the GxB_NO_* flags
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the textually included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_int32   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not used here, so
+    const int64_t *GB_RESTRICT klast_slice,     // presumably read by the template;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm their layout there
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // exit after the braced, included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_int32   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int32_t
+    const int nthreads          // presumably the template's thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        int32_t ywork = (*((const int32_t *) p_ywork)) ;    // keep const on cast
+        #include "GB_dense_subassign_22_template.c"
+        // fall through: the return after this block reports success
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_int64.c b/Source/Generated/GB_binop__iseq_int64.c
index 2e3ab967c7..6effb8f841 100644
--- a/Source/Generated/GB_binop__iseq_int64.c
+++ b/Source/Generated/GB_binop__iseq_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_int64
-// A.*B function (eWiseMult):  GB_AemultB__iseq_int64
-// A*D function (colscale):    GB_AxD__iseq_int64
-// D*A function (rowscale):    GB_DxB__iseq_int64
+// A+B function (eWiseAdd):         GB_AaddB__iseq_int64
+// A.*B function (eWiseMult):       GB_AemultB__iseq_int64
+// A*D function (colscale):         GB_AxD__iseq_int64
+// D*A function (rowscale):         GB_DxB__iseq_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0                       // false: this file's GB_BINOP is z = (x == y)
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0                       // false: the operator here is ISEQ, not PLUS
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0                       // false: the operator here is ISEQ, not MINUS
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)                  // placeholder: no gateway is named for ISEQ/int64
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_INT64 || GxB_NO_ISEQ_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the generator's placeholder, not a real name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int64   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,           // output of C = A+B; header says all 3 are dense
+    const GrB_Matrix A,     // first input operand
+    const GrB_Matrix B,     // second input operand
+    const int nthreads      // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out via the GxB_NO_* flags
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the textually included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_int64   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not used here, so
+    const int64_t *GB_RESTRICT klast_slice,     // presumably read by the template;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm their layout there
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // exit after the braced, included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_int64   // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int64_t
+    const int nthreads          // presumably the template's thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        int64_t ywork = (*((const int64_t *) p_ywork)) ;    // keep const on cast
+        #include "GB_dense_subassign_22_template.c"
+        // fall through: the return after this block reports success
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_int8.c b/Source/Generated/GB_binop__iseq_int8.c
index 448c5e2f59..4900894526 100644
--- a/Source/Generated/GB_binop__iseq_int8.c
+++ b/Source/Generated/GB_binop__iseq_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_int8
-// A.*B function (eWiseMult):  GB_AemultB__iseq_int8
-// A*D function (colscale):    GB_AxD__iseq_int8
-// D*A function (rowscale):    GB_DxB__iseq_int8
+// A+B function (eWiseAdd):         GB_AaddB__iseq_int8
+// A.*B function (eWiseMult):       GB_AemultB__iseq_int8
+// A*D function (colscale):         GB_AxD__iseq_int8
+// D*A function (rowscale):         GB_DxB__iseq_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0                       // false: this file's GB_BINOP is z = (x == y)
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0                       // false: the operator here is ISEQ, not PLUS
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0                       // false: the operator here is ISEQ, not MINUS
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)                  // placeholder: no gateway is named for ISEQ/int8
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_INT8 || GxB_NO_ISEQ_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the generator's placeholder, not a real name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_int8    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,           // output of C = A+B; header says all 3 are dense
+    const GrB_Matrix A,     // first input operand
+    const GrB_Matrix B,     // second input operand
+    const int nthreads      // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out via the GxB_NO_* flags
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the textually included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_int8    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not used here, so
+    const int64_t *GB_RESTRICT klast_slice,     // presumably read by the template;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm their layout there
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // exit after the braced, included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_int8    // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int8_t
+    const int nthreads          // presumably the template's thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        int8_t ywork = (*((const int8_t *) p_ywork)) ;  // keep const on the cast
+        #include "GB_dense_subassign_22_template.c"
+        // fall through: the return after this block reports success
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_uint16.c b/Source/Generated/GB_binop__iseq_uint16.c
index c8da53e613..0c5540b95f 100644
--- a/Source/Generated/GB_binop__iseq_uint16.c
+++ b/Source/Generated/GB_binop__iseq_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_uint16
-// A.*B function (eWiseMult):  GB_AemultB__iseq_uint16
-// A*D function (colscale):    GB_AxD__iseq_uint16
-// D*A function (rowscale):    GB_DxB__iseq_uint16
+// A+B function (eWiseAdd):         GB_AaddB__iseq_uint16
+// A.*B function (eWiseMult):       GB_AemultB__iseq_uint16
+// A*D function (colscale):         GB_AxD__iseq_uint16
+// D*A function (rowscale):         GB_DxB__iseq_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0                       // false: this file's GB_BINOP is z = (x == y)
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0                       // false: the operator here is ISEQ, not PLUS
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0                       // false: the operator here is ISEQ, not MINUS
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)                  // placeholder: no gateway is named for ISEQ/uint16
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_UINT16 || GxB_NO_ISEQ_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the generator's placeholder, not a real name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint16  // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,           // output of C = A+B; header says all 3 are dense
+    const GrB_Matrix A,     // first input operand
+    const GrB_Matrix B,     // second input operand
+    const int nthreads      // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out via the GxB_NO_* flags
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the textually included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_uint16  // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not used here, so
+    const int64_t *GB_RESTRICT klast_slice,     // presumably read by the template;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm their layout there
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // exit after the braced, included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_uint16  // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint16_t
+    const int nthreads          // presumably the template's thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        uint16_t ywork = (*((const uint16_t *) p_ywork)) ;  // keep const on cast
+        #include "GB_dense_subassign_22_template.c"
+        // fall through: the return after this block reports success
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_uint32.c b/Source/Generated/GB_binop__iseq_uint32.c
index a598400363..0e1832c678 100644
--- a/Source/Generated/GB_binop__iseq_uint32.c
+++ b/Source/Generated/GB_binop__iseq_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_uint32
-// A.*B function (eWiseMult):  GB_AemultB__iseq_uint32
-// A*D function (colscale):    GB_AxD__iseq_uint32
-// D*A function (rowscale):    GB_DxB__iseq_uint32
+// A+B function (eWiseAdd):         GB_AaddB__iseq_uint32
+// A.*B function (eWiseMult):       GB_AemultB__iseq_uint32
+// A*D function (colscale):         GB_AxD__iseq_uint32
+// D*A function (rowscale):         GB_DxB__iseq_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0                       // false: this file's GB_BINOP is z = (x == y)
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0                       // false: the operator here is ISEQ, not PLUS
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0                       // false: the operator here is ISEQ, not MINUS
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)                  // placeholder: no gateway is named for ISEQ/uint32
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_UINT32 || GxB_NO_ISEQ_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the generator's placeholder, not a real name
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint32  // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,           // output of C = A+B; header says all 3 are dense
+    const GrB_Matrix A,     // first input operand
+    const GrB_Matrix B,     // second input operand
+    const int nthreads      // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out via the GxB_NO_* flags
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the textually included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__iseq_uint32  // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not used here, so
+    const int64_t *GB_RESTRICT klast_slice,     // presumably read by the template;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm their layout there
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // exit after the braced, included template
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__iseq_uint32  // GrB_SUCCESS, or GrB_NO_VALUE
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint32_t
+    const int nthreads          // presumably the template's thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via the GxB_NO_* flags
+    #else
+    
+    { 
+        uint32_t ywork = (*((const uint32_t *) p_ywork)) ;  // keep const on cast
+        #include "GB_dense_subassign_22_template.c"
+        // fall through: the return after this block reports success
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_uint64.c b/Source/Generated/GB_binop__iseq_uint64.c
index 5504fad89b..6e4627e85e 100644
--- a/Source/Generated/GB_binop__iseq_uint64.c
+++ b/Source/Generated/GB_binop__iseq_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_uint64
-// A.*B function (eWiseMult):  GB_AemultB__iseq_uint64
-// A*D function (colscale):    GB_AxD__iseq_uint64
-// D*A function (rowscale):    GB_DxB__iseq_uint64
+// A+B function (eWiseAdd):         GB_AaddB__iseq_uint64
+// A.*B function (eWiseMult):       GB_AemultB__iseq_uint64
+// A*D function (colscale):         GB_AxD__iseq_uint64
+// D*A function (rowscale):         GB_DxB__iseq_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (uint64_t); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second (flag is 0 here: this file's operator is iseq)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 here: this file's operator is iseq)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 here: this file's operator is iseq)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder: no gateway routine for this operator and type
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_UINT64 || GxB_NO_ISEQ_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out by "#if 0": "(none)" is a placeholder, not a real function.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// NOTE(review): "+" presumably stands for this file's GB_BINOP -- confirm.
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint64
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel is compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumA__iseq_uint64
+(
+    GrB_Matrix C,                               // dense matrix being updated
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): these three
+    const int64_t *GB_RESTRICT klast_slice,     // arrays presumably slice A
+    const int64_t *GB_RESTRICT pstart_slice,    // into tasks -- confirm
+    const int ntasks,                           // presumably # of tasks
+    const int nthreads                          // presumably # of threads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // the braces give the textually included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumX__iseq_uint64
+(
+    GrB_Matrix C,               // dense matrix being updated
+    const GB_void *p_ywork,     // points to x; cast to (uint64_t *) below
+    const int nthreads          // presumably # of threads used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork is a typed copy of x; presumably the template refers to it by name
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached: the block above ends in a return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__iseq_uint8.c b/Source/Generated/GB_binop__iseq_uint8.c
index 121fddcb69..f85fec34fd 100644
--- a/Source/Generated/GB_binop__iseq_uint8.c
+++ b/Source/Generated/GB_binop__iseq_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__iseq_uint8
-// A.*B function (eWiseMult):  GB_AemultB__iseq_uint8
-// A*D function (colscale):    GB_AxD__iseq_uint8
-// D*A function (rowscale):    GB_DxB__iseq_uint8
+// A+B function (eWiseAdd):         GB_AaddB__iseq_uint8
+// A.*B function (eWiseMult):       GB_AemultB__iseq_uint8
+// A*D function (colscale):         GB_AxD__iseq_uint8
+// D*A function (rowscale):         GB_DxB__iseq_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__iseq_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__iseq_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__iseq_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (uint8_t); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x == y) ;
 
+// op is second (flag is 0 here: this file's operator is iseq)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 here: this file's operator is iseq)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 here: this file's operator is iseq)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder: no gateway routine for this operator and type
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISEQ || GxB_NO_UINT8 || GxB_NO_ISEQ_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out by "#if 0": "(none)" is a placeholder, not a real function.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// NOTE(review): "+" presumably stands for this file's GB_BINOP -- confirm.
+GrB_Info GB_Cdense_ewise3_noaccum__iseq_uint8
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel is compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumA__iseq_uint8
+(
+    GrB_Matrix C,                               // dense matrix being updated
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): these three
+    const int64_t *GB_RESTRICT klast_slice,     // arrays presumably slice A
+    const int64_t *GB_RESTRICT pstart_slice,    // into tasks -- confirm
+    const int ntasks,                           // presumably # of tasks
+    const int nthreads                          // presumably # of threads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // the braces give the textually included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumX__iseq_uint8
+(
+    GrB_Matrix C,               // dense matrix being updated
+    const GB_void *p_ywork,     // points to x; cast to (uint8_t *) below
+    const int nthreads          // presumably # of threads used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork is a typed copy of x; presumably the template refers to it by name
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached: the block above ends in a return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__iseq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__iseq_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_fp32.c b/Source/Generated/GB_binop__isge_fp32.c
index b4de57d76d..ae534fdf31 100644
--- a/Source/Generated/GB_binop__isge_fp32.c
+++ b/Source/Generated/GB_binop__isge_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_fp32
-// A.*B function (eWiseMult):  GB_AemultB__isge_fp32
-// A*D function (colscale):    GB_AxD__isge_fp32
-// D*A function (rowscale):    GB_DxB__isge_fp32
+// A+B function (eWiseAdd):         GB_AaddB__isge_fp32
+// A.*B function (eWiseMult):       GB_AemultB__isge_fp32
+// A*D function (colscale):         GB_AxD__isge_fp32
+// D*A function (rowscale):         GB_DxB__isge_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar t of the same type as C (float); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (flag is 0 here: this file's operator is isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder: no gateway routine for this operator and type
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_FP32 || GxB_NO_ISGE_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out by "#if 0": "(none)" is a placeholder, not a real function.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// NOTE(review): "+" presumably stands for this file's GB_BINOP -- confirm.
+GrB_Info GB_Cdense_ewise3_noaccum__isge_fp32
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel is compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumA__isge_fp32
+(
+    GrB_Matrix C,                               // dense matrix being updated
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): these three
+    const int64_t *GB_RESTRICT klast_slice,     // arrays presumably slice A
+    const int64_t *GB_RESTRICT pstart_slice,    // into tasks -- confirm
+    const int ntasks,                           // presumably # of tasks
+    const int nthreads                          // presumably # of threads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // the braces give the textually included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumX__isge_fp32
+(
+    GrB_Matrix C,               // dense matrix being updated
+    const GB_void *p_ywork,     // points to x; cast to (float *) below
+    const int nthreads          // presumably # of threads used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork is a typed copy of x; presumably the template refers to it by name
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached: the block above ends in a return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_fp64.c b/Source/Generated/GB_binop__isge_fp64.c
index a01403102b..9c01909026 100644
--- a/Source/Generated/GB_binop__isge_fp64.c
+++ b/Source/Generated/GB_binop__isge_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_fp64
-// A.*B function (eWiseMult):  GB_AemultB__isge_fp64
-// A*D function (colscale):    GB_AxD__isge_fp64
-// D*A function (rowscale):    GB_DxB__isge_fp64
+// A+B function (eWiseAdd):         GB_AaddB__isge_fp64
+// A.*B function (eWiseMult):       GB_AemultB__isge_fp64
+// A*D function (colscale):         GB_AxD__isge_fp64
+// D*A function (rowscale):         GB_DxB__isge_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar t of the same type as C (double); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (flag is 0 here: this file's operator is isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder: no gateway routine for this operator and type
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_FP64 || GxB_NO_ISGE_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out by "#if 0": "(none)" is a placeholder, not a real function.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// NOTE(review): "+" presumably stands for this file's GB_BINOP -- confirm.
+GrB_Info GB_Cdense_ewise3_noaccum__isge_fp64
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel is compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumA__isge_fp64
+(
+    GrB_Matrix C,                               // dense matrix being updated
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): these three
+    const int64_t *GB_RESTRICT klast_slice,     // arrays presumably slice A
+    const int64_t *GB_RESTRICT pstart_slice,    // into tasks -- confirm
+    const int ntasks,                           // presumably # of tasks
+    const int nthreads                          // presumably # of threads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // the braces give the textually included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumX__isge_fp64
+(
+    GrB_Matrix C,               // dense matrix being updated
+    const GB_void *p_ywork,     // points to x; cast to (double *) below
+    const int nthreads          // presumably # of threads used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork is a typed copy of x; presumably the template refers to it by name
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached: the block above ends in a return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_int16.c b/Source/Generated/GB_binop__isge_int16.c
index 0abbb6697e..ee2463909f 100644
--- a/Source/Generated/GB_binop__isge_int16.c
+++ b/Source/Generated/GB_binop__isge_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_int16
-// A.*B function (eWiseMult):  GB_AemultB__isge_int16
-// A*D function (colscale):    GB_AxD__isge_int16
-// D*A function (rowscale):    GB_DxB__isge_int16
+// A+B function (eWiseAdd):         GB_AaddB__isge_int16
+// A.*B function (eWiseMult):       GB_AemultB__isge_int16
+// A*D function (colscale):         GB_AxD__isge_int16
+// D*A function (rowscale):         GB_DxB__isge_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (int16_t); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (flag is 0 here: this file's operator is isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder: no gateway routine for this operator and type
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_INT16 || GxB_NO_ISGE_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out by "#if 0": "(none)" is a placeholder, not a real function.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// NOTE(review): "+" presumably stands for this file's GB_BINOP -- confirm.
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int16
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel is compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumA__isge_int16
+(
+    GrB_Matrix C,                               // dense matrix being updated
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): these three
+    const int64_t *GB_RESTRICT klast_slice,     // arrays presumably slice A
+    const int64_t *GB_RESTRICT pstart_slice,    // into tasks -- confirm
+    const int ntasks,                           // presumably # of tasks
+    const int nthreads                          // presumably # of threads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // the braces give the textually included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumX__isge_int16
+(
+    GrB_Matrix C,               // dense matrix being updated
+    const GB_void *p_ywork,     // points to x; cast to (int16_t *) below
+    const int nthreads          // presumably # of threads used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork is a typed copy of x; presumably the template refers to it by name
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached: the block above ends in a return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_int32.c b/Source/Generated/GB_binop__isge_int32.c
index b1cd283bfb..45924b1d29 100644
--- a/Source/Generated/GB_binop__isge_int32.c
+++ b/Source/Generated/GB_binop__isge_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_int32
-// A.*B function (eWiseMult):  GB_AemultB__isge_int32
-// A*D function (colscale):    GB_AxD__isge_int32
-// D*A function (rowscale):    GB_DxB__isge_int32
+// A+B function (eWiseAdd):         GB_AaddB__isge_int32
+// A.*B function (eWiseMult):       GB_AemultB__isge_int32
+// A*D function (colscale):         GB_AxD__isge_int32
+// D*A function (rowscale):         GB_DxB__isge_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (int32_t); no trailing semicolon
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (flag is 0 here: this file's operator is isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 here: this file's operator is isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder: no gateway routine for this operator and type
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_INT32 || GxB_NO_ISGE_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out by "#if 0": "(none)" is a placeholder, not a real function.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// NOTE(review): "+" presumably stands for this file's GB_BINOP -- confirm.
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int32
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel is compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumA__isge_int32
+(
+    GrB_Matrix C,                               // dense matrix being updated
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): these three
+    const int64_t *GB_RESTRICT klast_slice,     // arrays presumably slice A
+    const int64_t *GB_RESTRICT pstart_slice,    // into tasks -- confirm
+    const int ntasks,                           // presumably # of tasks
+    const int nthreads                          // presumably # of threads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // the braces give the textually included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is nonzero, GrB_SUCCESS otherwise.
+GrB_Info GB_Cdense_accumX__isge_int32
+(
+    GrB_Matrix C,               // dense matrix being updated
+    const GB_void *p_ywork,     // points to x; cast to (int32_t *) below
+    const int nthreads          // presumably # of threads used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork is a typed copy of x; presumably the template refers to it by name
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached: the block above ends in a return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_int64.c b/Source/Generated/GB_binop__isge_int64.c
index cb5c00a715..f90753ca9a 100644
--- a/Source/Generated/GB_binop__isge_int64.c
+++ b/Source/Generated/GB_binop__isge_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_int64
-// A.*B function (eWiseMult):  GB_AemultB__isge_int64
-// A*D function (colscale):    GB_AxD__isge_int64
-// D*A function (rowscale):    GB_DxB__isge_int64
+// A+B function (eWiseAdd):         GB_AaddB__isge_int64
+// A.*B function (eWiseMult):       GB_AemultB__isge_int64
+// A*D function (colscale):         GB_AxD__isge_int64
+// D*A function (rowscale):         GB_DxB__isge_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (int64_t); no ';' in the expansion
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0 for this operator, isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; "(none)" if absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_INT64 || GxB_NO_ISGE_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Disabled: no C += A+B kernel is generated for ISGE, hence the "(none)" name.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int64
+(
+    GrB_Matrix C,               // output matrix (dense)
+    const GrB_Matrix A,         // input matrix (dense)
+    const GrB_Matrix B,         // input matrix (dense)
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template-provided kernel body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isge_int64
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing arrays; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // built by GB_ek_slice for A and
+    const int64_t *GB_RESTRICT pstart_slice,    // read by the template -- confirm
+    const int ntasks,               // presumably the number of slices/tasks
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this kernel is compiled out
+    #else
+    // the template is expanded inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // success is reported after the template-provided kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isge_int64
+(
+    GrB_Matrix C,               // dense matrix that the scalar is accumulated into
+    const GB_void *p_ywork,     // read-only pointer to the scalar x, an int64_t
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    // C += x: read the scalar into ywork before expanding the template
+    { 
+        // p_ywork points to const data, so the cast keeps the qualifier
+        int64_t ywork = (*((const int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, shared by everything in the enabled branch
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_int8.c b/Source/Generated/GB_binop__isge_int8.c
index fb902006d2..50b93d048d 100644
--- a/Source/Generated/GB_binop__isge_int8.c
+++ b/Source/Generated/GB_binop__isge_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_int8
-// A.*B function (eWiseMult):  GB_AemultB__isge_int8
-// A*D function (colscale):    GB_AxD__isge_int8
-// D*A function (rowscale):    GB_DxB__isge_int8
+// A+B function (eWiseAdd):         GB_AaddB__isge_int8
+// A.*B function (eWiseMult):       GB_AemultB__isge_int8
+// A*D function (colscale):         GB_AxD__isge_int8
+// D*A function (rowscale):         GB_DxB__isge_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (int8_t); no ';' in the expansion
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0 for this operator, isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; "(none)" if absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_INT8 || GxB_NO_ISGE_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Disabled: no C += A+B kernel is generated for ISGE, hence the "(none)" name.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_int8
+(
+    GrB_Matrix C,               // output matrix (dense)
+    const GrB_Matrix A,         // input matrix (dense)
+    const GrB_Matrix B,         // input matrix (dense)
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template-provided kernel body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isge_int8
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing arrays; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // built by GB_ek_slice for A and
+    const int64_t *GB_RESTRICT pstart_slice,    // read by the template -- confirm
+    const int ntasks,               // presumably the number of slices/tasks
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this kernel is compiled out
+    #else
+    // the template is expanded inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // success is reported after the template-provided kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isge_int8
+(
+    GrB_Matrix C,               // dense matrix that the scalar is accumulated into
+    const GB_void *p_ywork,     // read-only pointer to the scalar x, an int8_t
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    // C += x: read the scalar into ywork before expanding the template
+    { 
+        // p_ywork points to const data, so the cast keeps the qualifier
+        int8_t ywork = (*((const int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, shared by everything in the enabled branch
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_uint16.c b/Source/Generated/GB_binop__isge_uint16.c
index 1a75e28da9..da39447249 100644
--- a/Source/Generated/GB_binop__isge_uint16.c
+++ b/Source/Generated/GB_binop__isge_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_uint16
-// A.*B function (eWiseMult):  GB_AemultB__isge_uint16
-// A*D function (colscale):    GB_AxD__isge_uint16
-// D*A function (rowscale):    GB_DxB__isge_uint16
+// A+B function (eWiseAdd):         GB_AaddB__isge_uint16
+// A.*B function (eWiseMult):       GB_AemultB__isge_uint16
+// A*D function (colscale):         GB_AxD__isge_uint16
+// D*A function (rowscale):         GB_DxB__isge_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (uint16_t); no ';' in the expansion
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0 for this operator, isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; "(none)" if absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_UINT16 || GxB_NO_ISGE_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Disabled: no C += A+B kernel is generated for ISGE, hence the "(none)" name.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint16
+(
+    GrB_Matrix C,               // output matrix (dense)
+    const GrB_Matrix A,         // input matrix (dense)
+    const GrB_Matrix B,         // input matrix (dense)
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template-provided kernel body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isge_uint16
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing arrays; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // built by GB_ek_slice for A and
+    const int64_t *GB_RESTRICT pstart_slice,    // read by the template -- confirm
+    const int ntasks,               // presumably the number of slices/tasks
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this kernel is compiled out
+    #else
+    // the template is expanded inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // success is reported after the template-provided kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isge_uint16
+(
+    GrB_Matrix C,               // dense matrix that the scalar is accumulated into
+    const GB_void *p_ywork,     // read-only pointer to the scalar x, a uint16_t
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    // C += x: read the scalar into ywork before expanding the template
+    { 
+        // p_ywork points to const data, so the cast keeps the qualifier
+        uint16_t ywork = (*((const uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, shared by everything in the enabled branch
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_uint32.c b/Source/Generated/GB_binop__isge_uint32.c
index a538bcc2b0..327fedb2e2 100644
--- a/Source/Generated/GB_binop__isge_uint32.c
+++ b/Source/Generated/GB_binop__isge_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_uint32
-// A.*B function (eWiseMult):  GB_AemultB__isge_uint32
-// A*D function (colscale):    GB_AxD__isge_uint32
-// D*A function (rowscale):    GB_DxB__isge_uint32
+// A+B function (eWiseAdd):         GB_AaddB__isge_uint32
+// A.*B function (eWiseMult):       GB_AemultB__isge_uint32
+// A*D function (colscale):         GB_AxD__isge_uint32
+// D*A function (rowscale):         GB_DxB__isge_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (uint32_t); no ';' in the expansion
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0 for this operator, isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; "(none)" if absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_UINT32 || GxB_NO_ISGE_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Disabled: no C += A+B kernel is generated for ISGE, hence the "(none)" name.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint32
+(
+    GrB_Matrix C,               // output matrix (dense)
+    const GrB_Matrix A,         // input matrix (dense)
+    const GrB_Matrix B,         // input matrix (dense)
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template-provided kernel body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isge_uint32
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing arrays; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // built by GB_ek_slice for A and
+    const int64_t *GB_RESTRICT pstart_slice,    // read by the template -- confirm
+    const int ntasks,               // presumably the number of slices/tasks
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this kernel is compiled out
+    #else
+    // the template is expanded inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // success is reported after the template-provided kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isge_uint32
+(
+    GrB_Matrix C,               // dense matrix that the scalar is accumulated into
+    const GB_void *p_ywork,     // read-only pointer to the scalar x, a uint32_t
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    // C += x: read the scalar into ywork before expanding the template
+    { 
+        // p_ywork points to const data, so the cast keeps the qualifier
+        uint32_t ywork = (*((const uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, shared by everything in the enabled branch
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_uint64.c b/Source/Generated/GB_binop__isge_uint64.c
index 586f12e840..6ac494b7c0 100644
--- a/Source/Generated/GB_binop__isge_uint64.c
+++ b/Source/Generated/GB_binop__isge_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_uint64
-// A.*B function (eWiseMult):  GB_AemultB__isge_uint64
-// A*D function (colscale):    GB_AxD__isge_uint64
-// D*A function (rowscale):    GB_DxB__isge_uint64
+// A+B function (eWiseAdd):         GB_AaddB__isge_uint64
+// A.*B function (eWiseMult):       GB_AemultB__isge_uint64
+// A*D function (colscale):         GB_AxD__isge_uint64
+// D*A function (rowscale):         GB_DxB__isge_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (uint64_t); no ';' in the expansion
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0 for this operator, isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; "(none)" if absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_UINT64 || GxB_NO_ISGE_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Disabled: no C += A+B kernel is generated for ISGE, hence the "(none)" name.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint64
+(
+    GrB_Matrix C,               // output matrix (dense)
+    const GrB_Matrix A,         // input matrix (dense)
+    const GrB_Matrix B,         // input matrix (dense)
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template-provided kernel body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isge_uint64
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing arrays; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // built by GB_ek_slice for A and
+    const int64_t *GB_RESTRICT pstart_slice,    // read by the template -- confirm
+    const int ntasks,               // presumably the number of slices/tasks
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this kernel is compiled out
+    #else
+    // the template is expanded inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // success is reported after the template-provided kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isge_uint64
+(
+    GrB_Matrix C,               // dense matrix that the scalar is accumulated into
+    const GB_void *p_ywork,     // read-only pointer to the scalar x, a uint64_t
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    // C += x: read the scalar into ywork before expanding the template
+    { 
+        // p_ywork points to const data, so the cast keeps the qualifier
+        uint64_t ywork = (*((const uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, shared by everything in the enabled branch
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isge_uint8.c b/Source/Generated/GB_binop__isge_uint8.c
index 7234f972a4..b558c72a77 100644
--- a/Source/Generated/GB_binop__isge_uint8.c
+++ b/Source/Generated/GB_binop__isge_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isge_uint8
-// A.*B function (eWiseMult):  GB_AemultB__isge_uint8
-// A*D function (colscale):    GB_AxD__isge_uint8
-// D*A function (rowscale):    GB_DxB__isge_uint8
+// A+B function (eWiseAdd):         GB_AaddB__isge_uint8
+// A.*B function (eWiseMult):       GB_AemultB__isge_uint8
+// A*D function (colscale):         GB_AxD__isge_uint8
+// D*A function (rowscale):         GB_DxB__isge_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__isge_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__isge_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isge_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (uint8_t); no ';' in the expansion
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x >= y) ;
 
+// op is second (0 for this operator, isge)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0 for this operator, isge)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type; "(none)" if absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGE || GxB_NO_UINT8 || GxB_NO_ISGE_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Disabled: no C += A+B kernel is generated for ISGE, hence the "(none)" name.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isge_uint8
+(
+    GrB_Matrix C,               // output matrix (dense)
+    const GrB_Matrix A,         // input matrix (dense)
+    const GrB_Matrix B,         // input matrix (dense)
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template-provided kernel body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isge_uint8
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing arrays; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // built by GB_ek_slice for A and
+    const int64_t *GB_RESTRICT pstart_slice,    // read by the template -- confirm
+    const int ntasks,               // presumably the number of slices/tasks
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // this kernel is compiled out
+    #else
+    // the template is expanded inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // success is reported after the template-provided kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isge_uint8
+(
+    GrB_Matrix C,               // dense matrix that the scalar is accumulated into
+    const GB_void *p_ywork,     // read-only pointer to the scalar x, a uint8_t
+    const int nthreads          // presumably the template's thread count; confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out
+    #else
+    // C += x: read the scalar into ywork before expanding the template
+    { 
+        // p_ywork points to const data, so the cast keeps the qualifier
+        uint8_t ywork = (*((const uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, shared by everything in the enabled branch
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isge_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_fp32.c b/Source/Generated/GB_binop__isgt_fp32.c
index a0767d3b62..79ee37da14 100644
--- a/Source/Generated/GB_binop__isgt_fp32.c
+++ b/Source/Generated/GB_binop__isgt_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_fp32
-// A.*B function (eWiseMult):  GB_AemultB__isgt_fp32
-// A*D function (colscale):    GB_AxD__isgt_fp32
-// D*A function (rowscale):    GB_DxB__isgt_fp32
+// A+B function (eWiseAdd):         GB_AaddB__isgt_fp32
+// A.*B function (eWiseMult):       GB_AemultB__isgt_fp32
+// A*D function (colscale):         GB_AxD__isgt_fp32
+// D*A function (rowscale):         GB_DxB__isgt_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_FP32 || GxB_NO_ISGT_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISGT is not in that list: this "(none)" stub is never compiled (#if 0).
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_fp32
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // all work is done by the template included above
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__isgt_fp32
+(
+    GrB_Matrix C,       // dense matrix updated in place
+    const GrB_Matrix A, // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // referenced here; presumably
+    const int ntasks,                           // the included template reads
+    const int nthreads                          // them (TODO confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__isgt_fp32
+(
+    GrB_Matrix C,               // dense matrix updated in place
+    const GB_void *p_ywork,     // points to scalar x; cast to float below
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces keep ywork and the included template in one block scope
+    { 
+        float ywork = (*((float *) p_ywork)) ;  // typed copy of scalar x
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // normally not reached: the block above ends with its own return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_fp64.c b/Source/Generated/GB_binop__isgt_fp64.c
index 8149cded52..2fb3c73d13 100644
--- a/Source/Generated/GB_binop__isgt_fp64.c
+++ b/Source/Generated/GB_binop__isgt_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_fp64
-// A.*B function (eWiseMult):  GB_AemultB__isgt_fp64
-// A*D function (colscale):    GB_AxD__isgt_fp64
-// D*A function (rowscale):    GB_DxB__isgt_fp64
+// A+B function (eWiseAdd):         GB_AaddB__isgt_fp64
+// A.*B function (eWiseMult):       GB_AemultB__isgt_fp64
+// A*D function (colscale):         GB_AxD__isgt_fp64
+// D*A function (rowscale):         GB_DxB__isgt_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_FP64 || GxB_NO_ISGT_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISGT is not in that list: this "(none)" stub is never compiled (#if 0).
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_fp64
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // all work is done by the template included above
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__isgt_fp64
+(
+    GrB_Matrix C,       // dense matrix updated in place
+    const GrB_Matrix A, // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // referenced here; presumably
+    const int ntasks,                           // the included template reads
+    const int nthreads                          // them (TODO confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__isgt_fp64
+(
+    GrB_Matrix C,               // dense matrix updated in place
+    const GB_void *p_ywork,     // points to scalar x; cast to double below
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces keep ywork and the included template in one block scope
+    { 
+        double ywork = (*((double *) p_ywork)) ;  // typed copy of scalar x
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // normally not reached: the block above ends with its own return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_int16.c b/Source/Generated/GB_binop__isgt_int16.c
index f9e2c4ca6e..19e243d940 100644
--- a/Source/Generated/GB_binop__isgt_int16.c
+++ b/Source/Generated/GB_binop__isgt_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_int16
-// A.*B function (eWiseMult):  GB_AemultB__isgt_int16
-// A*D function (colscale):    GB_AxD__isgt_int16
-// D*A function (rowscale):    GB_DxB__isgt_int16
+// A+B function (eWiseAdd):         GB_AaddB__isgt_int16
+// A.*B function (eWiseMult):       GB_AemultB__isgt_int16
+// A*D function (colscale):         GB_AxD__isgt_int16
+// D*A function (rowscale):         GB_DxB__isgt_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_INT16 || GxB_NO_ISGT_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISGT is not in that list: this "(none)" stub is never compiled (#if 0).
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int16
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // all work is done by the template included above
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__isgt_int16
+(
+    GrB_Matrix C,       // dense matrix updated in place
+    const GrB_Matrix A, // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // referenced here; presumably
+    const int ntasks,                           // the included template reads
+    const int nthreads                          // them (TODO confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__isgt_int16
+(
+    GrB_Matrix C,               // dense matrix updated in place
+    const GB_void *p_ywork,     // points to scalar x; cast to int16_t below
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces keep ywork and the included template in one block scope
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;  // typed copy of scalar x
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // normally not reached: the block above ends with its own return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_int32.c b/Source/Generated/GB_binop__isgt_int32.c
index bc8a20695a..fe9a6b3ca6 100644
--- a/Source/Generated/GB_binop__isgt_int32.c
+++ b/Source/Generated/GB_binop__isgt_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_int32
-// A.*B function (eWiseMult):  GB_AemultB__isgt_int32
-// A*D function (colscale):    GB_AxD__isgt_int32
-// D*A function (rowscale):    GB_DxB__isgt_int32
+// A+B function (eWiseAdd):         GB_AaddB__isgt_int32
+// A.*B function (eWiseMult):       GB_AemultB__isgt_int32
+// A*D function (colscale):         GB_AxD__isgt_int32
+// D*A function (rowscale):         GB_DxB__isgt_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_INT32 || GxB_NO_ISGT_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISGT is not in that list: this "(none)" stub is never compiled (#if 0).
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int32
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // all work is done by the template included above
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__isgt_int32
+(
+    GrB_Matrix C,       // dense matrix updated in place
+    const GrB_Matrix A, // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // referenced here; presumably
+    const int ntasks,                           // the included template reads
+    const int nthreads                          // them (TODO confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__isgt_int32
+(
+    GrB_Matrix C,               // dense matrix updated in place
+    const GB_void *p_ywork,     // points to scalar x; cast to int32_t below
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces keep ywork and the included template in one block scope
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;  // typed copy of scalar x
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // normally not reached: the block above ends with its own return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_int64.c b/Source/Generated/GB_binop__isgt_int64.c
index 12fd9a9a50..13afd91ba2 100644
--- a/Source/Generated/GB_binop__isgt_int64.c
+++ b/Source/Generated/GB_binop__isgt_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_int64
-// A.*B function (eWiseMult):  GB_AemultB__isgt_int64
-// A*D function (colscale):    GB_AxD__isgt_int64
-// D*A function (rowscale):    GB_DxB__isgt_int64
+// A+B function (eWiseAdd):         GB_AaddB__isgt_int64
+// A.*B function (eWiseMult):       GB_AemultB__isgt_int64
+// A*D function (colscale):         GB_AxD__isgt_int64
+// D*A function (rowscale):         GB_DxB__isgt_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_INT64 || GxB_NO_ISGT_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISGT is not in that list: this "(none)" stub is never compiled (#if 0).
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int64
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // all work is done by the template included above
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__isgt_int64
+(
+    GrB_Matrix C,       // dense matrix updated in place
+    const GrB_Matrix A, // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // referenced here; presumably
+    const int ntasks,                           // the included template reads
+    const int nthreads                          // them (TODO confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__isgt_int64
+(
+    GrB_Matrix C,               // dense matrix updated in place
+    const GB_void *p_ywork,     // points to scalar x; cast to int64_t below
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces keep ywork and the included template in one block scope
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;  // typed copy of scalar x
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // normally not reached: the block above ends with its own return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_int8.c b/Source/Generated/GB_binop__isgt_int8.c
index 2296c5584e..78e1afa455 100644
--- a/Source/Generated/GB_binop__isgt_int8.c
+++ b/Source/Generated/GB_binop__isgt_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_int8
-// A.*B function (eWiseMult):  GB_AemultB__isgt_int8
-// A*D function (colscale):    GB_AxD__isgt_int8
-// D*A function (rowscale):    GB_DxB__isgt_int8
+// A+B function (eWiseAdd):         GB_AaddB__isgt_int8
+// A.*B function (eWiseMult):       GB_AemultB__isgt_int8
+// A*D function (colscale):         GB_AxD__isgt_int8
+// D*A function (rowscale):         GB_DxB__isgt_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_INT8 || GxB_NO_ISGT_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISGT is not in that list: this "(none)" stub is never compiled (#if 0).
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_int8
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // all work is done by the template included above
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__isgt_int8
+(
+    GrB_Matrix C,       // dense matrix updated in place
+    const GrB_Matrix A, // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // referenced here; presumably
+    const int ntasks,                           // the included template reads
+    const int nthreads                          // them (TODO confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces give the included template its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set when compiled, else GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__isgt_int8
+(
+    GrB_Matrix C,               // dense matrix updated in place
+    const GB_void *p_ywork,     // points to scalar x; cast to int8_t below
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is disabled at compile time
+    #else
+    // braces keep ywork and the included template in one block scope
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;  // typed copy of scalar x
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // normally not reached: the block above ends with its own return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_uint16.c b/Source/Generated/GB_binop__isgt_uint16.c
index 9b6d17a3ff..f9c13280b8 100644
--- a/Source/Generated/GB_binop__isgt_uint16.c
+++ b/Source/Generated/GB_binop__isgt_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_uint16
-// A.*B function (eWiseMult):  GB_AemultB__isgt_uint16
-// A*D function (colscale):    GB_AxD__isgt_uint16
-// D*A function (rowscale):    GB_DxB__isgt_uint16
+// A+B function (eWiseAdd):         GB_AaddB__isgt_uint16
+// A.*B function (eWiseMult):       GB_AemultB__isgt_uint16
+// A*D function (colscale):         GB_AxD__isgt_uint16
+// D*A function (rowscale):         GB_DxB__isgt_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_ISGT_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: this file lists "(none)" as its C+=A+B function
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is a placeholder, not a C identifier; ISGT is not in the list above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint16  // C = A+B kernel (all dense), isgt_uint16 variant
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_ISGT_UINT16
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // presumably applies GB_BINOP, z = (x > y) here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isgt_uint16  // C += A kernel (sparse A into dense C), isgt_uint16 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GrB_Matrix A,         // matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably per-task slice data -- TODO confirm
+    const int ntasks,           // presumably the number of slices -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_ISGT_UINT16
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // body is supplied by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isgt_uint16  // C += x kernel (scalar into dense C), isgt_uint16 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint16_t below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT16 || GxB_NO_ISGT_UINT16
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        uint16_t ywork = (*((const uint16_t *) p_ywork)) ;  // keep const: p_ywork is read-only
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_uint32.c b/Source/Generated/GB_binop__isgt_uint32.c
index 5342ce1b3b..b85714b654 100644
--- a/Source/Generated/GB_binop__isgt_uint32.c
+++ b/Source/Generated/GB_binop__isgt_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_uint32
-// A.*B function (eWiseMult):  GB_AemultB__isgt_uint32
-// A*D function (colscale):    GB_AxD__isgt_uint32
-// D*A function (rowscale):    GB_DxB__isgt_uint32
+// A+B function (eWiseAdd):         GB_AaddB__isgt_uint32
+// A.*B function (eWiseMult):       GB_AemultB__isgt_uint32
+// A*D function (colscale):         GB_AxD__isgt_uint32
+// D*A function (rowscale):         GB_DxB__isgt_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_ISGT_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: this file lists "(none)" as its C+=A+B function
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is a placeholder, not a C identifier; ISGT is not in the list above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint32  // C = A+B kernel (all dense), isgt_uint32 variant
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_ISGT_UINT32
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // presumably applies GB_BINOP, z = (x > y) here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isgt_uint32  // C += A kernel (sparse A into dense C), isgt_uint32 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GrB_Matrix A,         // matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably per-task slice data -- TODO confirm
+    const int ntasks,           // presumably the number of slices -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_ISGT_UINT32
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // body is supplied by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isgt_uint32  // C += x kernel (scalar into dense C), isgt_uint32 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint32_t below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT32 || GxB_NO_ISGT_UINT32
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        uint32_t ywork = (*((const uint32_t *) p_ywork)) ;  // keep const: p_ywork is read-only
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_uint64.c b/Source/Generated/GB_binop__isgt_uint64.c
index 803fa565b7..83114ff70c 100644
--- a/Source/Generated/GB_binop__isgt_uint64.c
+++ b/Source/Generated/GB_binop__isgt_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_uint64
-// A.*B function (eWiseMult):  GB_AemultB__isgt_uint64
-// A*D function (colscale):    GB_AxD__isgt_uint64
-// D*A function (rowscale):    GB_DxB__isgt_uint64
+// A+B function (eWiseAdd):         GB_AaddB__isgt_uint64
+// A.*B function (eWiseMult):       GB_AemultB__isgt_uint64
+// A*D function (colscale):         GB_AxD__isgt_uint64
+// D*A function (rowscale):         GB_DxB__isgt_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_ISGT_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: this file lists "(none)" as its C+=A+B function
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is a placeholder, not a C identifier; ISGT is not in the list above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint64  // C = A+B kernel (all dense), isgt_uint64 variant
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_ISGT_UINT64
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // presumably applies GB_BINOP, z = (x > y) here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isgt_uint64  // C += A kernel (sparse A into dense C), isgt_uint64 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GrB_Matrix A,         // matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably per-task slice data -- TODO confirm
+    const int ntasks,           // presumably the number of slices -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_ISGT_UINT64
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // body is supplied by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isgt_uint64  // C += x kernel (scalar into dense C), isgt_uint64 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint64_t below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT64 || GxB_NO_ISGT_UINT64
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        uint64_t ywork = (*((const uint64_t *) p_ywork)) ;  // keep const: p_ywork is read-only
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isgt_uint8.c b/Source/Generated/GB_binop__isgt_uint8.c
index 495a7f9a55..441156ed70 100644
--- a/Source/Generated/GB_binop__isgt_uint8.c
+++ b/Source/Generated/GB_binop__isgt_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isgt_uint8
-// A.*B function (eWiseMult):  GB_AemultB__isgt_uint8
-// A*D function (colscale):    GB_AxD__isgt_uint8
-// D*A function (rowscale):    GB_DxB__isgt_uint8
+// A+B function (eWiseAdd):         GB_AaddB__isgt_uint8
+// A.*B function (eWiseMult):       GB_AemultB__isgt_uint8
+// A*D function (colscale):         GB_AxD__isgt_uint8
+// D*A function (rowscale):         GB_DxB__isgt_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__isgt_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__isgt_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isgt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x > y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_ISGT_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: this file lists "(none)" as its C+=A+B function
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is a placeholder, not a C identifier; ISGT is not in the list above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isgt_uint8   // C = A+B kernel (all dense), isgt_uint8 variant
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_ISGT_UINT8
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // presumably applies GB_BINOP, z = (x > y) here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isgt_uint8   // C += A kernel (sparse A into dense C), isgt_uint8 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GrB_Matrix A,         // matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably per-task slice data -- TODO confirm
+    const int ntasks,           // presumably the number of slices -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_ISGT_UINT8
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // body is supplied by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isgt_uint8   // C += x kernel (scalar into dense C), isgt_uint8 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint8_t below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISGT || GxB_NO_UINT8 || GxB_NO_ISGT_UINT8
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        uint8_t ywork = (*((const uint8_t *) p_ywork)) ;    // keep const: p_ywork is read-only
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isgt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isgt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_fp32.c b/Source/Generated/GB_binop__isle_fp32.c
index b655b4213e..8a20a022f9 100644
--- a/Source/Generated/GB_binop__isle_fp32.c
+++ b/Source/Generated/GB_binop__isle_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_fp32
-// A.*B function (eWiseMult):  GB_AemultB__isle_fp32
-// A*D function (colscale):    GB_AxD__isle_fp32
-// D*A function (rowscale):    GB_DxB__isle_fp32
+// A+B function (eWiseAdd):         GB_AaddB__isle_fp32
+// A.*B function (eWiseMult):       GB_AemultB__isle_fp32
+// A*D function (colscale):         GB_AxD__isle_fp32
+// D*A function (rowscale):         GB_DxB__isle_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_ISLE_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: this file lists "(none)" as its C+=A+B function
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is a placeholder, not a C identifier; ISLE is not in the list above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_fp32    // C = A+B kernel (all dense), isle_fp32 variant
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_ISLE_FP32
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // presumably applies GB_BINOP, z = (x <= y) here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_fp32    // C += A kernel (sparse A into dense C), isle_fp32 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GrB_Matrix A,         // matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably per-task slice data -- TODO confirm
+    const int ntasks,           // presumably the number of slices -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_ISLE_FP32
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // body is supplied by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_fp32    // C += x kernel (scalar into dense C), isle_fp32 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as float below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISLE || GxB_NO_FP32 || GxB_NO_ISLE_FP32
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        float ywork = (*((const float *) p_ywork)) ;    // keep const: p_ywork is read-only
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_fp64.c b/Source/Generated/GB_binop__isle_fp64.c
index 1153513509..3eba67837a 100644
--- a/Source/Generated/GB_binop__isle_fp64.c
+++ b/Source/Generated/GB_binop__isle_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_fp64
-// A.*B function (eWiseMult):  GB_AemultB__isle_fp64
-// A*D function (colscale):    GB_AxD__isle_fp64
-// D*A function (rowscale):    GB_DxB__isle_fp64
+// A+B function (eWiseAdd):         GB_AaddB__isle_fp64
+// A.*B function (eWiseMult):       GB_AemultB__isle_fp64
+// A*D function (colscale):         GB_AxD__isle_fp64
+// D*A function (rowscale):         GB_DxB__isle_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_ISLE_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: this file lists "(none)" as its C+=A+B function
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is a placeholder, not a C identifier; ISLE is not in the list above
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_fp64    // C = A+B kernel (all dense), isle_fp64 variant
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_ISLE_FP64
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // presumably applies GB_BINOP, z = (x <= y) here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_fp64    // C += A kernel (sparse A into dense C), isle_fp64 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GrB_Matrix A,         // matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably per-task slice data -- TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably per-task slice data -- TODO confirm
+    const int ntasks,           // presumably the number of slices -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_ISLE_FP64
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // body is supplied by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_fp64    // C += x kernel (scalar into dense C), isle_fp64 variant
+(
+    GrB_Matrix C,               // matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as double below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_ISLE || GxB_NO_FP64 || GxB_NO_ISLE_FP64
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    
+    { 
+        double ywork = (*((const double *) p_ywork)) ;  // keep const: p_ywork is read-only
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_int16.c b/Source/Generated/GB_binop__isle_int16.c
index 8fc76dee84..b2d28a66ab 100644
--- a/Source/Generated/GB_binop__isle_int16.c
+++ b/Source/Generated/GB_binop__isle_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_int16
-// A.*B function (eWiseMult):  GB_AemultB__isle_int16
-// A*D function (colscale):    GB_AxD__isle_int16
-// D*A function (rowscale):    GB_DxB__isle_int16
+// A+B function (eWiseAdd):         GB_AaddB__isle_int16
+// A.*B function (eWiseMult):       GB_AemultB__isle_int16
+// A*D function (colscale):         GB_AxD__isle_int16
+// D*A function (rowscale):         GB_DxB__isle_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_INT16 || GxB_NO_ISLE_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Never compiled. ISLE is not listed below; presumably why it is "(none)".
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; "+" presumably means GB_BINOP -- confirm
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int16
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_int16
+(
+    GrB_Matrix C,                   // dense matrix that is updated
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // these 3 arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is sliced
+    const int64_t *GB_RESTRICT pstart_slice,    // across the tasks -- confirm
+    const int ntasks,               // presumably the # of tasks in the slices
+    const int nthreads              // presumably the # of threads to use
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_int16
+(
+    GrB_Matrix C,               // dense matrix that is updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (an int16_t)
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        // read the scalar through a const pointer; p_ywork is const-qualified
+        int16_t ywork = (*((const int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_int32.c b/Source/Generated/GB_binop__isle_int32.c
index 67962bf5c6..cebdb39934 100644
--- a/Source/Generated/GB_binop__isle_int32.c
+++ b/Source/Generated/GB_binop__isle_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_int32
-// A.*B function (eWiseMult):  GB_AemultB__isle_int32
-// A*D function (colscale):    GB_AxD__isle_int32
-// D*A function (rowscale):    GB_DxB__isle_int32
+// A+B function (eWiseAdd):         GB_AaddB__isle_int32
+// A.*B function (eWiseMult):       GB_AemultB__isle_int32
+// A*D function (colscale):         GB_AxD__isle_int32
+// D*A function (rowscale):         GB_DxB__isle_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_INT32 || GxB_NO_ISLE_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Never compiled. ISLE is not listed below; presumably why it is "(none)".
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; "+" presumably means GB_BINOP -- confirm
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int32
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_int32
+(
+    GrB_Matrix C,                   // dense matrix that is updated
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // these 3 arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is sliced
+    const int64_t *GB_RESTRICT pstart_slice,    // across the tasks -- confirm
+    const int ntasks,               // presumably the # of tasks in the slices
+    const int nthreads              // presumably the # of threads to use
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_int32
+(
+    GrB_Matrix C,               // dense matrix that is updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (an int32_t)
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        // read the scalar through a const pointer; p_ywork is const-qualified
+        int32_t ywork = (*((const int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_int64.c b/Source/Generated/GB_binop__isle_int64.c
index 82eb87ef0e..8bc5fe558a 100644
--- a/Source/Generated/GB_binop__isle_int64.c
+++ b/Source/Generated/GB_binop__isle_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_int64
-// A.*B function (eWiseMult):  GB_AemultB__isle_int64
-// A*D function (colscale):    GB_AxD__isle_int64
-// D*A function (rowscale):    GB_DxB__isle_int64
+// A+B function (eWiseAdd):         GB_AaddB__isle_int64
+// A.*B function (eWiseMult):       GB_AemultB__isle_int64
+// A*D function (colscale):         GB_AxD__isle_int64
+// D*A function (rowscale):         GB_DxB__isle_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_INT64 || GxB_NO_ISLE_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Never compiled. ISLE is not listed below; presumably why it is "(none)".
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; "+" presumably means GB_BINOP -- confirm
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int64
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_int64
+(
+    GrB_Matrix C,                   // dense matrix that is updated
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // these 3 arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is sliced
+    const int64_t *GB_RESTRICT pstart_slice,    // across the tasks -- confirm
+    const int ntasks,               // presumably the # of tasks in the slices
+    const int nthreads              // presumably the # of threads to use
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_int64
+(
+    GrB_Matrix C,               // dense matrix that is updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (an int64_t)
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        // read the scalar through a const pointer; p_ywork is const-qualified
+        int64_t ywork = (*((const int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_int8.c b/Source/Generated/GB_binop__isle_int8.c
index 8e7a871958..b436cc35a6 100644
--- a/Source/Generated/GB_binop__isle_int8.c
+++ b/Source/Generated/GB_binop__isle_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_int8
-// A.*B function (eWiseMult):  GB_AemultB__isle_int8
-// A*D function (colscale):    GB_AxD__isle_int8
-// D*A function (rowscale):    GB_DxB__isle_int8
+// A+B function (eWiseAdd):         GB_AaddB__isle_int8
+// A.*B function (eWiseMult):       GB_AemultB__isle_int8
+// A*D function (colscale):         GB_AxD__isle_int8
+// D*A function (rowscale):         GB_DxB__isle_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_INT8 || GxB_NO_ISLE_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Never compiled. ISLE is not listed below; presumably why it is "(none)".
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; "+" presumably means GB_BINOP -- confirm
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_int8
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_int8
+(
+    GrB_Matrix C,                   // dense matrix that is updated
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // these 3 arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is sliced
+    const int64_t *GB_RESTRICT pstart_slice,    // across the tasks -- confirm
+    const int ntasks,               // presumably the # of tasks in the slices
+    const int nthreads              // presumably the # of threads to use
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_int8
+(
+    GrB_Matrix C,               // dense matrix that is updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (an int8_t)
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        // read the scalar through a const pointer; p_ywork is const-qualified
+        int8_t ywork = (*((const int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_uint16.c b/Source/Generated/GB_binop__isle_uint16.c
index 3f466de45f..3eace0f18f 100644
--- a/Source/Generated/GB_binop__isle_uint16.c
+++ b/Source/Generated/GB_binop__isle_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_uint16
-// A.*B function (eWiseMult):  GB_AemultB__isle_uint16
-// A*D function (colscale):    GB_AxD__isle_uint16
-// D*A function (rowscale):    GB_DxB__isle_uint16
+// A+B function (eWiseAdd):         GB_AaddB__isle_uint16
+// A.*B function (eWiseMult):       GB_AemultB__isle_uint16
+// A*D function (colscale):         GB_AxD__isle_uint16
+// D*A function (rowscale):         GB_DxB__isle_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_UINT16 || GxB_NO_ISLE_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Never compiled. ISLE is not listed below; presumably why it is "(none)".
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; "+" presumably means GB_BINOP -- confirm
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint16
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_uint16
+(
+    GrB_Matrix C,                   // dense matrix that is updated
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // these 3 arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is sliced
+    const int64_t *GB_RESTRICT pstart_slice,    // across the tasks -- confirm
+    const int ntasks,               // presumably the # of tasks in the slices
+    const int nthreads              // presumably the # of threads to use
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_uint16
+(
+    GrB_Matrix C,               // dense matrix that is updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (a uint16_t)
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        // read the scalar through a const pointer; p_ywork is const-qualified
+        uint16_t ywork = (*((const uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_uint32.c b/Source/Generated/GB_binop__isle_uint32.c
index eac99e122d..1e7f7cceec 100644
--- a/Source/Generated/GB_binop__isle_uint32.c
+++ b/Source/Generated/GB_binop__isle_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_uint32
-// A.*B function (eWiseMult):  GB_AemultB__isle_uint32
-// A*D function (colscale):    GB_AxD__isle_uint32
-// D*A function (rowscale):    GB_DxB__isle_uint32
+// A+B function (eWiseAdd):         GB_AaddB__isle_uint32
+// A.*B function (eWiseMult):       GB_AemultB__isle_uint32
+// A*D function (colscale):         GB_AxD__isle_uint32
+// D*A function (rowscale):         GB_DxB__isle_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_UINT32 || GxB_NO_ISLE_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Never compiled. ISLE is not listed below; presumably why it is "(none)".
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; "+" presumably means GB_BINOP -- confirm
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint32
+(
+    GrB_Matrix C,               // output matrix
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_uint32
+(
+    GrB_Matrix C,                   // dense matrix that is updated
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // these 3 arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is sliced
+    const int64_t *GB_RESTRICT pstart_slice,    // across the tasks -- confirm
+    const int ntasks,               // presumably the # of tasks in the slices
+    const int nthreads              // presumably the # of threads to use
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel is enabled and was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_uint32
+(
+    GrB_Matrix C,               // dense matrix that is updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (a uint32_t)
+    const int nthreads          // presumably the # of threads to use -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        // read the scalar through a const pointer; p_ywork is const-qualified
+        uint32_t ywork = (*((const uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_uint64.c b/Source/Generated/GB_binop__isle_uint64.c
index c9275de13e..ef128b1acd 100644
--- a/Source/Generated/GB_binop__isle_uint64.c
+++ b/Source/Generated/GB_binop__isle_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_uint64
-// A.*B function (eWiseMult):  GB_AemultB__isle_uint64
-// A*D function (colscale):    GB_AxD__isle_uint64
-// D*A function (rowscale):    GB_DxB__isle_uint64
+// A+B function (eWiseAdd):         GB_AaddB__isle_uint64
+// A.*B function (eWiseMult):       GB_AemultB__isle_uint64
+// A*D function (colscale):         GB_AxD__isle_uint64
+// D*A function (rowscale):         GB_DxB__isle_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_UINT64 || GxB_NO_ISLE_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Placeholder only: no C+=A+B kernel exists for this operator, so the name is (none).
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint64
+(
+    GrB_Matrix C,           // output matrix (dense)
+    const GrB_Matrix A,     // first input matrix (dense)
+    const GrB_Matrix B,     // second input matrix (dense)
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_uint64
+(
+    GrB_Matrix C,                   // dense matrix that is accumulated into
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, not used here
+    const int64_t *GB_RESTRICT klast_slice,     // directly; presumably read by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,               // presumably the number of tasks in the slices
+    const int nthreads              // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // the computation itself is supplied by the included template
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_uint64
+(
+    GrB_Matrix C,               // dense matrix that is accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (a uint64_t)
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // read the scalar without casting away the const qualifier of p_ywork
+    { 
+        uint64_t ywork = (*((const uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // not reached: the block above always ends in a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isle_uint8.c b/Source/Generated/GB_binop__isle_uint8.c
index 7d104b2f55..a38280f41b 100644
--- a/Source/Generated/GB_binop__isle_uint8.c
+++ b/Source/Generated/GB_binop__isle_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isle_uint8
-// A.*B function (eWiseMult):  GB_AemultB__isle_uint8
-// A*D function (colscale):    GB_AxD__isle_uint8
-// D*A function (rowscale):    GB_DxB__isle_uint8
+// A+B function (eWiseAdd):         GB_AaddB__isle_uint8
+// A.*B function (eWiseMult):       GB_AemultB__isle_uint8
+// A*D function (colscale):         GB_AxD__isle_uint8
+// D*A function (rowscale):         GB_DxB__isle_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__isle_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__isle_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isle_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLE || GxB_NO_UINT8 || GxB_NO_ISLE_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Placeholder only: no C+=A+B kernel exists for this operator, so the name is (none).
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isle_uint8
+(
+    GrB_Matrix C,           // output matrix (dense)
+    const GrB_Matrix A,     // first input matrix (dense)
+    const GrB_Matrix B,     // second input matrix (dense)
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isle_uint8
+(
+    GrB_Matrix C,                   // dense matrix that is accumulated into
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, not used here
+    const int64_t *GB_RESTRICT klast_slice,     // directly; presumably read by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,               // presumably the number of tasks in the slices
+    const int nthreads              // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // the computation itself is supplied by the included template
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isle_uint8
+(
+    GrB_Matrix C,               // dense matrix that is accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (a uint8_t)
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // read the scalar without casting away the const qualifier of p_ywork
+    { 
+        uint8_t ywork = (*((const uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // not reached: the block above always ends in a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isle_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isle_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_fp32.c b/Source/Generated/GB_binop__islt_fp32.c
index a680844f88..7164e81cab 100644
--- a/Source/Generated/GB_binop__islt_fp32.c
+++ b/Source/Generated/GB_binop__islt_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_fp32
-// A.*B function (eWiseMult):  GB_AemultB__islt_fp32
-// A*D function (colscale):    GB_AxD__islt_fp32
-// D*A function (rowscale):    GB_DxB__islt_fp32
+// A+B function (eWiseAdd):         GB_AaddB__islt_fp32
+// A.*B function (eWiseMult):       GB_AemultB__islt_fp32
+// A*D function (colscale):         GB_AxD__islt_fp32
+// D*A function (rowscale):         GB_DxB__islt_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_FP32 || GxB_NO_ISLT_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Placeholder only: no C+=A+B kernel exists for this operator, so the name is (none).
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_fp32
+(
+    GrB_Matrix C,           // output matrix (dense)
+    const GrB_Matrix A,     // first input matrix (dense)
+    const GrB_Matrix B,     // second input matrix (dense)
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_fp32
+(
+    GrB_Matrix C,                   // dense matrix that is accumulated into
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, not used here
+    const int64_t *GB_RESTRICT klast_slice,     // directly; presumably read by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,               // presumably the number of tasks in the slices
+    const int nthreads              // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // the computation itself is supplied by the included template
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_fp32
+(
+    GrB_Matrix C,               // dense matrix that is accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (a float)
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // read the scalar without casting away the const qualifier of p_ywork
+    { 
+        float ywork = (*((const float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // not reached: the block above always ends in a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_fp64.c b/Source/Generated/GB_binop__islt_fp64.c
index d0ee7a1116..a4657fffb3 100644
--- a/Source/Generated/GB_binop__islt_fp64.c
+++ b/Source/Generated/GB_binop__islt_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_fp64
-// A.*B function (eWiseMult):  GB_AemultB__islt_fp64
-// A*D function (colscale):    GB_AxD__islt_fp64
-// D*A function (rowscale):    GB_DxB__islt_fp64
+// A+B function (eWiseAdd):         GB_AaddB__islt_fp64
+// A.*B function (eWiseMult):       GB_AemultB__islt_fp64
+// A*D function (colscale):         GB_AxD__islt_fp64
+// D*A function (rowscale):         GB_DxB__islt_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_FP64 || GxB_NO_ISLT_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Placeholder only: no C+=A+B kernel exists for this operator, so the name is (none).
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_fp64
+(
+    GrB_Matrix C,           // output matrix (dense)
+    const GrB_Matrix A,     // first input matrix (dense)
+    const GrB_Matrix B,     // second input matrix (dense)
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_fp64
+(
+    GrB_Matrix C,                   // dense matrix that is accumulated into
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, not used here
+    const int64_t *GB_RESTRICT klast_slice,     // directly; presumably read by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,               // presumably the number of tasks in the slices
+    const int nthreads              // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // the computation itself is supplied by the included template
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_fp64
+(
+    GrB_Matrix C,               // dense matrix that is accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (a double)
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // read the scalar without casting away the const qualifier of p_ywork
+    { 
+        double ywork = (*((const double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // not reached: the block above always ends in a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_int16.c b/Source/Generated/GB_binop__islt_int16.c
index 8dee94f363..e43d145c80 100644
--- a/Source/Generated/GB_binop__islt_int16.c
+++ b/Source/Generated/GB_binop__islt_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_int16
-// A.*B function (eWiseMult):  GB_AemultB__islt_int16
-// A*D function (colscale):    GB_AxD__islt_int16
-// D*A function (rowscale):    GB_DxB__islt_int16
+// A+B function (eWiseAdd):         GB_AaddB__islt_int16
+// A.*B function (eWiseMult):       GB_AemultB__islt_int16
+// A*D function (colscale):         GB_AxD__islt_int16
+// D*A function (rowscale):         GB_DxB__islt_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_INT16 || GxB_NO_ISLT_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Placeholder only: no C+=A+B kernel exists for this operator, so the name is (none).
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int16
+(
+    GrB_Matrix C,           // output matrix (dense)
+    const GrB_Matrix A,     // first input matrix (dense)
+    const GrB_Matrix B,     // second input matrix (dense)
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_int16
+(
+    GrB_Matrix C,                   // dense matrix that is accumulated into
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, not used here
+    const int64_t *GB_RESTRICT klast_slice,     // directly; presumably read by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,               // presumably the number of tasks in the slices
+    const int nthreads              // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // the computation itself is supplied by the included template
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_int16
+(
+    GrB_Matrix C,               // dense matrix that is accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (an int16_t)
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // read the scalar without casting away the const qualifier of p_ywork
+    { 
+        int16_t ywork = (*((const int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // not reached: the block above always ends in a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_int32.c b/Source/Generated/GB_binop__islt_int32.c
index afddadbce5..2ec4698d6c 100644
--- a/Source/Generated/GB_binop__islt_int32.c
+++ b/Source/Generated/GB_binop__islt_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_int32
-// A.*B function (eWiseMult):  GB_AemultB__islt_int32
-// A*D function (colscale):    GB_AxD__islt_int32
-// D*A function (rowscale):    GB_DxB__islt_int32
+// A+B function (eWiseAdd):         GB_AaddB__islt_int32
+// A.*B function (eWiseMult):       GB_AemultB__islt_int32
+// A*D function (colscale):         GB_AxD__islt_int32
+// D*A function (rowscale):         GB_DxB__islt_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_INT32 || GxB_NO_ISLT_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Placeholder only: no C+=A+B kernel exists for this operator, so the name is (none).
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int32
+(
+    GrB_Matrix C,           // output matrix (dense)
+    const GrB_Matrix A,     // first input matrix (dense)
+    const GrB_Matrix B,     // second input matrix (dense)
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // after the included template body
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_int32
+(
+    GrB_Matrix C,                   // dense matrix that is accumulated into
+    const GrB_Matrix A,             // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, not used here
+    const int64_t *GB_RESTRICT klast_slice,     // directly; presumably read by
+    const int64_t *GB_RESTRICT pstart_slice,    // the template (TODO confirm)
+    const int ntasks,               // presumably the number of tasks in the slices
+    const int nthreads              // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // the computation itself is supplied by the included template
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_int32
+(
+    GrB_Matrix C,               // dense matrix that is accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x (an int32_t)
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    // read the scalar without casting away the const qualifier of p_ywork
+    { 
+        int32_t ywork = (*((const int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // not reached: the block above always ends in a return
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_int64.c b/Source/Generated/GB_binop__islt_int64.c
index 2dd1be09b3..dcc65a4d51 100644
--- a/Source/Generated/GB_binop__islt_int64.c
+++ b/Source/Generated/GB_binop__islt_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_int64
-// A.*B function (eWiseMult):  GB_AemultB__islt_int64
-// A*D function (colscale):    GB_AxD__islt_int64
-// D*A function (rowscale):    GB_DxB__islt_int64
+// A+B function (eWiseAdd):         GB_AaddB__islt_int64
+// A.*B function (eWiseMult):       GB_AemultB__islt_int64
+// A*D function (colscale):         GB_AxD__islt_int64
+// D*A function (rowscale):         GB_DxB__islt_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// flag: is the op SECOND?  0 here, since this file's op is islt
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 here (the op is islt_int64)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 here (the op is islt_int64)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if one exists; "(none)" presumably = absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_INT64 || GxB_NO_ISLT_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no such kernel for islt_int64: disabled below)
+//------------------------------------------------------------------------------
+
+#if 0   // placeholder kept out of the build; "(none)" is not a real identifier
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISLT is not in that list, so this stub has no name and is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (hard-coded kernel for the islt_int64 op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int64
+(
+    GrB_Matrix C,           // output matrix (dense, per the banner above)
+    const GrB_Matrix A,     // first input (dense, per the banner above)
+    const GrB_Matrix B,     // second input (dense, per the banner above)
+    const int nthreads      // presumably the thread count used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body (not shown)
+    return (GrB_SUCCESS) ;      // reached once the included template completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (islt_int64 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_int64
+(
+    GrB_Matrix C,           // dense matrix accumulated into (per the banner)
+    const GrB_Matrix A,     // sparse matrix being accumulated (per the banner)
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // first/last vector and start
+    const int64_t *GB_RESTRICT pstart_slice,    // position per task -- confirm
+    const int ntasks,       // presumably the number of tasks in the slice arrays
+    const int nthreads      // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body (not shown)
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (islt_int64 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_int64
+(
+    GrB_Matrix C,               // dense matrix accumulated into (per the banner)
+    const GB_void *p_ywork,     // untyped, read-only pointer to the int64_t scalar
+    const int nthreads          // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        // p_ywork is const-qualified, so read the scalar through a const pointer
+        int64_t ywork = (*((const int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit point for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_int8.c b/Source/Generated/GB_binop__islt_int8.c
index c4873681d6..cc673d11ae 100644
--- a/Source/Generated/GB_binop__islt_int8.c
+++ b/Source/Generated/GB_binop__islt_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_int8
-// A.*B function (eWiseMult):  GB_AemultB__islt_int8
-// A*D function (colscale):    GB_AxD__islt_int8
-// D*A function (rowscale):    GB_DxB__islt_int8
+// A+B function (eWiseAdd):         GB_AaddB__islt_int8
+// A.*B function (eWiseMult):       GB_AemultB__islt_int8
+// A*D function (colscale):         GB_AxD__islt_int8
+// D*A function (rowscale):         GB_DxB__islt_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// flag: is the op SECOND?  0 here, since this file's op is islt
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 here (the op is islt_int8)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 here (the op is islt_int8)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if one exists; "(none)" presumably = absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_INT8 || GxB_NO_ISLT_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no such kernel for islt_int8: disabled below)
+//------------------------------------------------------------------------------
+
+#if 0   // placeholder kept out of the build; "(none)" is not a real identifier
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISLT is not in that list, so this stub has no name and is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (hard-coded kernel for the islt_int8 op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_int8
+(
+    GrB_Matrix C,           // output matrix (dense, per the banner above)
+    const GrB_Matrix A,     // first input (dense, per the banner above)
+    const GrB_Matrix B,     // second input (dense, per the banner above)
+    const int nthreads      // presumably the thread count used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body (not shown)
+    return (GrB_SUCCESS) ;      // reached once the included template completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (islt_int8 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_int8
+(
+    GrB_Matrix C,           // dense matrix accumulated into (per the banner)
+    const GrB_Matrix A,     // sparse matrix being accumulated (per the banner)
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // first/last vector and start
+    const int64_t *GB_RESTRICT pstart_slice,    // position per task -- confirm
+    const int ntasks,       // presumably the number of tasks in the slice arrays
+    const int nthreads      // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body (not shown)
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (islt_int8 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_int8
+(
+    GrB_Matrix C,               // dense matrix accumulated into (per the banner)
+    const GB_void *p_ywork,     // untyped, read-only pointer to the int8_t scalar
+    const int nthreads          // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        // p_ywork is const-qualified, so read the scalar through a const pointer
+        int8_t ywork = (*((const int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit point for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_uint16.c b/Source/Generated/GB_binop__islt_uint16.c
index 05104d213e..66bea8441e 100644
--- a/Source/Generated/GB_binop__islt_uint16.c
+++ b/Source/Generated/GB_binop__islt_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_uint16
-// A.*B function (eWiseMult):  GB_AemultB__islt_uint16
-// A*D function (colscale):    GB_AxD__islt_uint16
-// D*A function (rowscale):    GB_DxB__islt_uint16
+// A+B function (eWiseAdd):         GB_AaddB__islt_uint16
+// A.*B function (eWiseMult):       GB_AemultB__islt_uint16
+// A*D function (colscale):         GB_AxD__islt_uint16
+// D*A function (rowscale):         GB_DxB__islt_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// flag: is the op SECOND?  0 here, since this file's op is islt
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 here (the op is islt_uint16)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 here (the op is islt_uint16)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if one exists; "(none)" presumably = absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_UINT16 || GxB_NO_ISLT_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no such kernel for islt_uint16: disabled below)
+//------------------------------------------------------------------------------
+
+#if 0   // placeholder kept out of the build; "(none)" is not a real identifier
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISLT is not in that list, so this stub has no name and is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (hard-coded kernel for the islt_uint16 op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint16
+(
+    GrB_Matrix C,           // output matrix (dense, per the banner above)
+    const GrB_Matrix A,     // first input (dense, per the banner above)
+    const GrB_Matrix B,     // second input (dense, per the banner above)
+    const int nthreads      // presumably the thread count used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body (not shown)
+    return (GrB_SUCCESS) ;      // reached once the included template completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (islt_uint16 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_uint16
+(
+    GrB_Matrix C,           // dense matrix accumulated into (per the banner)
+    const GrB_Matrix A,     // sparse matrix being accumulated (per the banner)
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // first/last vector and start
+    const int64_t *GB_RESTRICT pstart_slice,    // position per task -- confirm
+    const int ntasks,       // presumably the number of tasks in the slice arrays
+    const int nthreads      // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body (not shown)
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (islt_uint16 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_uint16
+(
+    GrB_Matrix C,               // dense matrix accumulated into (per the banner)
+    const GB_void *p_ywork,     // untyped, read-only pointer to the uint16_t scalar
+    const int nthreads          // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        // p_ywork is const-qualified, so read the scalar through a const pointer
+        uint16_t ywork = (*((const uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit point for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_uint32.c b/Source/Generated/GB_binop__islt_uint32.c
index 9425a988cc..69638beab5 100644
--- a/Source/Generated/GB_binop__islt_uint32.c
+++ b/Source/Generated/GB_binop__islt_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_uint32
-// A.*B function (eWiseMult):  GB_AemultB__islt_uint32
-// A*D function (colscale):    GB_AxD__islt_uint32
-// D*A function (rowscale):    GB_DxB__islt_uint32
+// A+B function (eWiseAdd):         GB_AaddB__islt_uint32
+// A.*B function (eWiseMult):       GB_AemultB__islt_uint32
+// A*D function (colscale):         GB_AxD__islt_uint32
+// D*A function (rowscale):         GB_DxB__islt_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// flag: is the op SECOND?  0 here, since this file's op is islt
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 here (the op is islt_uint32)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 here (the op is islt_uint32)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if one exists; "(none)" presumably = absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_UINT32 || GxB_NO_ISLT_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no such kernel for islt_uint32: disabled below)
+//------------------------------------------------------------------------------
+
+#if 0   // placeholder kept out of the build; "(none)" is not a real identifier
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISLT is not in that list, so this stub has no name and is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (hard-coded kernel for the islt_uint32 op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint32
+(
+    GrB_Matrix C,           // output matrix (dense, per the banner above)
+    const GrB_Matrix A,     // first input (dense, per the banner above)
+    const GrB_Matrix B,     // second input (dense, per the banner above)
+    const int nthreads      // presumably the thread count used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body (not shown)
+    return (GrB_SUCCESS) ;      // reached once the included template completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (islt_uint32 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_uint32
+(
+    GrB_Matrix C,           // dense matrix accumulated into (per the banner)
+    const GrB_Matrix A,     // sparse matrix being accumulated (per the banner)
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // first/last vector and start
+    const int64_t *GB_RESTRICT pstart_slice,    // position per task -- confirm
+    const int ntasks,       // presumably the number of tasks in the slice arrays
+    const int nthreads      // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body (not shown)
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (islt_uint32 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_uint32
+(
+    GrB_Matrix C,               // dense matrix accumulated into (per the banner)
+    const GB_void *p_ywork,     // untyped, read-only pointer to the uint32_t scalar
+    const int nthreads          // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        // p_ywork is const-qualified, so read the scalar through a const pointer
+        uint32_t ywork = (*((const uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit point for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_uint64.c b/Source/Generated/GB_binop__islt_uint64.c
index da9fabc936..5a61db72ae 100644
--- a/Source/Generated/GB_binop__islt_uint64.c
+++ b/Source/Generated/GB_binop__islt_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_uint64
-// A.*B function (eWiseMult):  GB_AemultB__islt_uint64
-// A*D function (colscale):    GB_AxD__islt_uint64
-// D*A function (rowscale):    GB_DxB__islt_uint64
+// A+B function (eWiseAdd):         GB_AaddB__islt_uint64
+// A.*B function (eWiseMult):       GB_AemultB__islt_uint64
+// A*D function (colscale):         GB_AxD__islt_uint64
+// D*A function (rowscale):         GB_DxB__islt_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// flag: is the op SECOND?  0 here, since this file's op is islt
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 here (the op is islt_uint64)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 here (the op is islt_uint64)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if one exists; "(none)" presumably = absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_UINT64 || GxB_NO_ISLT_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no such kernel for islt_uint64: disabled below)
+//------------------------------------------------------------------------------
+
+#if 0   // placeholder kept out of the build; "(none)" is not a real identifier
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISLT is not in that list, so this stub has no name and is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (hard-coded kernel for the islt_uint64 op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint64
+(
+    GrB_Matrix C,           // output matrix (dense, per the banner above)
+    const GrB_Matrix A,     // first input (dense, per the banner above)
+    const GrB_Matrix B,     // second input (dense, per the banner above)
+    const int nthreads      // presumably the thread count used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body (not shown)
+    return (GrB_SUCCESS) ;      // reached once the included template completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (islt_uint64 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_uint64
+(
+    GrB_Matrix C,           // dense matrix accumulated into (per the banner)
+    const GrB_Matrix A,     // sparse matrix being accumulated (per the banner)
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // first/last vector and start
+    const int64_t *GB_RESTRICT pstart_slice,    // position per task -- confirm
+    const int ntasks,       // presumably the number of tasks in the slice arrays
+    const int nthreads      // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body (not shown)
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (islt_uint64 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_uint64
+(
+    GrB_Matrix C,               // dense matrix accumulated into (per the banner)
+    const GB_void *p_ywork,     // untyped, read-only pointer to the uint64_t scalar
+    const int nthreads          // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        // p_ywork is const-qualified, so read the scalar through a const pointer
+        uint64_t ywork = (*((const uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit point for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__islt_uint8.c b/Source/Generated/GB_binop__islt_uint8.c
index 48ac987fd8..92a5e8e2c9 100644
--- a/Source/Generated/GB_binop__islt_uint8.c
+++ b/Source/Generated/GB_binop__islt_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__islt_uint8
-// A.*B function (eWiseMult):  GB_AemultB__islt_uint8
-// A*D function (colscale):    GB_AxD__islt_uint8
-// D*A function (rowscale):    GB_DxB__islt_uint8
+// A+B function (eWiseAdd):         GB_AaddB__islt_uint8
+// A.*B function (eWiseMult):       GB_AemultB__islt_uint8
+// A*D function (colscale):         GB_AxD__islt_uint8
+// D*A function (rowscale):         GB_DxB__islt_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__islt_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__islt_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__islt_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// flag: is the op SECOND?  0 here, since this file's op is islt
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 here (the op is islt_uint8)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 here (the op is islt_uint8)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if one exists; "(none)" presumably = absent:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISLT || GxB_NO_UINT8 || GxB_NO_ISLT_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no such kernel for islt_uint8: disabled below)
+//------------------------------------------------------------------------------
+
+#if 0   // placeholder kept out of the build; "(none)" is not a real identifier
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// ISLT is not in that list, so this stub has no name and is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (hard-coded kernel for the islt_uint8 op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__islt_uint8
+(
+    GrB_Matrix C,           // output matrix (dense, per the banner above)
+    const GrB_Matrix A,     // first input (dense, per the banner above)
+    const GrB_Matrix B,     // second input (dense, per the banner above)
+    const int nthreads      // presumably the thread count used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body (not shown)
+    return (GrB_SUCCESS) ;      // reached once the included template completes
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (islt_uint8 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__islt_uint8
+(
+    GrB_Matrix C,           // dense matrix accumulated into (per the banner)
+    const GrB_Matrix A,     // sparse matrix being accumulated (per the banner)
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // first/last vector and start
+    const int64_t *GB_RESTRICT pstart_slice,    // position per task -- confirm
+    const int ntasks,       // presumably the number of tasks in the slice arrays
+    const int nthreads      // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body (not shown)
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (islt_uint8 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__islt_uint8
+(
+    GrB_Matrix C,               // dense matrix accumulated into (per the banner)
+    const GB_void *p_ywork,     // untyped, read-only pointer to the uint8_t scalar
+    const int nthreads          // presumably the thread count used by the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // this kernel is compiled out via GB_DISABLE
+    #else
+    
+    { 
+        // p_ywork is const-qualified, so read the scalar through a const pointer
+        uint8_t ywork = (*((const uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit point for the enabled kernel
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__islt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__islt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_fp32.c b/Source/Generated/GB_binop__isne_fp32.c
index c55618398d..3bb7f4abeb 100644
--- a/Source/Generated/GB_binop__isne_fp32.c
+++ b/Source/Generated/GB_binop__isne_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_fp32
-// A.*B function (eWiseMult):  GB_AemultB__isne_fp32
-// A*D function (colscale):    GB_AxD__isne_fp32
-// D*A function (rowscale):    GB_DxB__isne_fp32
+// A+B function (eWiseAdd):         GB_AaddB__isne_fp32
+// A.*B function (eWiseMult):       GB_AemultB__isne_fp32
+// A*D function (colscale):         GB_AxD__isne_fp32
+// D*A function (rowscale):         GB_DxB__isne_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_FP32 || GxB_NO_ISNE_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_fp64.c b/Source/Generated/GB_binop__isne_fp64.c
index 10d7280272..ee6e9eb47d 100644
--- a/Source/Generated/GB_binop__isne_fp64.c
+++ b/Source/Generated/GB_binop__isne_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_fp64
-// A.*B function (eWiseMult):  GB_AemultB__isne_fp64
-// A*D function (colscale):    GB_AxD__isne_fp64
-// D*A function (rowscale):    GB_DxB__isne_fp64
+// A+B function (eWiseAdd):         GB_AaddB__isne_fp64
+// A.*B function (eWiseMult):       GB_AemultB__isne_fp64
+// A*D function (colscale):         GB_AxD__isne_fp64
+// D*A function (rowscale):         GB_DxB__isne_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_FP64 || GxB_NO_ISNE_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_int16.c b/Source/Generated/GB_binop__isne_int16.c
index 47aa68cd40..da06494814 100644
--- a/Source/Generated/GB_binop__isne_int16.c
+++ b/Source/Generated/GB_binop__isne_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_int16
-// A.*B function (eWiseMult):  GB_AemultB__isne_int16
-// A*D function (colscale):    GB_AxD__isne_int16
-// D*A function (rowscale):    GB_DxB__isne_int16
+// A+B function (eWiseAdd):         GB_AaddB__isne_int16
+// A.*B function (eWiseMult):       GB_AemultB__isne_int16
+// A*D function (colscale):         GB_AxD__isne_int16
+// D*A function (rowscale):         GB_DxB__isne_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_INT16 || GxB_NO_ISNE_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_int32.c b/Source/Generated/GB_binop__isne_int32.c
index 012e6982a1..1cbf8f2f71 100644
--- a/Source/Generated/GB_binop__isne_int32.c
+++ b/Source/Generated/GB_binop__isne_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_int32
-// A.*B function (eWiseMult):  GB_AemultB__isne_int32
-// A*D function (colscale):    GB_AxD__isne_int32
-// D*A function (rowscale):    GB_DxB__isne_int32
+// A+B function (eWiseAdd):         GB_AaddB__isne_int32
+// A.*B function (eWiseMult):       GB_AemultB__isne_int32
+// A*D function (colscale):         GB_AxD__isne_int32
+// D*A function (rowscale):         GB_DxB__isne_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_INT32 || GxB_NO_ISNE_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_int64.c b/Source/Generated/GB_binop__isne_int64.c
index 9e580ab805..97dc661607 100644
--- a/Source/Generated/GB_binop__isne_int64.c
+++ b/Source/Generated/GB_binop__isne_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_int64
-// A.*B function (eWiseMult):  GB_AemultB__isne_int64
-// A*D function (colscale):    GB_AxD__isne_int64
-// D*A function (rowscale):    GB_DxB__isne_int64
+// A+B function (eWiseAdd):         GB_AaddB__isne_int64
+// A.*B function (eWiseMult):       GB_AemultB__isne_int64
+// A*D function (colscale):         GB_AxD__isne_int64
+// D*A function (rowscale):         GB_DxB__isne_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_INT64 || GxB_NO_ISNE_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_int8.c b/Source/Generated/GB_binop__isne_int8.c
index 06269fd084..579ec52ad1 100644
--- a/Source/Generated/GB_binop__isne_int8.c
+++ b/Source/Generated/GB_binop__isne_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_int8
-// A.*B function (eWiseMult):  GB_AemultB__isne_int8
-// A*D function (colscale):    GB_AxD__isne_int8
-// D*A function (rowscale):    GB_DxB__isne_int8
+// A+B function (eWiseAdd):         GB_AaddB__isne_int8
+// A.*B function (eWiseMult):       GB_AemultB__isne_int8
+// A*D function (colscale):         GB_AxD__isne_int8
+// D*A function (rowscale):         GB_DxB__isne_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_INT8 || GxB_NO_ISNE_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_uint16.c b/Source/Generated/GB_binop__isne_uint16.c
index 8b345ac88a..510612dda5 100644
--- a/Source/Generated/GB_binop__isne_uint16.c
+++ b/Source/Generated/GB_binop__isne_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_uint16
-// A.*B function (eWiseMult):  GB_AemultB__isne_uint16
-// A*D function (colscale):    GB_AxD__isne_uint16
-// D*A function (rowscale):    GB_DxB__isne_uint16
+// A+B function (eWiseAdd):         GB_AaddB__isne_uint16
+// A.*B function (eWiseMult):       GB_AemultB__isne_uint16
+// A*D function (colscale):         GB_AxD__isne_uint16
+// D*A function (rowscale):         GB_DxB__isne_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint16_t here)
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type;
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_UINT16 || GxB_NO_ISNE_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // never compiled: no C+=A+B kernel is generated here, hence "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // if 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint16
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input
+    const GrB_Matrix B,         // dense input
+    const int nthreads          // unused in this wrapper; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_uint16
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // two counts are not touched
+    const int64_t *GB_RESTRICT pstart_slice,    // by this wrapper; only the
+    const int ntasks,                           // included template can use
+    const int nthreads                          // them
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;          // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_uint16
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // points to the scalar x; read as uint16_t
+    const int nthreads              // unused in this wrapper
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // NOTE(review): looks unreachable, block above returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_uint32.c b/Source/Generated/GB_binop__isne_uint32.c
index e1285fb660..c347cb49ef 100644
--- a/Source/Generated/GB_binop__isne_uint32.c
+++ b/Source/Generated/GB_binop__isne_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_uint32
-// A.*B function (eWiseMult):  GB_AemultB__isne_uint32
-// A*D function (colscale):    GB_AxD__isne_uint32
-// D*A function (rowscale):    GB_DxB__isne_uint32
+// A+B function (eWiseAdd):         GB_AaddB__isne_uint32
+// A.*B function (eWiseMult):       GB_AemultB__isne_uint32
+// A*D function (colscale):         GB_AxD__isne_uint32
+// D*A function (rowscale):         GB_DxB__isne_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint32_t here)
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type;
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_UINT32 || GxB_NO_ISNE_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // never compiled: no C+=A+B kernel is generated here, hence "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // if 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint32
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input
+    const GrB_Matrix B,         // dense input
+    const int nthreads          // unused in this wrapper; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_uint32
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // two counts are not touched
+    const int64_t *GB_RESTRICT pstart_slice,    // by this wrapper; only the
+    const int ntasks,                           // included template can use
+    const int nthreads                          // them
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;          // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_uint32
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // points to the scalar x; read as uint32_t
+    const int nthreads              // unused in this wrapper
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // NOTE(review): looks unreachable, block above returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_uint64.c b/Source/Generated/GB_binop__isne_uint64.c
index e648971204..63ec04cd91 100644
--- a/Source/Generated/GB_binop__isne_uint64.c
+++ b/Source/Generated/GB_binop__isne_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_uint64
-// A.*B function (eWiseMult):  GB_AemultB__isne_uint64
-// A*D function (colscale):    GB_AxD__isne_uint64
-// D*A function (rowscale):    GB_DxB__isne_uint64
+// A+B function (eWiseAdd):         GB_AaddB__isne_uint64
+// A.*B function (eWiseMult):       GB_AemultB__isne_uint64
+// A*D function (colscale):         GB_AxD__isne_uint64
+// D*A function (rowscale):         GB_DxB__isne_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint64_t here)
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type;
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_UINT64 || GxB_NO_ISNE_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // never compiled: no C+=A+B kernel is generated here, hence "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // if 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint64
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input
+    const GrB_Matrix B,         // dense input
+    const int nthreads          // unused in this wrapper; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_uint64
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // two counts are not touched
+    const int64_t *GB_RESTRICT pstart_slice,    // by this wrapper; only the
+    const int ntasks,                           // included template can use
+    const int nthreads                          // them
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;          // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_uint64
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // points to the scalar x; read as uint64_t
+    const int nthreads              // unused in this wrapper
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // NOTE(review): looks unreachable, block above returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__isne_uint8.c b/Source/Generated/GB_binop__isne_uint8.c
index ea02933511..fcb3323be2 100644
--- a/Source/Generated/GB_binop__isne_uint8.c
+++ b/Source/Generated/GB_binop__isne_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__isne_uint8
-// A.*B function (eWiseMult):  GB_AemultB__isne_uint8
-// A*D function (colscale):    GB_AxD__isne_uint8
-// D*A function (rowscale):    GB_DxB__isne_uint8
+// A+B function (eWiseAdd):         GB_AaddB__isne_uint8
+// A.*B function (eWiseMult):       GB_AemultB__isne_uint8
+// A*D function (colscale):         GB_AxD__isne_uint8
+// D*A function (rowscale):         GB_DxB__isne_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__isne_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__isne_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__isne_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint8_t here)
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type;
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_ISNE || GxB_NO_UINT8 || GxB_NO_ISNE_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // never compiled: no C+=A+B kernel is generated here, hence "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // if 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__isne_uint8
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input
+    const GrB_Matrix B,         // dense input
+    const int nthreads          // unused in this wrapper; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__isne_uint8
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // two counts are not touched
+    const int64_t *GB_RESTRICT pstart_slice,    // by this wrapper; only the
+    const int ntasks,                           // included template can use
+    const int nthreads                          // them
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;          // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__isne_uint8
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // points to the scalar x; read as uint8_t
+    const int nthreads              // unused in this wrapper
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;      // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // NOTE(review): looks unreachable, block above returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__isne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__isne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_bool.c b/Source/Generated/GB_binop__land_bool.c
index f59dbe5a67..dad86836e7 100644
--- a/Source/Generated/GB_binop__land_bool.c
+++ b/Source/Generated/GB_binop__land_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_bool
-// A.*B function (eWiseMult):  GB_AemultB__land_bool
-// A*D function (colscale):    GB_AxD__land_bool
-// D*A function (rowscale):    GB_DxB__land_bool
+// A+B function (eWiseAdd):         GB_AaddB__land_bool
+// A.*B function (eWiseMult):       GB_AemultB__land_bool
+// A*D function (colscale):         GB_AxD__land_bool
+// D*A function (rowscale):         GB_DxB__land_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__land_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__land_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool here)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x && y) ;
 
+// flag: op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type;
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_BOOL || GxB_NO_LAND_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // never compiled: no C+=A+B kernel is generated here, hence "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // if 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_bool
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input
+    const GrB_Matrix B,         // dense input
+    const int nthreads          // unused in this wrapper; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_bool
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // two counts are not touched
+    const int64_t *GB_RESTRICT pstart_slice,    // by this wrapper; only the
+    const int ntasks,                           // included template can use
+    const int nthreads                          // them
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;          // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_bool
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // points to the scalar x; read as bool
+    const int nthreads              // unused in this wrapper
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // NOTE(review): looks unreachable, block above returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_fp32.c b/Source/Generated/GB_binop__land_fp32.c
index 803111320e..031298b612 100644
--- a/Source/Generated/GB_binop__land_fp32.c
+++ b/Source/Generated/GB_binop__land_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_fp32
-// A.*B function (eWiseMult):  GB_AemultB__land_fp32
-// A*D function (colscale):    GB_AxD__land_fp32
-// D*A function (rowscale):    GB_DxB__land_fp32
+// A+B function (eWiseAdd):         GB_AaddB__land_fp32
+// A.*B function (eWiseMult):       GB_AemultB__land_fp32
+// A*D function (colscale):         GB_AxD__land_fp32
+// D*A function (rowscale):         GB_DxB__land_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__land_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__land_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (float here)
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// flag: op is second (0: not the case for this operator)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0: not the case for this operator)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type;
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_FP32 || GxB_NO_LAND_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // never compiled: no C+=A+B kernel is generated here, hence "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would otherwise appear
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // if 0
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_fp32
+(
+    GrB_Matrix C,               // dense result matrix
+    const GrB_Matrix A,         // dense input
+    const GrB_Matrix B,         // dense input
+    const int nthreads          // unused in this wrapper; visible to the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_fp32
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // two counts are not touched
+    const int64_t *GB_RESTRICT pstart_slice,    // by this wrapper; only the
+    const int ntasks,                           // included template can use
+    const int nthreads                          // them
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;          // status returned after the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_fp32
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // points to the scalar x; read as float
+    const int nthreads              // unused in this wrapper
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: GB_DISABLE is nonzero
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // NOTE(review): looks unreachable, block above returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_fp64.c b/Source/Generated/GB_binop__land_fp64.c
index 1d6f55e2dd..a6dfd3d217 100644
--- a/Source/Generated/GB_binop__land_fp64.c
+++ b/Source/Generated/GB_binop__land_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_fp64
-// A.*B function (eWiseMult):  GB_AemultB__land_fp64
-// A*D function (colscale):    GB_AxD__land_fp64
-// D*A function (rowscale):    GB_DxB__land_fp64
+// A+B function (eWiseAdd):         GB_AaddB__land_fp64
+// A.*B function (eWiseMult):       GB_AemultB__land_fp64
+// A*D function (colscale):         GB_AxD__land_fp64
+// D*A function (rowscale):         GB_DxB__land_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__land_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__land_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (double in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// nonzero if the op is SECOND; 0 here since this file's op is LAND
+#define GB_OP_IS_SECOND \
+    0
+
+// nonzero if the op is plus_fp32 or plus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// nonzero if the op is minus_fp32 or minus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any; "(none)": no gateway for this op/type
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_FP64 || GxB_NO_LAND_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not available for LAND; kept as a stub)
+//------------------------------------------------------------------------------
+
+#if 0       // compiled out: no C+=A+B kernel is generated for this operator
+// "(none)" below is the placeholder name for the kernel that does not exist.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,           // matrix that would be accumulated into
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif      // end of the disabled C+=A+B stub
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, where "+" denotes the LAND operator
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_fp64    // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count; presumably used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body lives here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_fp64        // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                           // dense matrix, updated in place
+    const GrB_Matrix A,                     // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // from GB_ek_slice.h routines;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm with the caller
+    const int ntasks,                       // presumably the number of slices
+    const int nthreads                      // thread count, for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // the template is expanded inside its own brace-delimited scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_fp64    // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix, updated in place
+    const GB_void *p_ywork,     // pointer to the scalar x, read as a double
+    const int nthreads          // thread count; presumably for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // load the scalar as a double; the template presumably consumes ywork
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): not reachable, the block above always returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_int16.c b/Source/Generated/GB_binop__land_int16.c
index d19cd3e906..7ae9a40f54 100644
--- a/Source/Generated/GB_binop__land_int16.c
+++ b/Source/Generated/GB_binop__land_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_int16
-// A.*B function (eWiseMult):  GB_AemultB__land_int16
-// A*D function (colscale):    GB_AxD__land_int16
-// D*A function (rowscale):    GB_DxB__land_int16
+// A+B function (eWiseAdd):         GB_AaddB__land_int16
+// A.*B function (eWiseMult):       GB_AemultB__land_int16
+// A*D function (colscale):         GB_AxD__land_int16
+// D*A function (rowscale):         GB_DxB__land_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__land_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__land_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int16_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// nonzero if the op is SECOND; 0 here since this file's op is LAND
+#define GB_OP_IS_SECOND \
+    0
+
+// nonzero if the op is plus_fp32 or plus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// nonzero if the op is minus_fp32 or minus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any; "(none)": no gateway for this op/type
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_INT16 || GxB_NO_LAND_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not available for LAND; kept as a stub)
+//------------------------------------------------------------------------------
+
+#if 0       // compiled out: no C+=A+B kernel is generated for this operator
+// "(none)" below is the placeholder name for the kernel that does not exist.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,           // matrix that would be accumulated into
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif      // end of the disabled C+=A+B stub
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, where "+" denotes the LAND operator
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int16   // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count; presumably used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body lives here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_int16       // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                           // dense matrix, updated in place
+    const GrB_Matrix A,                     // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // from GB_ek_slice.h routines;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm with the caller
+    const int ntasks,                       // presumably the number of slices
+    const int nthreads                      // thread count, for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // the template is expanded inside its own brace-delimited scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_int16   // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix, updated in place
+    const GB_void *p_ywork,     // pointer to the scalar x, read as an int16_t
+    const int nthreads          // thread count; presumably for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // load the scalar as an int16_t; the template presumably consumes ywork
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): not reachable, the block above always returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_int32.c b/Source/Generated/GB_binop__land_int32.c
index f7280c9452..98ea3250b6 100644
--- a/Source/Generated/GB_binop__land_int32.c
+++ b/Source/Generated/GB_binop__land_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_int32
-// A.*B function (eWiseMult):  GB_AemultB__land_int32
-// A*D function (colscale):    GB_AxD__land_int32
-// D*A function (rowscale):    GB_DxB__land_int32
+// A+B function (eWiseAdd):         GB_AaddB__land_int32
+// A.*B function (eWiseMult):       GB_AemultB__land_int32
+// A*D function (colscale):         GB_AxD__land_int32
+// D*A function (rowscale):         GB_DxB__land_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__land_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__land_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// nonzero if the op is SECOND; 0 here since this file's op is LAND
+#define GB_OP_IS_SECOND \
+    0
+
+// nonzero if the op is plus_fp32 or plus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// nonzero if the op is minus_fp32 or minus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any; "(none)": no gateway for this op/type
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_INT32 || GxB_NO_LAND_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not available for LAND; kept as a stub)
+//------------------------------------------------------------------------------
+
+#if 0       // compiled out: no C+=A+B kernel is generated for this operator
+// "(none)" below is the placeholder name for the kernel that does not exist.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,           // matrix that would be accumulated into
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif      // end of the disabled C+=A+B stub
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, where "+" denotes the LAND operator
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int32   // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count; presumably used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body lives here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_int32       // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                           // dense matrix, updated in place
+    const GrB_Matrix A,                     // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // from GB_ek_slice.h routines;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm with the caller
+    const int ntasks,                       // presumably the number of slices
+    const int nthreads                      // thread count, for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // the template is expanded inside its own brace-delimited scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_int32   // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix, updated in place
+    const GB_void *p_ywork,     // pointer to the scalar x, read as an int32_t
+    const int nthreads          // thread count; presumably for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // load the scalar as an int32_t; the template presumably consumes ywork
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): not reachable, the block above always returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_int64.c b/Source/Generated/GB_binop__land_int64.c
index c625b772e4..6970a7452d 100644
--- a/Source/Generated/GB_binop__land_int64.c
+++ b/Source/Generated/GB_binop__land_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_int64
-// A.*B function (eWiseMult):  GB_AemultB__land_int64
-// A*D function (colscale):    GB_AxD__land_int64
-// D*A function (rowscale):    GB_DxB__land_int64
+// A+B function (eWiseAdd):         GB_AaddB__land_int64
+// A.*B function (eWiseMult):       GB_AemultB__land_int64
+// A*D function (colscale):         GB_AxD__land_int64
+// D*A function (rowscale):         GB_DxB__land_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__land_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__land_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int64_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// nonzero if the op is SECOND; 0 here since this file's op is LAND
+#define GB_OP_IS_SECOND \
+    0
+
+// nonzero if the op is plus_fp32 or plus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// nonzero if the op is minus_fp32 or minus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any; "(none)": no gateway for this op/type
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_INT64 || GxB_NO_LAND_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not available for LAND; kept as a stub)
+//------------------------------------------------------------------------------
+
+#if 0       // compiled out: no C+=A+B kernel is generated for this operator
+// "(none)" below is the placeholder name for the kernel that does not exist.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,           // matrix that would be accumulated into
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif      // end of the disabled C+=A+B stub
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, where "+" denotes the LAND operator
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int64   // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count; presumably used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body lives here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_int64       // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                           // dense matrix, updated in place
+    const GrB_Matrix A,                     // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // from GB_ek_slice.h routines;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm with the caller
+    const int ntasks,                       // presumably the number of slices
+    const int nthreads                      // thread count, for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // the template is expanded inside its own brace-delimited scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_int64   // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix, updated in place
+    const GB_void *p_ywork,     // pointer to the scalar x, read as an int64_t
+    const int nthreads          // thread count; presumably for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // load the scalar as an int64_t; the template presumably consumes ywork
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): not reachable, the block above always returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_int8.c b/Source/Generated/GB_binop__land_int8.c
index bc48a30ead..6d76016190 100644
--- a/Source/Generated/GB_binop__land_int8.c
+++ b/Source/Generated/GB_binop__land_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_int8
-// A.*B function (eWiseMult):  GB_AemultB__land_int8
-// A*D function (colscale):    GB_AxD__land_int8
-// D*A function (rowscale):    GB_DxB__land_int8
+// A+B function (eWiseAdd):         GB_AaddB__land_int8
+// A.*B function (eWiseMult):       GB_AemultB__land_int8
+// A*D function (colscale):         GB_AxD__land_int8
+// D*A function (rowscale):         GB_DxB__land_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__land_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__land_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int8_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// nonzero if the op is SECOND; 0 here since this file's op is LAND
+#define GB_OP_IS_SECOND \
+    0
+
+// nonzero if the op is plus_fp32 or plus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// nonzero if the op is minus_fp32 or minus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any; "(none)": no gateway for this op/type
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_INT8 || GxB_NO_LAND_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not available for LAND; kept as a stub)
+//------------------------------------------------------------------------------
+
+#if 0       // compiled out: no C+=A+B kernel is generated for this operator
+// "(none)" below is the placeholder name for the kernel that does not exist.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,           // matrix that would be accumulated into
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif      // end of the disabled C+=A+B stub
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, where "+" denotes the LAND operator
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_int8    // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count; presumably used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body lives here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_int8        // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                           // dense matrix, updated in place
+    const GrB_Matrix A,                     // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // from GB_ek_slice.h routines;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm with the caller
+    const int ntasks,                       // presumably the number of slices
+    const int nthreads                      // thread count, for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // the template is expanded inside its own brace-delimited scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_int8    // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix, updated in place
+    const GB_void *p_ywork,     // pointer to the scalar x, read as an int8_t
+    const int nthreads          // thread count; presumably for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // load the scalar as an int8_t; the template presumably consumes ywork
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): not reachable, the block above always returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_uint16.c b/Source/Generated/GB_binop__land_uint16.c
index 0466632b80..ccb0a0a1e8 100644
--- a/Source/Generated/GB_binop__land_uint16.c
+++ b/Source/Generated/GB_binop__land_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_uint16
-// A.*B function (eWiseMult):  GB_AemultB__land_uint16
-// A*D function (colscale):    GB_AxD__land_uint16
-// D*A function (rowscale):    GB_DxB__land_uint16
+// A+B function (eWiseAdd):         GB_AaddB__land_uint16
+// A.*B function (eWiseMult):       GB_AemultB__land_uint16
+// A*D function (colscale):         GB_AxD__land_uint16
+// D*A function (rowscale):         GB_DxB__land_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__land_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__land_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint16_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// nonzero if the op is SECOND; 0 here since this file's op is LAND
+#define GB_OP_IS_SECOND \
+    0
+
+// nonzero if the op is plus_fp32 or plus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// nonzero if the op is minus_fp32 or minus_fp64; 0 here (op is LAND)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if any; "(none)": no gateway for this op/type
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_UINT16 || GxB_NO_LAND_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not available for LAND; kept as a stub)
+//------------------------------------------------------------------------------
+
+#if 0       // compiled out: no C+=A+B kernel is generated for this operator
+// "(none)" below is the placeholder name for the kernel that does not exist.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,           // matrix that would be accumulated into
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif      // end of the disabled C+=A+B stub
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense, where "+" denotes the LAND operator
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint16  // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // output matrix
+    const GrB_Matrix A,     // first input matrix
+    const GrB_Matrix B,     // second input matrix
+    const int nthreads      // thread count; presumably used by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body lives here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_uint16      // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                           // dense matrix, updated in place
+    const GrB_Matrix A,                     // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing of A; presumably
+    const int64_t *GB_RESTRICT klast_slice,     // from GB_ek_slice.h routines;
+    const int64_t *GB_RESTRICT pstart_slice,    // TODO confirm with the caller
+    const int ntasks,                       // presumably the number of slices
+    const int nthreads                      // thread count, for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // the template is expanded inside its own brace-delimited scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (accum op: LAND)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_uint16  // GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix, updated in place
+    const GB_void *p_ywork,     // pointer to the scalar x, read as a uint16_t
+    const int nthreads          // thread count; presumably for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: no kernel is built
+    #else
+    // load the scalar as a uint16_t; the template presumably consumes ywork
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): not reachable, the block above always returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_uint32.c b/Source/Generated/GB_binop__land_uint32.c
index f23eef24a3..0137e913f5 100644
--- a/Source/Generated/GB_binop__land_uint32.c
+++ b/Source/Generated/GB_binop__land_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_uint32
-// A.*B function (eWiseMult):  GB_AemultB__land_uint32
-// A*D function (colscale):    GB_AxD__land_uint32
-// D*A function (rowscale):    GB_DxB__land_uint32
+// A+B function (eWiseAdd):         GB_AaddB__land_uint32
+// A.*B function (eWiseMult):       GB_AemultB__land_uint32
+// A*D function (colscale):         GB_AxD__land_uint32
+// D*A function (rowscale):         GB_DxB__land_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__land_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__land_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint32_t for this kernel)
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// flag: is the op SECOND?  0 here, since this file's GB_BINOP is a logical AND
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 for this kernel
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 for this kernel
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no such routine
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_UINT32 || GxB_NO_LAND_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel for LAND: stub below is #if 0'd)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (the "+" is presumably the land_uint32 binop)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // whole body is the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (land_uint32 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and counts
+    const int64_t *GB_RESTRICT klast_slice,     // are not referenced here;
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably the template uses them
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (land_uint32 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer; read below as one uint32_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_uint64.c b/Source/Generated/GB_binop__land_uint64.c
index 352b8eb5a1..46dac1d047 100644
--- a/Source/Generated/GB_binop__land_uint64.c
+++ b/Source/Generated/GB_binop__land_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_uint64
-// A.*B function (eWiseMult):  GB_AemultB__land_uint64
-// A*D function (colscale):    GB_AxD__land_uint64
-// D*A function (rowscale):    GB_DxB__land_uint64
+// A+B function (eWiseAdd):         GB_AaddB__land_uint64
+// A.*B function (eWiseMult):       GB_AemultB__land_uint64
+// A*D function (colscale):         GB_AxD__land_uint64
+// D*A function (rowscale):         GB_DxB__land_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__land_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__land_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint64_t for this kernel)
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// flag: is the op SECOND?  0 here, since this file's GB_BINOP is a logical AND
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 for this kernel
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 for this kernel
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no such routine
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_UINT64 || GxB_NO_LAND_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel for LAND: stub below is #if 0'd)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (the "+" is presumably the land_uint64 binop)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // whole body is the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (land_uint64 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and counts
+    const int64_t *GB_RESTRICT klast_slice,     // are not referenced here;
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably the template uses them
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (land_uint64 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer; read below as one uint64_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__land_uint8.c b/Source/Generated/GB_binop__land_uint8.c
index 47f7c50bde..9b148e1dc2 100644
--- a/Source/Generated/GB_binop__land_uint8.c
+++ b/Source/Generated/GB_binop__land_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__land_uint8
-// A.*B function (eWiseMult):  GB_AemultB__land_uint8
-// A*D function (colscale):    GB_AxD__land_uint8
-// D*A function (rowscale):    GB_DxB__land_uint8
+// A+B function (eWiseAdd):         GB_AaddB__land_uint8
+// A.*B function (eWiseMult):       GB_AemultB__land_uint8
+// A*D function (colscale):         GB_AxD__land_uint8
+// D*A function (rowscale):         GB_DxB__land_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__land_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__land_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__land_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint8_t for this kernel)
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) && (y != 0)) ;
 
+// flag: is the op SECOND?  0 here, since this file's GB_BINOP is a logical AND
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 for this kernel
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 for this kernel
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no such routine
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_UINT8 || GxB_NO_LAND_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel for LAND: stub below is #if 0'd)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (the "+" is presumably the land_uint8 binop)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // whole body is the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (land_uint8 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__land_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and counts
+    const int64_t *GB_RESTRICT klast_slice,     // are not referenced here;
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably the template uses them
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (land_uint8 kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__land_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer; read below as one uint8_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;      // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__land_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__land_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_bool.c b/Source/Generated/GB_binop__le_bool.c
index 67ea6e279b..e3838b887e 100644
--- a/Source/Generated/GB_binop__le_bool.c
+++ b/Source/Generated/GB_binop__le_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_bool
-// A.*B function (eWiseMult):  GB_AemultB__le_bool
-// A*D function (colscale):    GB_AxD__le_bool
-// D*A function (rowscale):    GB_DxB__le_bool
+// A+B function (eWiseAdd):         GB_AaddB__le_bool
+// A.*B function (eWiseMult):       GB_AemultB__le_bool
+// A*D function (colscale):         GB_AxD__le_bool
+// D*A function (rowscale):         GB_DxB__le_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__le_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__le_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool for this kernel)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// flag: is the op SECOND?  0 here, since this file's GB_BINOP is (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 for this kernel
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 for this kernel
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no such routine
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_BOOL || GxB_NO_LE_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel for LE: stub below is #if 0'd)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (the "+" is presumably the le_bool binop)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // whole body is the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (le_bool kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and counts
+    const int64_t *GB_RESTRICT klast_slice,     // are not referenced here;
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably the template uses them
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (le_bool kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_bool
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer; read below as one bool
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // fallback; the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_fp32.c b/Source/Generated/GB_binop__le_fp32.c
index b1f40d4835..37f7acc0ca 100644
--- a/Source/Generated/GB_binop__le_fp32.c
+++ b/Source/Generated/GB_binop__le_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_fp32
-// A.*B function (eWiseMult):  GB_AemultB__le_fp32
-// A*D function (colscale):    GB_AxD__le_fp32
-// D*A function (rowscale):    GB_DxB__le_fp32
+// A+B function (eWiseAdd):         GB_AaddB__le_fp32
+// A.*B function (eWiseMult):       GB_AemultB__le_fp32
+// A*D function (colscale):         GB_AxD__le_fp32
+// D*A function (rowscale):         GB_DxB__le_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__le_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__le_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_fp32
 
 // C type:   bool
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool; A and B are float)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// flag: is the op SECOND?  0 here, since this file's GB_BINOP is (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 for this kernel
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 for this kernel
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no such routine
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_FP32 || GxB_NO_LE_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel for LE: stub below is #if 0'd)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (the "+" is presumably the le_fp32 binop)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // whole body is the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (le_fp32: no kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // all parameters are unused
+    const int64_t *GB_RESTRICT klast_slice,     // while the template below
+    const int64_t *GB_RESTRICT pstart_slice,    // stays inside #if 0
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // template is #if 0'd: C untouched, so not SUCCESS
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (le_fp32: no kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // unused while the kernel below is #if 0'd
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // template is #if 0'd: C untouched, so not SUCCESS
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_fp64.c b/Source/Generated/GB_binop__le_fp64.c
index 03246cad51..9ee670f65c 100644
--- a/Source/Generated/GB_binop__le_fp64.c
+++ b/Source/Generated/GB_binop__le_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_fp64
-// A.*B function (eWiseMult):  GB_AemultB__le_fp64
-// A*D function (colscale):    GB_AxD__le_fp64
-// D*A function (rowscale):    GB_DxB__le_fp64
+// A+B function (eWiseAdd):         GB_AaddB__le_fp64
+// A.*B function (eWiseMult):       GB_AemultB__le_fp64
+// A*D function (colscale):         GB_AxD__le_fp64
+// D*A function (rowscale):         GB_DxB__le_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__le_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__le_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_fp64
 
 // C type:   bool
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool; A and B are double)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// flag: is the op SECOND?  0 here, since this file's GB_BINOP is (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 for this kernel
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 for this kernel
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no such routine
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_FP64 || GxB_NO_LE_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (no kernel for LE: stub below is #if 0'd)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (the "+" is presumably the le_fp64 binop)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // whole body is the template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (le_fp64: no kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // all parameters are unused
+    const int64_t *GB_RESTRICT klast_slice,     // while the template below
+    const int64_t *GB_RESTRICT pstart_slice,    // stays inside #if 0
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // template is #if 0'd: C untouched, so not SUCCESS
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (le_fp64: no kernel)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // unused while the kernel below is #if 0'd
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out via GB_DISABLE
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // template is #if 0'd: C untouched, so not SUCCESS
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_int16.c b/Source/Generated/GB_binop__le_int16.c
index d4fe63be39..1b26f195bf 100644
--- a/Source/Generated/GB_binop__le_int16.c
+++ b/Source/Generated/GB_binop__le_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_int16
-// A.*B function (eWiseMult):  GB_AemultB__le_int16
-// A*D function (colscale):    GB_AxD__le_int16
-// D*A function (rowscale):    GB_DxB__le_int16
+// A+B function (eWiseAdd):         GB_AaddB__le_int16
+// A.*B function (eWiseMult):       GB_AemultB__le_int16
+// A*D function (colscale):         GB_AxD__le_int16
+// D*A function (rowscale):         GB_DxB__le_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__le_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__le_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second: 0, since this file's operator is z = (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this op and type (here: none)
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_INT16 || GxB_NO_LE_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is excluded from compilation
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                     // "(none)" placeholder: no function name here
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (body comes from the included template)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int16
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not referenced here; presumably read by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_int16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // read by this function: the
+    const int ntasks,                           // only code that could use
+    const int nthreads                          // them is disabled below
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // template excluded: no accumulation is done
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this operator)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_int16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read while the template is disabled
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // disabled: nothing in this block is compiled
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would load x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_int32.c b/Source/Generated/GB_binop__le_int32.c
index 17f9246e07..669663da8a 100644
--- a/Source/Generated/GB_binop__le_int32.c
+++ b/Source/Generated/GB_binop__le_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_int32
-// A.*B function (eWiseMult):  GB_AemultB__le_int32
-// A*D function (colscale):    GB_AxD__le_int32
-// D*A function (rowscale):    GB_DxB__le_int32
+// A+B function (eWiseAdd):         GB_AaddB__le_int32
+// A.*B function (eWiseMult):       GB_AemultB__le_int32
+// A*D function (colscale):         GB_AxD__le_int32
+// D*A function (rowscale):         GB_DxB__le_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__le_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__le_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second: 0, since this file's operator is z = (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this op and type (here: none)
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_INT32 || GxB_NO_LE_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is excluded from compilation
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                     // "(none)" placeholder: no function name here
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (body comes from the included template)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int32
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not referenced here; presumably read by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_int32
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // read by this function: the
+    const int ntasks,                           // only code that could use
+    const int nthreads                          // them is disabled below
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // template excluded: no accumulation is done
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this operator)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_int32
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read while the template is disabled
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // disabled: nothing in this block is compiled
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would load x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_int64.c b/Source/Generated/GB_binop__le_int64.c
index 60f845125b..aa9700a684 100644
--- a/Source/Generated/GB_binop__le_int64.c
+++ b/Source/Generated/GB_binop__le_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_int64
-// A.*B function (eWiseMult):  GB_AemultB__le_int64
-// A*D function (colscale):    GB_AxD__le_int64
-// D*A function (rowscale):    GB_DxB__le_int64
+// A+B function (eWiseAdd):         GB_AaddB__le_int64
+// A.*B function (eWiseMult):       GB_AemultB__le_int64
+// A*D function (colscale):         GB_AxD__le_int64
+// D*A function (rowscale):         GB_DxB__le_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__le_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__le_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second: 0, since this file's operator is z = (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this op and type (here: none)
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_INT64 || GxB_NO_LE_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is excluded from compilation
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                     // "(none)" placeholder: no function name here
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (body comes from the included template)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int64
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not referenced here; presumably read by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_int64
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // read by this function: the
+    const int ntasks,                           // only code that could use
+    const int nthreads                          // them is disabled below
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // template excluded: no accumulation is done
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this operator)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_int64
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read while the template is disabled
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // disabled: nothing in this block is compiled
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would load x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_int8.c b/Source/Generated/GB_binop__le_int8.c
index fb3321457c..8ace666767 100644
--- a/Source/Generated/GB_binop__le_int8.c
+++ b/Source/Generated/GB_binop__le_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_int8
-// A.*B function (eWiseMult):  GB_AemultB__le_int8
-// A*D function (colscale):    GB_AxD__le_int8
-// D*A function (rowscale):    GB_DxB__le_int8
+// A+B function (eWiseAdd):         GB_AaddB__le_int8
+// A.*B function (eWiseMult):       GB_AemultB__le_int8
+// A*D function (colscale):         GB_AxD__le_int8
+// D*A function (rowscale):         GB_DxB__le_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__le_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__le_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second: 0, since this file's operator is z = (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this op and type (here: none)
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_INT8 || GxB_NO_LE_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is excluded from compilation
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                     // "(none)" placeholder: no function name here
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (body comes from the included template)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_int8
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not referenced here; presumably read by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_int8
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // read by this function: the
+    const int ntasks,                           // only code that could use
+    const int nthreads                          // them is disabled below
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // template excluded: no accumulation is done
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this operator)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_int8
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read while the template is disabled
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // disabled: nothing in this block is compiled
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would load x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_uint16.c b/Source/Generated/GB_binop__le_uint16.c
index b1f7a91052..6d327b79f3 100644
--- a/Source/Generated/GB_binop__le_uint16.c
+++ b/Source/Generated/GB_binop__le_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_uint16
-// A.*B function (eWiseMult):  GB_AemultB__le_uint16
-// A*D function (colscale):    GB_AxD__le_uint16
-// D*A function (rowscale):    GB_DxB__le_uint16
+// A+B function (eWiseAdd):         GB_AaddB__le_uint16
+// A.*B function (eWiseMult):       GB_AemultB__le_uint16
+// A*D function (colscale):         GB_AxD__le_uint16
+// D*A function (rowscale):         GB_DxB__le_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__le_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__le_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second: 0, since this file's operator is z = (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this op and type (here: none)
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_UINT16 || GxB_NO_LE_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is excluded from compilation
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                     // "(none)" placeholder: no function name here
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (body comes from the included template)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint16
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not referenced here; presumably read by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_uint16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // read by this function: the
+    const int ntasks,                           // only code that could use
+    const int nthreads                          // them is disabled below
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // template excluded: no accumulation is done
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this operator)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_uint16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read while the template is disabled
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // disabled: nothing in this block is compiled
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would load x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_uint32.c b/Source/Generated/GB_binop__le_uint32.c
index 498a83ace4..2d1eccdc1e 100644
--- a/Source/Generated/GB_binop__le_uint32.c
+++ b/Source/Generated/GB_binop__le_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_uint32
-// A.*B function (eWiseMult):  GB_AemultB__le_uint32
-// A*D function (colscale):    GB_AxD__le_uint32
-// D*A function (rowscale):    GB_DxB__le_uint32
+// A+B function (eWiseAdd):         GB_AaddB__le_uint32
+// A.*B function (eWiseMult):       GB_AemultB__le_uint32
+// A*D function (colscale):         GB_AxD__le_uint32
+// D*A function (rowscale):         GB_DxB__le_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__le_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__le_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar t of the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second: 0, since this file's operator is z = (x <= y)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this op and type (here: none)
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_UINT32 || GxB_NO_LE_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is excluded from compilation
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)                     // "(none)" placeholder: no function name here
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (body comes from the included template)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint32
+(
+    GrB_Matrix C,               // result matrix
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // not referenced here; presumably read by the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // follows the template code
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_uint32
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // the slice arrays and the
+    const int64_t *GB_RESTRICT klast_slice,     // task/thread counts are not
+    const int64_t *GB_RESTRICT pstart_slice,    // read by this function: the
+    const int ntasks,                           // only code that could use
+    const int nthreads                          // them is disabled below
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // template excluded: no accumulation is done
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this operator)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_uint32
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read while the template is disabled
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out when GB_DISABLE is nonzero
+    #else
+    #if 0                       // disabled: nothing in this block is compiled
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would load x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success although C is unmodified
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_uint64.c b/Source/Generated/GB_binop__le_uint64.c
index 32c48f2470..6542226f09 100644
--- a/Source/Generated/GB_binop__le_uint64.c
+++ b/Source/Generated/GB_binop__le_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_uint64
-// A.*B function (eWiseMult):  GB_AemultB__le_uint64
-// A*D function (colscale):    GB_AxD__le_uint64
-// D*A function (rowscale):    GB_DxB__le_uint64
+// A+B function (eWiseAdd):         GB_AaddB__le_uint64
+// A.*B function (eWiseMult):       GB_AemultB__le_uint64
+// A*D function (colscale):         GB_AxD__le_uint64
+// D*A function (rowscale):         GB_DxB__le_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__le_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__le_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_UINT64 || GxB_NO_LE_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: this file's header lists the C+=A+B function as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint64
+(
+    GrB_Matrix C,               // result matrix of C = A+B (see the section banner)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the kernel body is supplied by this shared template
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_uint64
+(
+    GrB_Matrix C,                               // dense matrix that C += A would update
+    const GrB_Matrix A,                         // sparse matrix that would be accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays for the template; presumably from GB_ek_slice -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // (their meaning is defined by the template, not visible here)
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,                           // presumably the number of tasks/slices -- TODO confirm
+    const int nthreads                          // presumably the number of threads -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #if 0                       // kernel body is not generated for this operator/type
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // C was not touched: report "unavailable", as the GB_DISABLE branch does
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_uint64
+(
+    GrB_Matrix C,               // dense matrix that C += x would update
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #if 0                       // kernel body is not generated for this operator/type
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // C was not touched: report "unavailable", as the GB_DISABLE branch does
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__le_uint8.c b/Source/Generated/GB_binop__le_uint8.c
index 35d0b66363..51c2923209 100644
--- a/Source/Generated/GB_binop__le_uint8.c
+++ b/Source/Generated/GB_binop__le_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__le_uint8
-// A.*B function (eWiseMult):  GB_AemultB__le_uint8
-// A*D function (colscale):    GB_AxD__le_uint8
-// D*A function (rowscale):    GB_DxB__le_uint8
+// A+B function (eWiseAdd):         GB_AaddB__le_uint8
+// A.*B function (eWiseMult):       GB_AemultB__le_uint8
+// A*D function (colscale):         GB_AxD__le_uint8
+// D*A function (rowscale):         GB_DxB__le_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__le_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__le_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__le_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x <= y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LE || GxB_NO_UINT8 || GxB_NO_LE_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: this file's header lists the C+=A+B function as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__le_uint8
+(
+    GrB_Matrix C,               // result matrix of C = A+B (see the section banner)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the kernel body is supplied by this shared template
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__le_uint8
+(
+    GrB_Matrix C,                               // dense matrix that C += A would update
+    const GrB_Matrix A,                         // sparse matrix that would be accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays for the template; presumably from GB_ek_slice -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // (their meaning is defined by the template, not visible here)
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,                           // presumably the number of tasks/slices -- TODO confirm
+    const int nthreads                          // presumably the number of threads -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #if 0                       // kernel body is not generated for this operator/type
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // C was not touched: report "unavailable", as the GB_DISABLE branch does
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__le_uint8
+(
+    GrB_Matrix C,               // dense matrix that C += x would update
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE
+    #else
+    #if 0                       // kernel body is not generated for this operator/type
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ;     // C was not touched: report "unavailable", as the GB_DISABLE branch does
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__le_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__le_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_bool.c b/Source/Generated/GB_binop__lor_bool.c
index aae5b3b5a8..c96d7f62db 100644
--- a/Source/Generated/GB_binop__lor_bool.c
+++ b/Source/Generated/GB_binop__lor_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_bool
-// A.*B function (eWiseMult):  GB_AemultB__lor_bool
-// A*D function (colscale):    GB_AxD__lor_bool
-// D*A function (rowscale):    GB_DxB__lor_bool
+// A+B function (eWiseAdd):         GB_AaddB__lor_bool
+// A.*B function (eWiseMult):       GB_AemultB__lor_bool
+// A*D function (colscale):         GB_AxD__lor_bool
+// D*A function (rowscale):         GB_DxB__lor_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x || y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_BOOL || GxB_NO_LOR_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: this file's header lists the C+=A+B function as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_bool
+(
+    GrB_Matrix C,               // result matrix of C = A+B (see the section banner)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the kernel body is supplied by this shared template
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_bool
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays for the template; presumably from GB_ek_slice -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // (their meaning is defined by the template, not visible here)
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,                           // presumably the number of tasks/slices -- TODO confirm
+    const int nthreads                          // presumably the number of threads -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the kernel body is supplied by this shared template
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_bool
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; read as bool below
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // load the scalar with its concrete type
+        #include "GB_dense_subassign_22_template.c"     // the kernel body is supplied by this shared template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // redundant: the block above already ends in its own return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_fp32.c b/Source/Generated/GB_binop__lor_fp32.c
index fc7ef22b88..d9f53ce325 100644
--- a/Source/Generated/GB_binop__lor_fp32.c
+++ b/Source/Generated/GB_binop__lor_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_fp32
-// A.*B function (eWiseMult):  GB_AemultB__lor_fp32
-// A*D function (colscale):    GB_AxD__lor_fp32
-// D*A function (rowscale):    GB_DxB__lor_fp32
+// A+B function (eWiseAdd):         GB_AaddB__lor_fp32
+// A.*B function (eWiseMult):       GB_AemultB__lor_fp32
+// A*D function (colscale):         GB_AxD__lor_fp32
+// D*A function (rowscale):         GB_DxB__lor_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_FP32 || GxB_NO_LOR_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: this file's header lists the C+=A+B function as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_fp32
+(
+    GrB_Matrix C,               // result matrix of C = A+B (see the section banner)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the kernel body is supplied by this shared template
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_fp32
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays for the template; presumably from GB_ek_slice -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // (their meaning is defined by the template, not visible here)
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,                           // presumably the number of tasks/slices -- TODO confirm
+    const int nthreads                          // presumably the number of threads -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the kernel body is supplied by this shared template
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_fp32
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; read as float below
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;  // load the scalar with its concrete type
+        #include "GB_dense_subassign_22_template.c"     // the kernel body is supplied by this shared template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // redundant: the block above already ends in its own return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_fp64.c b/Source/Generated/GB_binop__lor_fp64.c
index 9a7a6011ec..f21f2f229f 100644
--- a/Source/Generated/GB_binop__lor_fp64.c
+++ b/Source/Generated/GB_binop__lor_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_fp64
-// A.*B function (eWiseMult):  GB_AemultB__lor_fp64
-// A*D function (colscale):    GB_AxD__lor_fp64
-// D*A function (rowscale):    GB_DxB__lor_fp64
+// A+B function (eWiseAdd):         GB_AaddB__lor_fp64
+// A.*B function (eWiseMult):       GB_AemultB__lor_fp64
+// A*D function (colscale):         GB_AxD__lor_fp64
+// D*A function (rowscale):         GB_DxB__lor_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_FP64 || GxB_NO_LOR_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: this file's header lists the C+=A+B function as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_fp64
+(
+    GrB_Matrix C,               // result matrix of C = A+B (see the section banner)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the kernel body is supplied by this shared template
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_fp64
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays for the template; presumably from GB_ek_slice -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // (their meaning is defined by the template, not visible here)
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,                           // presumably the number of tasks/slices -- TODO confirm
+    const int nthreads                          // presumably the number of threads -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the kernel body is supplied by this shared template
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_fp64
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; read as double below
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;    // load the scalar with its concrete type
+        #include "GB_dense_subassign_22_template.c"     // the kernel body is supplied by this shared template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // redundant: the block above already ends in its own return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_int16.c b/Source/Generated/GB_binop__lor_int16.c
index cd1f11b21a..a5a69c0c66 100644
--- a/Source/Generated/GB_binop__lor_int16.c
+++ b/Source/Generated/GB_binop__lor_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_int16
-// A.*B function (eWiseMult):  GB_AemultB__lor_int16
-// A*D function (colscale):    GB_AxD__lor_int16
-// D*A function (rowscale):    GB_DxB__lor_int16
+// A+B function (eWiseAdd):         GB_AaddB__lor_int16
+// A.*B function (eWiseMult):       GB_AemultB__lor_int16
+// A*D function (colscale):         GB_AxD__lor_int16
+// D*A function (rowscale):         GB_DxB__lor_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_INT16 || GxB_NO_LOR_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: this file's header lists the C+=A+B function as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int16
+(
+    GrB_Matrix C,               // result matrix of C = A+B (see the section banner)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the kernel body is supplied by this shared template
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_int16
+(
+    GrB_Matrix C,                               // dense matrix updated by C += A
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays for the template; presumably from GB_ek_slice -- TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // (their meaning is defined by the template, not visible here)
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,                           // presumably the number of tasks/slices -- TODO confirm
+    const int nthreads                          // presumably the number of threads -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the kernel body is supplied by this shared template
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_int16
+(
+    GrB_Matrix C,               // dense matrix updated by C += x
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; read as int16_t below
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out by GB_DISABLE: report it as unavailable
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;  // load the scalar with its concrete type
+        #include "GB_dense_subassign_22_template.c"     // the kernel body is supplied by this shared template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // redundant: the block above already ends in its own return
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_int32.c b/Source/Generated/GB_binop__lor_int32.c
index a09fca84d7..087fded6bb 100644
--- a/Source/Generated/GB_binop__lor_int32.c
+++ b/Source/Generated/GB_binop__lor_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_int32
-// A.*B function (eWiseMult):  GB_AemultB__lor_int32
-// A*D function (colscale):    GB_AxD__lor_int32
-// D*A function (rowscale):    GB_DxB__lor_int32
+// A+B function (eWiseAdd):         GB_AaddB__lor_int32
+// A.*B function (eWiseMult):       GB_AemultB__lor_int32
+// A*D function (colscale):         GB_AxD__lor_int32
+// D*A function (rowscale):         GB_DxB__lor_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_INT32 || GxB_NO_LOR_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: the file header lists "(none)" for the C+=A+B kernel of this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder, not a valid C identifier; only the #if 0 above keeps it from compiling
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int32
+(
+    GrB_Matrix C,               // dense result of C = A+B
+    const GrB_Matrix A,         // dense first operand
+    const GrB_Matrix B,         // dense second operand
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{ 
+    #if !(GB_DISABLE)           // kernel enabled: the template is the whole computation
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #else                       // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_int32
+(
+    GrB_Matrix C,                                   // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                             // sparse matrix being added, per the banner
+    const int64_t *GB_RESTRICT kfirst_slice,        // the three slice arrays, ntasks and nthreads
+    const int64_t *GB_RESTRICT klast_slice,         // are not read in this wrapper; presumably
+    const int64_t *GB_RESTRICT pstart_slice,        // the included template uses them (confirm)
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    // kernel enabled: the template is expanded inside its own brace scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    return (GrB_SUCCESS) ;
+    #else
+    // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_int32
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out because GB_DISABLE is nonzero
+    #else
+    // ywork is not used in this wrapper itself; presumably the template reads it (confirm)
+    { 
+        // load the scalar as int32_t through the untyped pointer
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, same shape as GB_Cdense_accumA__lor_int32
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_int64.c b/Source/Generated/GB_binop__lor_int64.c
index 33486f250e..d736a671fd 100644
--- a/Source/Generated/GB_binop__lor_int64.c
+++ b/Source/Generated/GB_binop__lor_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_int64
-// A.*B function (eWiseMult):  GB_AemultB__lor_int64
-// A*D function (colscale):    GB_AxD__lor_int64
-// D*A function (rowscale):    GB_DxB__lor_int64
+// A+B function (eWiseAdd):         GB_AaddB__lor_int64
+// A.*B function (eWiseMult):       GB_AemultB__lor_int64
+// A*D function (colscale):         GB_AxD__lor_int64
+// D*A function (rowscale):         GB_DxB__lor_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_INT64 || GxB_NO_LOR_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: the file header lists "(none)" for the C+=A+B kernel of this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder, not a valid C identifier; only the #if 0 above keeps it from compiling
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int64
+(
+    GrB_Matrix C,               // dense result of C = A+B
+    const GrB_Matrix A,         // dense first operand
+    const GrB_Matrix B,         // dense second operand
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{ 
+    #if !(GB_DISABLE)           // kernel enabled: the template is the whole computation
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #else                       // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_int64
+(
+    GrB_Matrix C,                                   // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                             // sparse matrix being added, per the banner
+    const int64_t *GB_RESTRICT kfirst_slice,        // the three slice arrays, ntasks and nthreads
+    const int64_t *GB_RESTRICT klast_slice,         // are not read in this wrapper; presumably
+    const int64_t *GB_RESTRICT pstart_slice,        // the included template uses them (confirm)
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    // kernel enabled: the template is expanded inside its own brace scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    return (GrB_SUCCESS) ;
+    #else
+    // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_int64
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out because GB_DISABLE is nonzero
+    #else
+    // ywork is not used in this wrapper itself; presumably the template reads it (confirm)
+    { 
+        // load the scalar as int64_t through the untyped pointer
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, same shape as GB_Cdense_accumA__lor_int64
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_int8.c b/Source/Generated/GB_binop__lor_int8.c
index 59e3ddb1a7..67391d9890 100644
--- a/Source/Generated/GB_binop__lor_int8.c
+++ b/Source/Generated/GB_binop__lor_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_int8
-// A.*B function (eWiseMult):  GB_AemultB__lor_int8
-// A*D function (colscale):    GB_AxD__lor_int8
-// D*A function (rowscale):    GB_DxB__lor_int8
+// A+B function (eWiseAdd):         GB_AaddB__lor_int8
+// A.*B function (eWiseMult):       GB_AemultB__lor_int8
+// A*D function (colscale):         GB_AxD__lor_int8
+// D*A function (rowscale):         GB_DxB__lor_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_INT8 || GxB_NO_LOR_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: the file header lists "(none)" for the C+=A+B kernel of this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder, not a valid C identifier; only the #if 0 above keeps it from compiling
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_int8
+(
+    GrB_Matrix C,               // dense result of C = A+B
+    const GrB_Matrix A,         // dense first operand
+    const GrB_Matrix B,         // dense second operand
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{ 
+    #if !(GB_DISABLE)           // kernel enabled: the template is the whole computation
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #else                       // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_int8
+(
+    GrB_Matrix C,                                   // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                             // sparse matrix being added, per the banner
+    const int64_t *GB_RESTRICT kfirst_slice,        // the three slice arrays, ntasks and nthreads
+    const int64_t *GB_RESTRICT klast_slice,         // are not read in this wrapper; presumably
+    const int64_t *GB_RESTRICT pstart_slice,        // the included template uses them (confirm)
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    // kernel enabled: the template is expanded inside its own brace scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    return (GrB_SUCCESS) ;
+    #else
+    // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_int8
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out because GB_DISABLE is nonzero
+    #else
+    // ywork is not used in this wrapper itself; presumably the template reads it (confirm)
+    { 
+        // load the scalar as int8_t through the untyped pointer
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, same shape as GB_Cdense_accumA__lor_int8
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_uint16.c b/Source/Generated/GB_binop__lor_uint16.c
index 05bc18b956..d1e717911b 100644
--- a/Source/Generated/GB_binop__lor_uint16.c
+++ b/Source/Generated/GB_binop__lor_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_uint16
-// A.*B function (eWiseMult):  GB_AemultB__lor_uint16
-// A*D function (colscale):    GB_AxD__lor_uint16
-// D*A function (rowscale):    GB_DxB__lor_uint16
+// A+B function (eWiseAdd):         GB_AaddB__lor_uint16
+// A.*B function (eWiseMult):       GB_AemultB__lor_uint16
+// A*D function (colscale):         GB_AxD__lor_uint16
+// D*A function (rowscale):         GB_DxB__lor_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_UINT16 || GxB_NO_LOR_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: the file header lists "(none)" for the C+=A+B kernel of this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder, not a valid C identifier; only the #if 0 above keeps it from compiling
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint16
+(
+    GrB_Matrix C,               // dense result of C = A+B
+    const GrB_Matrix A,         // dense first operand
+    const GrB_Matrix B,         // dense second operand
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{ 
+    #if !(GB_DISABLE)           // kernel enabled: the template is the whole computation
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #else                       // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_uint16
+(
+    GrB_Matrix C,                                   // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                             // sparse matrix being added, per the banner
+    const int64_t *GB_RESTRICT kfirst_slice,        // the three slice arrays, ntasks and nthreads
+    const int64_t *GB_RESTRICT klast_slice,         // are not read in this wrapper; presumably
+    const int64_t *GB_RESTRICT pstart_slice,        // the included template uses them (confirm)
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    // kernel enabled: the template is expanded inside its own brace scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    return (GrB_SUCCESS) ;
+    #else
+    // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_uint16
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out because GB_DISABLE is nonzero
+    #else
+    // ywork is not used in this wrapper itself; presumably the template reads it (confirm)
+    { 
+        // load the scalar as uint16_t through the untyped pointer
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, same shape as GB_Cdense_accumA__lor_uint16
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_uint32.c b/Source/Generated/GB_binop__lor_uint32.c
index 56150d795c..9a73b8ef9d 100644
--- a/Source/Generated/GB_binop__lor_uint32.c
+++ b/Source/Generated/GB_binop__lor_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_uint32
-// A.*B function (eWiseMult):  GB_AemultB__lor_uint32
-// A*D function (colscale):    GB_AxD__lor_uint32
-// D*A function (rowscale):    GB_DxB__lor_uint32
+// A+B function (eWiseAdd):         GB_AaddB__lor_uint32
+// A.*B function (eWiseMult):       GB_AemultB__lor_uint32
+// A*D function (colscale):         GB_AxD__lor_uint32
+// D*A function (rowscale):         GB_DxB__lor_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_UINT32 || GxB_NO_LOR_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: the file header lists "(none)" for the C+=A+B kernel of this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder, not a valid C identifier; only the #if 0 above keeps it from compiling
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint32
+(
+    GrB_Matrix C,               // dense result of C = A+B
+    const GrB_Matrix A,         // dense first operand
+    const GrB_Matrix B,         // dense second operand
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{ 
+    #if !(GB_DISABLE)           // kernel enabled: the template is the whole computation
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #else                       // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_uint32
+(
+    GrB_Matrix C,                                   // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                             // sparse matrix being added, per the banner
+    const int64_t *GB_RESTRICT kfirst_slice,        // the three slice arrays, ntasks and nthreads
+    const int64_t *GB_RESTRICT klast_slice,         // are not read in this wrapper; presumably
+    const int64_t *GB_RESTRICT pstart_slice,        // the included template uses them (confirm)
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    // kernel enabled: the template is expanded inside its own brace scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    return (GrB_SUCCESS) ;
+    #else
+    // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_uint32
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out because GB_DISABLE is nonzero
+    #else
+    // ywork is not used in this wrapper itself; presumably the template reads it (confirm)
+    { 
+        // load the scalar as uint32_t through the untyped pointer
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, same shape as GB_Cdense_accumA__lor_uint32
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_uint64.c b/Source/Generated/GB_binop__lor_uint64.c
index 0c07ee30eb..883b1e320f 100644
--- a/Source/Generated/GB_binop__lor_uint64.c
+++ b/Source/Generated/GB_binop__lor_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_uint64
-// A.*B function (eWiseMult):  GB_AemultB__lor_uint64
-// A*D function (colscale):    GB_AxD__lor_uint64
-// D*A function (rowscale):    GB_DxB__lor_uint64
+// A+B function (eWiseAdd):         GB_AaddB__lor_uint64
+// A.*B function (eWiseMult):       GB_AemultB__lor_uint64
+// A*D function (colscale):         GB_AxD__lor_uint64
+// D*A function (rowscale):         GB_DxB__lor_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_UINT64 || GxB_NO_LOR_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: the file header lists "(none)" for the C+=A+B kernel of this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder, not a valid C identifier; only the #if 0 above keeps it from compiling
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B section
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint64
+(
+    GrB_Matrix C,               // dense result of C = A+B
+    const GrB_Matrix A,         // dense first operand
+    const GrB_Matrix B,         // dense second operand
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{ 
+    #if !(GB_DISABLE)           // kernel enabled: the template is the whole computation
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #else                       // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_uint64
+(
+    GrB_Matrix C,                                   // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                             // sparse matrix being added, per the banner
+    const int64_t *GB_RESTRICT kfirst_slice,        // the three slice arrays, ntasks and nthreads
+    const int64_t *GB_RESTRICT klast_slice,         // are not read in this wrapper; presumably
+    const int64_t *GB_RESTRICT pstart_slice,        // the included template uses them (confirm)
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    // kernel enabled: the template is expanded inside its own brace scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    return (GrB_SUCCESS) ;
+    #else
+    // kernel compiled out because GB_DISABLE is nonzero
+    return (GrB_NO_VALUE) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_uint64
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // not read here; presumably used by the template (confirm)
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out because GB_DISABLE is nonzero
+    #else
+    // ywork is not used in this wrapper itself; presumably the template reads it (confirm)
+    { 
+        // load the scalar as uint64_t through the untyped pointer
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    // single success exit, same shape as GB_Cdense_accumA__lor_uint64
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lor_uint8.c b/Source/Generated/GB_binop__lor_uint8.c
index 605d7cda89..a964585901 100644
--- a/Source/Generated/GB_binop__lor_uint8.c
+++ b/Source/Generated/GB_binop__lor_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lor_uint8
-// A.*B function (eWiseMult):  GB_AemultB__lor_uint8
-// A*D function (colscale):    GB_AxD__lor_uint8
-// D*A function (rowscale):    GB_DxB__lor_uint8
+// A+B function (eWiseAdd):         GB_AaddB__lor_uint8
+// A.*B function (eWiseMult):       GB_AemultB__lor_uint8
+// A*D function (colscale):         GB_AxD__lor_uint8
+// D*A function (rowscale):         GB_DxB__lor_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__lor_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__lor_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) || (y != 0)) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_UINT8 || GxB_NO_LOR_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel exists for this operator (LOR is not in the list below)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" stands in for the missing function name; not valid C, hence the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lor_uint8    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // result of C = A+B; dense
+    const GrB_Matrix A,         // first operand; dense
+    const GrB_Matrix B,         // second operand; dense
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LOR, GxB_NO_UINT8 or GxB_NO_LOR_UINT8 is set; no work is done
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body is pasted in textually here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lor_uint8    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // per-task slice arrays for the template --
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built by the GB_ek_slice
+    const int64_t *GB_RESTRICT pstart_slice,    // routines; confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- confirm
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LOR, GxB_NO_UINT8 or GxB_NO_LOR_UINT8 is set; C is untouched
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body is pasted in textually here
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lor_uint8    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; read as uint8_t below
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LOR, GxB_NO_UINT8 or GxB_NO_LOR_UINT8 is set; C is untouched
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;      // typed copy of the scalar; presumably read by the template
+        #include "GB_dense_subassign_22_template.c"     // kernel body is pasted in textually here
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // appears redundant: the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_bool.c b/Source/Generated/GB_binop__lt_bool.c
index c8c1096452..533c58f9a9 100644
--- a/Source/Generated/GB_binop__lt_bool.c
+++ b/Source/Generated/GB_binop__lt_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_bool
-// A.*B function (eWiseMult):  GB_AemultB__lt_bool
-// A*D function (colscale):    GB_AxD__lt_bool
-// D*A function (rowscale):    GB_DxB__lt_bool
+// A+B function (eWiseAdd):         GB_AaddB__lt_bool
+// A.*B function (eWiseMult):       GB_AemultB__lt_bool
+// A*D function (colscale):         GB_AxD__lt_bool
+// D*A function (rowscale):         GB_DxB__lt_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_BOOL || GxB_NO_LT_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel exists for this operator (LT is not in the list below)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" stands in for the missing function name; not valid C, hence the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_bool  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // result of C = A+B; dense
+    const GrB_Matrix A,         // first operand; dense
+    const GrB_Matrix B,         // second operand; dense
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_BOOL or GxB_NO_LT_BOOL is set; no work is done
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body is pasted in textually here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_bool  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // per-task slice arrays for the template --
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built by the GB_ek_slice
+    const int64_t *GB_RESTRICT pstart_slice,    // routines; confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- confirm
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_BOOL or GxB_NO_LT_BOOL is set; C is untouched
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body is pasted in textually here
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_bool  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; read as bool below
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_BOOL or GxB_NO_LT_BOOL is set; C is untouched
+    #else
+    
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;            // typed copy of the scalar; presumably read by the template
+        #include "GB_dense_subassign_22_template.c"     // kernel body is pasted in textually here
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // appears redundant: the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_fp32.c b/Source/Generated/GB_binop__lt_fp32.c
index c6a1865a1d..39980e71e3 100644
--- a/Source/Generated/GB_binop__lt_fp32.c
+++ b/Source/Generated/GB_binop__lt_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_fp32
-// A.*B function (eWiseMult):  GB_AemultB__lt_fp32
-// A*D function (colscale):    GB_AxD__lt_fp32
-// D*A function (rowscale):    GB_DxB__lt_fp32
+// A+B function (eWiseAdd):         GB_AaddB__lt_fp32
+// A.*B function (eWiseMult):       GB_AemultB__lt_fp32
+// A*D function (colscale):         GB_AxD__lt_fp32
+// D*A function (rowscale):         GB_DxB__lt_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_fp32
 
 // C type:   bool
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_FP32 || GxB_NO_LT_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel exists for this operator (LT is not in the list below)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" stands in for the missing function name; not valid C, hence the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_fp32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // result of C = A+B; dense
+    const GrB_Matrix A,         // first operand; dense
+    const GrB_Matrix B,         // second operand; dense
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_FP32 or GxB_NO_LT_FP32 is set; no work is done
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body is pasted in textually here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_fp32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += A operation; not modified here
+    const GrB_Matrix A,         // sparse matrix of the C += A operation; not read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays are unused while the
+    const int64_t *GB_RESTRICT klast_slice,     // template below stays compiled out
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_FP32 or GxB_NO_LT_FP32 is set
+    #else
+    #if 0       // kernel body is compiled out for this operator, so the function does no work
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_fp32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += x operation; not modified here
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; unused while the body is compiled out
+    const int nthreads          // unused while the body is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_FP32 or GxB_NO_LT_FP32 is set
+    #else
+    #if 0       // scalar load and kernel body are compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_fp64.c b/Source/Generated/GB_binop__lt_fp64.c
index da5d003229..32e2a3a87f 100644
--- a/Source/Generated/GB_binop__lt_fp64.c
+++ b/Source/Generated/GB_binop__lt_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_fp64
-// A.*B function (eWiseMult):  GB_AemultB__lt_fp64
-// A*D function (colscale):    GB_AxD__lt_fp64
-// D*A function (rowscale):    GB_DxB__lt_fp64
+// A+B function (eWiseAdd):         GB_AaddB__lt_fp64
+// A.*B function (eWiseMult):       GB_AemultB__lt_fp64
+// A*D function (colscale):         GB_AxD__lt_fp64
+// D*A function (rowscale):         GB_DxB__lt_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_fp64
 
 // C type:   bool
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_FP64 || GxB_NO_LT_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel exists for this operator (LT is not in the list below)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" stands in for the missing function name; not valid C, hence the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_fp64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // result of C = A+B; dense
+    const GrB_Matrix A,         // first operand; dense
+    const GrB_Matrix B,         // second operand; dense
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_FP64 or GxB_NO_LT_FP64 is set; no work is done
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body is pasted in textually here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_fp64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += A operation; not modified here
+    const GrB_Matrix A,         // sparse matrix of the C += A operation; not read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays are unused while the
+    const int64_t *GB_RESTRICT klast_slice,     // template below stays compiled out
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_FP64 or GxB_NO_LT_FP64 is set
+    #else
+    #if 0       // kernel body is compiled out for this operator, so the function does no work
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_fp64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += x operation; not modified here
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; unused while the body is compiled out
+    const int nthreads          // unused while the body is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_FP64 or GxB_NO_LT_FP64 is set
+    #else
+    #if 0       // scalar load and kernel body are compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_int16.c b/Source/Generated/GB_binop__lt_int16.c
index d5b5763e73..8da3666c45 100644
--- a/Source/Generated/GB_binop__lt_int16.c
+++ b/Source/Generated/GB_binop__lt_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_int16
-// A.*B function (eWiseMult):  GB_AemultB__lt_int16
-// A*D function (colscale):    GB_AxD__lt_int16
-// D*A function (rowscale):    GB_DxB__lt_int16
+// A+B function (eWiseAdd):         GB_AaddB__lt_int16
+// A.*B function (eWiseMult):       GB_AemultB__lt_int16
+// A*D function (colscale):         GB_AxD__lt_int16
+// D*A function (rowscale):         GB_DxB__lt_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_INT16 || GxB_NO_LT_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel exists for this operator (LT is not in the list below)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" stands in for the missing function name; not valid C, hence the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // result of C = A+B; dense
+    const GrB_Matrix A,         // first operand; dense
+    const GrB_Matrix B,         // second operand; dense
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_INT16 or GxB_NO_LT_INT16 is set; no work is done
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body is pasted in textually here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_int16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += A operation; not modified here
+    const GrB_Matrix A,         // sparse matrix of the C += A operation; not read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays are unused while the
+    const int64_t *GB_RESTRICT klast_slice,     // template below stays compiled out
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_INT16 or GxB_NO_LT_INT16 is set
+    #else
+    #if 0       // kernel body is compiled out for this operator, so the function does no work
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_int16 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += x operation; not modified here
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; unused while the body is compiled out
+    const int nthreads          // unused while the body is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_INT16 or GxB_NO_LT_INT16 is set
+    #else
+    #if 0       // scalar load and kernel body are compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_int32.c b/Source/Generated/GB_binop__lt_int32.c
index 29abdd95dd..3538bd11ef 100644
--- a/Source/Generated/GB_binop__lt_int32.c
+++ b/Source/Generated/GB_binop__lt_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_int32
-// A.*B function (eWiseMult):  GB_AemultB__lt_int32
-// A*D function (colscale):    GB_AxD__lt_int32
-// D*A function (rowscale):    GB_DxB__lt_int32
+// A+B function (eWiseAdd):         GB_AaddB__lt_int32
+// A.*B function (eWiseMult):       GB_AemultB__lt_int32
+// A*D function (colscale):         GB_AxD__lt_int32
+// D*A function (rowscale):         GB_DxB__lt_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_INT32 || GxB_NO_LT_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: no C+=A+B kernel exists for this operator (LT is not in the list below)
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" stands in for the missing function name; not valid C, hence the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int32 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // result of C = A+B; dense
+    const GrB_Matrix A,         // first operand; dense
+    const GrB_Matrix B,         // second operand; dense
+    const int nthreads          // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_INT32 or GxB_NO_LT_INT32 is set; no work is done
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body is pasted in textually here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_int32 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += A operation; not modified here
+    const GrB_Matrix A,         // sparse matrix of the C += A operation; not read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays are unused while the
+    const int64_t *GB_RESTRICT klast_slice,     // template below stays compiled out
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_INT32 or GxB_NO_LT_INT32 is set
+    #else
+    #if 0       // kernel body is compiled out for this operator, so the function does no work
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_int32 // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // dense matrix of the C += x operation; not modified here
+    const GB_void *p_ywork,     // untyped pointer to the scalar x; unused while the body is compiled out
+    const int nthreads          // unused while the body is compiled out
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GxB_NO_LT, GxB_NO_INT32 or GxB_NO_LT_INT32 is set
+    #else
+    #if 0       // scalar load and kernel body are compiled out for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): reports success with C unmodified -- confirm this is never dispatched
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_int64.c b/Source/Generated/GB_binop__lt_int64.c
index 41838148b3..85d7ce87e3 100644
--- a/Source/Generated/GB_binop__lt_int64.c
+++ b/Source/Generated/GB_binop__lt_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_int64
-// A.*B function (eWiseMult):  GB_AemultB__lt_int64
-// A*D function (colscale):    GB_AxD__lt_int64
-// D*A function (rowscale):    GB_DxB__lt_int64
+// A+B function (eWiseAdd):         GB_AaddB__lt_int64
+// A.*B function (eWiseMult):       GB_AemultB__lt_int64
+// A*D function (colscale):         GB_AxD__lt_int64
+// D*A function (rowscale):         GB_DxB__lt_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_INT64 || GxB_NO_LT_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_int8.c b/Source/Generated/GB_binop__lt_int8.c
index c3d85da308..9af0ef987f 100644
--- a/Source/Generated/GB_binop__lt_int8.c
+++ b/Source/Generated/GB_binop__lt_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_int8
-// A.*B function (eWiseMult):  GB_AemultB__lt_int8
-// A*D function (colscale):    GB_AxD__lt_int8
-// D*A function (rowscale):    GB_DxB__lt_int8
+// A+B function (eWiseAdd):         GB_AaddB__lt_int8
+// A.*B function (eWiseMult):       GB_AemultB__lt_int8
+// A*D function (colscale):         GB_AxD__lt_int8
+// D*A function (rowscale):         GB_DxB__lt_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_INT8 || GxB_NO_LT_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_uint16.c b/Source/Generated/GB_binop__lt_uint16.c
index 96ff14d322..066e8a7641 100644
--- a/Source/Generated/GB_binop__lt_uint16.c
+++ b/Source/Generated/GB_binop__lt_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_uint16
-// A.*B function (eWiseMult):  GB_AemultB__lt_uint16
-// A*D function (colscale):    GB_AxD__lt_uint16
-// D*A function (rowscale):    GB_DxB__lt_uint16
+// A+B function (eWiseAdd):         GB_AaddB__lt_uint16
+// A.*B function (eWiseMult):       GB_AemultB__lt_uint16
+// A*D function (colscale):         GB_AxD__lt_uint16
+// D*A function (rowscale):         GB_DxB__lt_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_UINT16 || GxB_NO_LT_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_uint32.c b/Source/Generated/GB_binop__lt_uint32.c
index 76ae4a68e2..7eafdf621e 100644
--- a/Source/Generated/GB_binop__lt_uint32.c
+++ b/Source/Generated/GB_binop__lt_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_uint32
-// A.*B function (eWiseMult):  GB_AemultB__lt_uint32
-// A*D function (colscale):    GB_AxD__lt_uint32
-// D*A function (rowscale):    GB_DxB__lt_uint32
+// A+B function (eWiseAdd):         GB_AaddB__lt_uint32
+// A.*B function (eWiseMult):       GB_AemultB__lt_uint32
+// A*D function (colscale):         GB_AxD__lt_uint32
+// D*A function (rowscale):         GB_DxB__lt_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_UINT32 || GxB_NO_LT_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_uint64.c b/Source/Generated/GB_binop__lt_uint64.c
index 62f316b48d..9fc0a4ffc6 100644
--- a/Source/Generated/GB_binop__lt_uint64.c
+++ b/Source/Generated/GB_binop__lt_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_uint64
-// A.*B function (eWiseMult):  GB_AemultB__lt_uint64
-// A*D function (colscale):    GB_AxD__lt_uint64
-// D*A function (rowscale):    GB_DxB__lt_uint64
+// A+B function (eWiseAdd):         GB_AaddB__lt_uint64
+// A.*B function (eWiseMult):       GB_AemultB__lt_uint64
+// A*D function (colscale):         GB_AxD__lt_uint64
+// D*A function (rowscale):         GB_DxB__lt_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_UINT64 || GxB_NO_LT_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lt_uint8.c b/Source/Generated/GB_binop__lt_uint8.c
index 183841aa6f..fa53956046 100644
--- a/Source/Generated/GB_binop__lt_uint8.c
+++ b/Source/Generated/GB_binop__lt_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lt_uint8
-// A.*B function (eWiseMult):  GB_AemultB__lt_uint8
-// A*D function (colscale):    GB_AxD__lt_uint8
-// D*A function (rowscale):    GB_DxB__lt_uint8
+// A+B function (eWiseAdd):         GB_AaddB__lt_uint8
+// A.*B function (eWiseMult):       GB_AemultB__lt_uint8
+// A*D function (colscale):         GB_AxD__lt_uint8
+// D*A function (rowscale):         GB_DxB__lt_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__lt_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__lt_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lt_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x < y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LT || GxB_NO_UINT8 || GxB_NO_LT_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lt_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lt_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_bool.c b/Source/Generated/GB_binop__lxor_bool.c
index 64431b3cad..c61e0878fe 100644
--- a/Source/Generated/GB_binop__lxor_bool.c
+++ b/Source/Generated/GB_binop__lxor_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_bool
-// A.*B function (eWiseMult):  GB_AemultB__lxor_bool
-// A*D function (colscale):    GB_AxD__lxor_bool
-// D*A function (rowscale):    GB_DxB__lxor_bool
+// A+B function (eWiseAdd):         GB_AaddB__lxor_bool
+// A.*B function (eWiseMult):       GB_AemultB__lxor_bool
+// A*D function (colscale):         GB_AxD__lxor_bool
+// D*A function (rowscale):         GB_DxB__lxor_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: is the op SECOND?  0 in this file, since the op is LXOR
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no gateway:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_BOOL || GxB_NO_LXOR_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name; never compiled because of the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_bool    // C = A+B where + is LXOR
+(
+    GrB_Matrix C,           // dense result matrix (only non-const argument)
+    const GrB_Matrix A,     // dense input matrix
+    const GrB_Matrix B,     // dense input matrix
+    const int nthreads      // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // shared kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_bool    // C += A with LXOR as the accumulator
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays, presumably
+    const int64_t *GB_RESTRICT klast_slice,     // consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,                           // presumably the task count
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // shared kernel body
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_bool    // C += x with LXOR as the accumulator
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read as bool
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable after the return just above
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_fp32.c b/Source/Generated/GB_binop__lxor_fp32.c
index 0bc06a3c40..65cd9df534 100644
--- a/Source/Generated/GB_binop__lxor_fp32.c
+++ b/Source/Generated/GB_binop__lxor_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_fp32
-// A.*B function (eWiseMult):  GB_AemultB__lxor_fp32
-// A*D function (colscale):    GB_AxD__lxor_fp32
-// D*A function (rowscale):    GB_DxB__lxor_fp32
+// A+B function (eWiseAdd):         GB_AaddB__lxor_fp32
+// A.*B function (eWiseMult):       GB_AemultB__lxor_fp32
+// A*D function (colscale):         GB_AxD__lxor_fp32
+// D*A function (rowscale):         GB_DxB__lxor_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (float in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// flag: is the op SECOND?  0 in this file, since the op is LXOR
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no gateway:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_FP32 || GxB_NO_LXOR_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name; never compiled because of the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_fp32    // C = A+B where + is LXOR
+(
+    GrB_Matrix C,           // dense result matrix (only non-const argument)
+    const GrB_Matrix A,     // dense input matrix
+    const GrB_Matrix B,     // dense input matrix
+    const int nthreads      // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // shared kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_fp32    // C += A with LXOR as the accumulator
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays, presumably
+    const int64_t *GB_RESTRICT klast_slice,     // consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,                           // presumably the task count
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // shared kernel body
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_fp32    // C += x with LXOR as the accumulator
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read as float
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable after the return just above
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_fp64.c b/Source/Generated/GB_binop__lxor_fp64.c
index cc9014ba52..c001bfb053 100644
--- a/Source/Generated/GB_binop__lxor_fp64.c
+++ b/Source/Generated/GB_binop__lxor_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_fp64
-// A.*B function (eWiseMult):  GB_AemultB__lxor_fp64
-// A*D function (colscale):    GB_AxD__lxor_fp64
-// D*A function (rowscale):    GB_DxB__lxor_fp64
+// A+B function (eWiseAdd):         GB_AaddB__lxor_fp64
+// A.*B function (eWiseMult):       GB_AemultB__lxor_fp64
+// A*D function (colscale):         GB_AxD__lxor_fp64
+// D*A function (rowscale):         GB_DxB__lxor_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (double in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// flag: is the op SECOND?  0 in this file, since the op is LXOR
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no gateway:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_FP64 || GxB_NO_LXOR_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name; never compiled because of the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_fp64    // C = A+B where + is LXOR
+(
+    GrB_Matrix C,           // dense result matrix (only non-const argument)
+    const GrB_Matrix A,     // dense input matrix
+    const GrB_Matrix B,     // dense input matrix
+    const int nthreads      // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // shared kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_fp64    // C += A with LXOR as the accumulator
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays, presumably
+    const int64_t *GB_RESTRICT klast_slice,     // consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,                           // presumably the task count
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // shared kernel body
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_fp64    // C += x with LXOR as the accumulator
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read as double
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable after the return just above
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_int16.c b/Source/Generated/GB_binop__lxor_int16.c
index 0f73a8d305..706ff3e0f6 100644
--- a/Source/Generated/GB_binop__lxor_int16.c
+++ b/Source/Generated/GB_binop__lxor_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_int16
-// A.*B function (eWiseMult):  GB_AemultB__lxor_int16
-// A*D function (colscale):    GB_AxD__lxor_int16
-// D*A function (rowscale):    GB_DxB__lxor_int16
+// A+B function (eWiseAdd):         GB_AaddB__lxor_int16
+// A.*B function (eWiseMult):       GB_AemultB__lxor_int16
+// A*D function (colscale):         GB_AxD__lxor_int16
+// D*A function (rowscale):         GB_DxB__lxor_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int16_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// flag: is the op SECOND?  0 in this file, since the op is LXOR
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no gateway:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_INT16 || GxB_NO_LXOR_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name; never compiled because of the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int16   // C = A+B where + is LXOR
+(
+    GrB_Matrix C,           // dense result matrix (only non-const argument)
+    const GrB_Matrix A,     // dense input matrix
+    const GrB_Matrix B,     // dense input matrix
+    const int nthreads      // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // shared kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_int16   // C += A with LXOR as the accumulator
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays, presumably
+    const int64_t *GB_RESTRICT klast_slice,     // consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,                           // presumably the task count
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // shared kernel body
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_int16   // C += x with LXOR as the accumulator
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // type-erased pointer to the scalar (int16_t)
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable after the return just above
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_int32.c b/Source/Generated/GB_binop__lxor_int32.c
index 5f90c08ef4..7ae7b31831 100644
--- a/Source/Generated/GB_binop__lxor_int32.c
+++ b/Source/Generated/GB_binop__lxor_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_int32
-// A.*B function (eWiseMult):  GB_AemultB__lxor_int32
-// A*D function (colscale):    GB_AxD__lxor_int32
-// D*A function (rowscale):    GB_DxB__lxor_int32
+// A+B function (eWiseAdd):         GB_AaddB__lxor_int32
+// A.*B function (eWiseMult):       GB_AemultB__lxor_int32
+// A*D function (colscale):         GB_AxD__lxor_int32
+// D*A function (rowscale):         GB_DxB__lxor_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// flag: is the op SECOND?  0 in this file, since the op is LXOR
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no gateway:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_INT32 || GxB_NO_LXOR_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name; never compiled because of the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int32   // C = A+B where + is LXOR
+(
+    GrB_Matrix C,           // dense result matrix (only non-const argument)
+    const GrB_Matrix A,     // dense input matrix
+    const GrB_Matrix B,     // dense input matrix
+    const int nthreads      // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // shared kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_int32   // C += A with LXOR as the accumulator
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays, presumably
+    const int64_t *GB_RESTRICT klast_slice,     // consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,                           // presumably the task count
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // shared kernel body
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_int32   // C += x with LXOR as the accumulator
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // type-erased pointer to the scalar (int32_t)
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable after the return just above
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_int64.c b/Source/Generated/GB_binop__lxor_int64.c
index 1eed116beb..4ae58e769d 100644
--- a/Source/Generated/GB_binop__lxor_int64.c
+++ b/Source/Generated/GB_binop__lxor_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_int64
-// A.*B function (eWiseMult):  GB_AemultB__lxor_int64
-// A*D function (colscale):    GB_AxD__lxor_int64
-// D*A function (rowscale):    GB_DxB__lxor_int64
+// A+B function (eWiseAdd):         GB_AaddB__lxor_int64
+// A.*B function (eWiseMult):       GB_AemultB__lxor_int64
+// A*D function (colscale):         GB_AxD__lxor_int64
+// D*A function (rowscale):         GB_DxB__lxor_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int64_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// flag: is the op SECOND?  0 in this file, since the op is LXOR
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: is the op plus_fp32 or plus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: is the op minus_fp32 or minus_fp64?  0 in this file (op is LXOR)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no gateway:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_INT64 || GxB_NO_LXOR_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // disabled: no C+=A+B kernel is generated for this operator
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder name; never compiled because of the #if 0
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int64   // C = A+B where + is LXOR
+(
+    GrB_Matrix C,           // dense result matrix (only non-const argument)
+    const GrB_Matrix A,     // dense input matrix
+    const GrB_Matrix B,     // dense input matrix
+    const int nthreads      // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // shared kernel body
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_int64   // C += A with LXOR as the accumulator
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix to accumulate
+    const int64_t *GB_RESTRICT kfirst_slice,    // slicing arrays, presumably
+    const int64_t *GB_RESTRICT klast_slice,     // consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,                           // presumably the task count
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // shared kernel body
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_int64   // C += x with LXOR as the accumulator
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // type-erased pointer to the scalar (int64_t)
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ; // compiled out: a GxB_NO_* flag in GB_DISABLE is set
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable after the return just above
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_int8.c b/Source/Generated/GB_binop__lxor_int8.c
index fce1c9976f..5a1e004c3c 100644
--- a/Source/Generated/GB_binop__lxor_int8.c
+++ b/Source/Generated/GB_binop__lxor_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_int8
-// A.*B function (eWiseMult):  GB_AemultB__lxor_int8
-// A*D function (colscale):    GB_AxD__lxor_int8
-// D*A function (rowscale):    GB_DxB__lxor_int8
+// A+B function (eWiseAdd):         GB_AaddB__lxor_int8
+// A.*B function (eWiseMult):       GB_AemultB__lxor_int8
+// A*D function (colscale):         GB_AxD__lxor_int8
+// D*A function (rowscale):         GB_DxB__lxor_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int8_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// op is second: 0 (false) for this operator
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 (false) for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 (false) for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; "(none)" here:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_LXOR_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // block is compiled out: the file header lists "(none)" for the C+=A+B kernel
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would go; not a valid C identifier
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_int8    // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // output matrix (banner: C = A+B, all three dense)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_LXOR_INT8
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole computation is pasted in from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_int8    // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += A)
+    const GrB_Matrix A,         // matrix accumulated into C (the banner calls it sparse)
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slice arrays handed to the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably a per-task partition of A's
+    const int64_t *GB_RESTRICT pstart_slice,    // entries -- TODO confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- TODO confirm
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_LXOR_INT8
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the whole computation is pasted in from this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_int8    // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as int8_t
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_INT8 || GxB_NO_LXOR_INT8
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;        // typed local copy of the scalar
+        #include "GB_dense_subassign_22_template.c"     // presumably consumes ywork -- TODO confirm
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): duplicates the return just above; looks unreachable
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_uint16.c b/Source/Generated/GB_binop__lxor_uint16.c
index a09ad52786..bc93da7891 100644
--- a/Source/Generated/GB_binop__lxor_uint16.c
+++ b/Source/Generated/GB_binop__lxor_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_uint16
-// A.*B function (eWiseMult):  GB_AemultB__lxor_uint16
-// A*D function (colscale):    GB_AxD__lxor_uint16
-// D*A function (rowscale):    GB_DxB__lxor_uint16
+// A+B function (eWiseAdd):         GB_AaddB__lxor_uint16
+// A.*B function (eWiseMult):       GB_AemultB__lxor_uint16
+// A*D function (colscale):         GB_AxD__lxor_uint16
+// D*A function (rowscale):         GB_DxB__lxor_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint16_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// op is second: 0 (false) for this operator
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 (false) for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 (false) for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; "(none)" here:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_LXOR_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // block is compiled out: the file header lists "(none)" for the C+=A+B kernel
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would go; not a valid C identifier
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint16  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // output matrix (banner: C = A+B, all three dense)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_LXOR_UINT16
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole computation is pasted in from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_uint16  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += A)
+    const GrB_Matrix A,         // matrix accumulated into C (the banner calls it sparse)
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slice arrays handed to the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably a per-task partition of A's
+    const int64_t *GB_RESTRICT pstart_slice,    // entries -- TODO confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- TODO confirm
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_LXOR_UINT16
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the whole computation is pasted in from this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_uint16  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as uint16_t
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT16 || GxB_NO_LXOR_UINT16
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;    // typed local copy of the scalar
+        #include "GB_dense_subassign_22_template.c"     // presumably consumes ywork -- TODO confirm
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): duplicates the return just above; looks unreachable
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_uint32.c b/Source/Generated/GB_binop__lxor_uint32.c
index 7051c408fb..4071c96249 100644
--- a/Source/Generated/GB_binop__lxor_uint32.c
+++ b/Source/Generated/GB_binop__lxor_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_uint32
-// A.*B function (eWiseMult):  GB_AemultB__lxor_uint32
-// A*D function (colscale):    GB_AxD__lxor_uint32
-// D*A function (rowscale):    GB_DxB__lxor_uint32
+// A+B function (eWiseAdd):         GB_AaddB__lxor_uint32
+// A.*B function (eWiseMult):       GB_AemultB__lxor_uint32
+// A*D function (colscale):         GB_AxD__lxor_uint32
+// D*A function (rowscale):         GB_DxB__lxor_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// op is second: 0 (false) for this operator
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 (false) for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 (false) for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; "(none)" here:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_LXOR_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // block is compiled out: the file header lists "(none)" for the C+=A+B kernel
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would go; not a valid C identifier
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint32  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // output matrix (banner: C = A+B, all three dense)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_LXOR_UINT32
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole computation is pasted in from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_uint32  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += A)
+    const GrB_Matrix A,         // matrix accumulated into C (the banner calls it sparse)
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slice arrays handed to the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably a per-task partition of A's
+    const int64_t *GB_RESTRICT pstart_slice,    // entries -- TODO confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- TODO confirm
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_LXOR_UINT32
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the whole computation is pasted in from this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_uint32  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as uint32_t
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT32 || GxB_NO_LXOR_UINT32
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;    // typed local copy of the scalar
+        #include "GB_dense_subassign_22_template.c"     // presumably consumes ywork -- TODO confirm
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): duplicates the return just above; looks unreachable
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_uint64.c b/Source/Generated/GB_binop__lxor_uint64.c
index c01d941389..defd731d96 100644
--- a/Source/Generated/GB_binop__lxor_uint64.c
+++ b/Source/Generated/GB_binop__lxor_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_uint64
-// A.*B function (eWiseMult):  GB_AemultB__lxor_uint64
-// A*D function (colscale):    GB_AxD__lxor_uint64
-// D*A function (rowscale):    GB_DxB__lxor_uint64
+// A+B function (eWiseAdd):         GB_AaddB__lxor_uint64
+// A.*B function (eWiseMult):       GB_AemultB__lxor_uint64
+// A*D function (colscale):         GB_AxD__lxor_uint64
+// D*A function (rowscale):         GB_DxB__lxor_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint64_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// op is second: 0 (false) for this operator
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 (false) for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 (false) for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; "(none)" here:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_LXOR_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // block is compiled out: the file header lists "(none)" for the C+=A+B kernel
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would go; not a valid C identifier
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint64  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // output matrix (banner: C = A+B, all three dense)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_LXOR_UINT64
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole computation is pasted in from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_uint64  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += A)
+    const GrB_Matrix A,         // matrix accumulated into C (the banner calls it sparse)
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slice arrays handed to the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably a per-task partition of A's
+    const int64_t *GB_RESTRICT pstart_slice,    // entries -- TODO confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- TODO confirm
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_LXOR_UINT64
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the whole computation is pasted in from this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_uint64  // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as uint64_t
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT64 || GxB_NO_LXOR_UINT64
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;    // typed local copy of the scalar
+        #include "GB_dense_subassign_22_template.c"     // presumably consumes ywork -- TODO confirm
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): duplicates the return just above; looks unreachable
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__lxor_uint8.c b/Source/Generated/GB_binop__lxor_uint8.c
index 29fb9d0313..008cf2b374 100644
--- a/Source/Generated/GB_binop__lxor_uint8.c
+++ b/Source/Generated/GB_binop__lxor_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__lxor_uint8
-// A.*B function (eWiseMult):  GB_AemultB__lxor_uint8
-// A*D function (colscale):    GB_AxD__lxor_uint8
-// D*A function (rowscale):    GB_DxB__lxor_uint8
+// A+B function (eWiseAdd):         GB_AaddB__lxor_uint8
+// A.*B function (eWiseMult):       GB_AemultB__lxor_uint8
+// A*D function (colscale):         GB_AxD__lxor_uint8
+// D*A function (rowscale):         GB_DxB__lxor_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__lxor_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__lxor_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__lxor_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint8_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = ((x != 0) != (y != 0)) ;
 
+// op is second: 0 (false) for this operator
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 (false) for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 (false) for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; "(none)" here:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_LXOR_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0       // block is compiled out: the file header lists "(none)" for the C+=A+B kernel
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would go; not a valid C identifier
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__lxor_uint8   // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // output matrix (banner: C = A+B, all three dense)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_LXOR_UINT8
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole computation is pasted in from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__lxor_uint8   // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += A)
+    const GrB_Matrix A,         // matrix accumulated into C (the banner calls it sparse)
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slice arrays handed to the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably a per-task partition of A's
+    const int64_t *GB_RESTRICT pstart_slice,    // entries -- TODO confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- TODO confirm
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_LXOR_UINT8
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the whole computation is pasted in from this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__lxor_uint8   // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as uint8_t
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_LXOR || GxB_NO_UINT8 || GxB_NO_LXOR_UINT8
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;      // typed local copy of the scalar
+        #include "GB_dense_subassign_22_template.c"     // presumably consumes ywork -- TODO confirm
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): duplicates the return just above; looks unreachable
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__lxor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__lxor_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_fp32.c b/Source/Generated/GB_binop__max_fp32.c
index 0c69aaae03..7eb4b24e92 100644
--- a/Source/Generated/GB_binop__max_fp32.c
+++ b/Source/Generated/GB_binop__max_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_fp32
-// A.*B function (eWiseMult):  GB_AemultB__max_fp32
-// A*D function (colscale):    GB_AxD__max_fp32
-// D*A function (rowscale):    GB_DxB__max_fp32
+// A+B function (eWiseAdd):         GB_AaddB__max_fp32
+// A.*B function (eWiseMult):       GB_AemultB__max_fp32
+// A*D function (colscale):         GB_AxD__max_fp32
+// D*A function (rowscale):         GB_DxB__max_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__max_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__max_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (float in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = fmaxf (x, y) ;
 
+// op is second: 0 (false) for this operator
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64: 0 (false) for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64: 0 (false) for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; "(none)" here:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_MAX_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_fp32   // returns nothing; NOTE(review): no GB_DISABLE guard here, unlike the kernels below
+(
+    GrB_Matrix C,               // matrix being accumulated into (banner: C += A+B, all three dense)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"     // the whole computation is pasted in from this template
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_fp32     // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // output matrix (banner: C = A+B, all three dense)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{ 
+    #if GB_DISABLE              // GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_MAX_FP32
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole computation is pasted in from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_fp32     // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += A)
+    const GrB_Matrix A,         // matrix accumulated into C (the banner calls it sparse)
+    const int64_t *GB_RESTRICT kfirst_slice,    // three slice arrays handed to the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably a per-task partition of A's
+    const int64_t *GB_RESTRICT pstart_slice,    // entries -- TODO confirm against the caller
+    const int ntasks,           // presumably the number of tasks described by the slices -- TODO confirm
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_MAX_FP32
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // the whole computation is pasted in from this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_fp32     // GrB_NO_VALUE if compiled out, else GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix being accumulated into (banner: C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as float
+    const int nthreads          // presumably the thread count used by the template -- TODO confirm
+)
+{
+    #if GB_DISABLE              // GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_MAX_FP32
+    return (GrB_NO_VALUE) ;     // kernel body is not compiled in this configuration
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;          // typed local copy of the scalar
+        #include "GB_dense_subassign_22_template.c"     // presumably consumes ywork -- TODO confirm
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): duplicates the return just above; looks unreachable
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_fp64.c b/Source/Generated/GB_binop__max_fp64.c
index 305f52c1b9..d94b9f1fe9 100644
--- a/Source/Generated/GB_binop__max_fp64.c
+++ b/Source/Generated/GB_binop__max_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_fp64
-// A.*B function (eWiseMult):  GB_AemultB__max_fp64
-// A*D function (colscale):    GB_AxD__max_fp64
-// D*A function (rowscale):    GB_DxB__max_fp64
+// A+B function (eWiseAdd):         GB_AaddB__max_fp64
+// A.*B function (eWiseMult):       GB_AemultB__max_fp64
+// A*D function (colscale):         GB_AxD__max_fp64
+// D*A function (rowscale):         GB_DxB__max_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__max_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__max_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = fmax (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FP64 || GxB_NO_MAX_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_int16.c b/Source/Generated/GB_binop__max_int16.c
index f4d659442d..2a3626458a 100644
--- a/Source/Generated/GB_binop__max_int16.c
+++ b/Source/Generated/GB_binop__max_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_int16
-// A.*B function (eWiseMult):  GB_AemultB__max_int16
-// A*D function (colscale):    GB_AxD__max_int16
-// D*A function (rowscale):    GB_DxB__max_int16
+// A+B function (eWiseAdd):         GB_AaddB__max_int16
+// A.*B function (eWiseMult):       GB_AemultB__max_int16
+// A*D function (colscale):         GB_AxD__max_int16
+// D*A function (rowscale):         GB_DxB__max_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__max_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__max_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT16 || GxB_NO_MAX_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_int32.c b/Source/Generated/GB_binop__max_int32.c
index 4bd885b51b..cef00e8ed3 100644
--- a/Source/Generated/GB_binop__max_int32.c
+++ b/Source/Generated/GB_binop__max_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_int32
-// A.*B function (eWiseMult):  GB_AemultB__max_int32
-// A*D function (colscale):    GB_AxD__max_int32
-// D*A function (rowscale):    GB_DxB__max_int32
+// A+B function (eWiseAdd):         GB_AaddB__max_int32
+// A.*B function (eWiseMult):       GB_AemultB__max_int32
+// A*D function (colscale):         GB_AxD__max_int32
+// D*A function (rowscale):         GB_DxB__max_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__max_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__max_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT32 || GxB_NO_MAX_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_int64.c b/Source/Generated/GB_binop__max_int64.c
index 81c9a21771..092060d434 100644
--- a/Source/Generated/GB_binop__max_int64.c
+++ b/Source/Generated/GB_binop__max_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_int64
-// A.*B function (eWiseMult):  GB_AemultB__max_int64
-// A*D function (colscale):    GB_AxD__max_int64
-// D*A function (rowscale):    GB_DxB__max_int64
+// A+B function (eWiseAdd):         GB_AaddB__max_int64
+// A.*B function (eWiseMult):       GB_AemultB__max_int64
+// A*D function (colscale):         GB_AxD__max_int64
+// D*A function (rowscale):         GB_DxB__max_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__max_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__max_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT64 || GxB_NO_MAX_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_int8.c b/Source/Generated/GB_binop__max_int8.c
index 374a983c18..5a7260e146 100644
--- a/Source/Generated/GB_binop__max_int8.c
+++ b/Source/Generated/GB_binop__max_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_int8
-// A.*B function (eWiseMult):  GB_AemultB__max_int8
-// A*D function (colscale):    GB_AxD__max_int8
-// D*A function (rowscale):    GB_DxB__max_int8
+// A+B function (eWiseAdd):         GB_AaddB__max_int8
+// A.*B function (eWiseMult):       GB_AemultB__max_int8
+// A*D function (colscale):         GB_AxD__max_int8
+// D*A function (rowscale):         GB_DxB__max_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__max_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__max_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT8 || GxB_NO_MAX_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_uint16.c b/Source/Generated/GB_binop__max_uint16.c
index 8a42498559..1a4f8d5b9f 100644
--- a/Source/Generated/GB_binop__max_uint16.c
+++ b/Source/Generated/GB_binop__max_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_uint16
-// A.*B function (eWiseMult):  GB_AemultB__max_uint16
-// A*D function (colscale):    GB_AxD__max_uint16
-// D*A function (rowscale):    GB_DxB__max_uint16
+// A+B function (eWiseAdd):         GB_AaddB__max_uint16
+// A.*B function (eWiseMult):       GB_AemultB__max_uint16
+// A*D function (colscale):         GB_AxD__max_uint16
+// D*A function (rowscale):         GB_DxB__max_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__max_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__max_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT16 || GxB_NO_MAX_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_uint32.c b/Source/Generated/GB_binop__max_uint32.c
index 9582e2addd..3c5126fe37 100644
--- a/Source/Generated/GB_binop__max_uint32.c
+++ b/Source/Generated/GB_binop__max_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_uint32
-// A.*B function (eWiseMult):  GB_AemultB__max_uint32
-// A*D function (colscale):    GB_AxD__max_uint32
-// D*A function (rowscale):    GB_DxB__max_uint32
+// A+B function (eWiseAdd):         GB_AaddB__max_uint32
+// A.*B function (eWiseMult):       GB_AemultB__max_uint32
+// A*D function (colscale):         GB_AxD__max_uint32
+// D*A function (rowscale):         GB_DxB__max_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__max_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__max_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT32 || GxB_NO_MAX_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_uint64.c b/Source/Generated/GB_binop__max_uint64.c
index be344655e5..6e6f7e71b2 100644
--- a/Source/Generated/GB_binop__max_uint64.c
+++ b/Source/Generated/GB_binop__max_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_uint64
-// A.*B function (eWiseMult):  GB_AemultB__max_uint64
-// A*D function (colscale):    GB_AxD__max_uint64
-// D*A function (rowscale):    GB_DxB__max_uint64
+// A+B function (eWiseAdd):         GB_AaddB__max_uint64
+// A.*B function (eWiseMult):       GB_AemultB__max_uint64
+// A*D function (colscale):         GB_AxD__max_uint64
+// D*A function (rowscale):         GB_DxB__max_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__max_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__max_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT64 || GxB_NO_MAX_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__max_uint8.c b/Source/Generated/GB_binop__max_uint8.c
index af32ed3ebf..ad2952fb1c 100644
--- a/Source/Generated/GB_binop__max_uint8.c
+++ b/Source/Generated/GB_binop__max_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__max_uint8
-// A.*B function (eWiseMult):  GB_AemultB__max_uint8
-// A*D function (colscale):    GB_AxD__max_uint8
-// D*A function (rowscale):    GB_DxB__max_uint8
+// A+B function (eWiseAdd):         GB_AaddB__max_uint8
+// A.*B function (eWiseMult):       GB_AemultB__max_uint8
+// A*D function (colscale):         GB_AxD__max_uint8
+// D*A function (rowscale):         GB_DxB__max_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__max_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__max_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__max_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__max_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMAX (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT8 || GxB_NO_MAX_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__max_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__max_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__max_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_fp32.c b/Source/Generated/GB_binop__min_fp32.c
index f678174ba2..91f0984317 100644
--- a/Source/Generated/GB_binop__min_fp32.c
+++ b/Source/Generated/GB_binop__min_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_fp32
-// A.*B function (eWiseMult):  GB_AemultB__min_fp32
-// A*D function (colscale):    GB_AxD__min_fp32
-// D*A function (rowscale):    GB_DxB__min_fp32
+// A+B function (eWiseAdd):         GB_AaddB__min_fp32
+// A.*B function (eWiseMult):       GB_AemultB__min_fp32
+// A*D function (colscale):         GB_AxD__min_fp32
+// D*A function (rowscale):         GB_DxB__min_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__min_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__min_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = fminf (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FP32 || GxB_NO_MIN_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_fp64.c b/Source/Generated/GB_binop__min_fp64.c
index c8292721ae..281390422b 100644
--- a/Source/Generated/GB_binop__min_fp64.c
+++ b/Source/Generated/GB_binop__min_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_fp64
-// A.*B function (eWiseMult):  GB_AemultB__min_fp64
-// A*D function (colscale):    GB_AxD__min_fp64
-// D*A function (rowscale):    GB_DxB__min_fp64
+// A+B function (eWiseAdd):         GB_AaddB__min_fp64
+// A.*B function (eWiseMult):       GB_AemultB__min_fp64
+// A*D function (colscale):         GB_AxD__min_fp64
+// D*A function (rowscale):         GB_DxB__min_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__min_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__min_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = fmin (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FP64 || GxB_NO_MIN_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_int16.c b/Source/Generated/GB_binop__min_int16.c
index 72a6e459d7..bd885782f8 100644
--- a/Source/Generated/GB_binop__min_int16.c
+++ b/Source/Generated/GB_binop__min_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_int16
-// A.*B function (eWiseMult):  GB_AemultB__min_int16
-// A*D function (colscale):    GB_AxD__min_int16
-// D*A function (rowscale):    GB_DxB__min_int16
+// A+B function (eWiseAdd):         GB_AaddB__min_int16
+// A.*B function (eWiseMult):       GB_AemultB__min_int16
+// A*D function (colscale):         GB_AxD__min_int16
+// D*A function (rowscale):         GB_DxB__min_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__min_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__min_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT16 || GxB_NO_MIN_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_int32.c b/Source/Generated/GB_binop__min_int32.c
index 4309e20c0a..eb160d5da7 100644
--- a/Source/Generated/GB_binop__min_int32.c
+++ b/Source/Generated/GB_binop__min_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_int32
-// A.*B function (eWiseMult):  GB_AemultB__min_int32
-// A*D function (colscale):    GB_AxD__min_int32
-// D*A function (rowscale):    GB_DxB__min_int32
+// A+B function (eWiseAdd):         GB_AaddB__min_int32
+// A.*B function (eWiseMult):       GB_AemultB__min_int32
+// A*D function (colscale):         GB_AxD__min_int32
+// D*A function (rowscale):         GB_DxB__min_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__min_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__min_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT32 || GxB_NO_MIN_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_int64.c b/Source/Generated/GB_binop__min_int64.c
index 33f140f894..18c246815b 100644
--- a/Source/Generated/GB_binop__min_int64.c
+++ b/Source/Generated/GB_binop__min_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_int64
-// A.*B function (eWiseMult):  GB_AemultB__min_int64
-// A*D function (colscale):    GB_AxD__min_int64
-// D*A function (rowscale):    GB_DxB__min_int64
+// A+B function (eWiseAdd):         GB_AaddB__min_int64
+// A.*B function (eWiseMult):       GB_AemultB__min_int64
+// A*D function (colscale):         GB_AxD__min_int64
+// D*A function (rowscale):         GB_DxB__min_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__min_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__min_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT64 || GxB_NO_MIN_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_int8.c b/Source/Generated/GB_binop__min_int8.c
index c1f890392e..91d9e9b5fd 100644
--- a/Source/Generated/GB_binop__min_int8.c
+++ b/Source/Generated/GB_binop__min_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_int8
-// A.*B function (eWiseMult):  GB_AemultB__min_int8
-// A*D function (colscale):    GB_AxD__min_int8
-// D*A function (rowscale):    GB_DxB__min_int8
+// A+B function (eWiseAdd):         GB_AaddB__min_int8
+// A.*B function (eWiseMult):       GB_AemultB__min_int8
+// A*D function (colscale):         GB_AxD__min_int8
+// D*A function (rowscale):         GB_DxB__min_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__min_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__min_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT8 || GxB_NO_MIN_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_uint16.c b/Source/Generated/GB_binop__min_uint16.c
index b1149fcfea..605adb87bc 100644
--- a/Source/Generated/GB_binop__min_uint16.c
+++ b/Source/Generated/GB_binop__min_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_uint16
-// A.*B function (eWiseMult):  GB_AemultB__min_uint16
-// A*D function (colscale):    GB_AxD__min_uint16
-// D*A function (rowscale):    GB_DxB__min_uint16
+// A+B function (eWiseAdd):         GB_AaddB__min_uint16
+// A.*B function (eWiseMult):       GB_AemultB__min_uint16
+// A*D function (colscale):         GB_AxD__min_uint16
+// D*A function (rowscale):         GB_DxB__min_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__min_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__min_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT16 || GxB_NO_MIN_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_uint32.c b/Source/Generated/GB_binop__min_uint32.c
index 786cb33e60..5efd100b20 100644
--- a/Source/Generated/GB_binop__min_uint32.c
+++ b/Source/Generated/GB_binop__min_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_uint32
-// A.*B function (eWiseMult):  GB_AemultB__min_uint32
-// A*D function (colscale):    GB_AxD__min_uint32
-// D*A function (rowscale):    GB_DxB__min_uint32
+// A+B function (eWiseAdd):         GB_AaddB__min_uint32
+// A.*B function (eWiseMult):       GB_AemultB__min_uint32
+// A*D function (colscale):         GB_AxD__min_uint32
+// D*A function (rowscale):         GB_DxB__min_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__min_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__min_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT32 || GxB_NO_MIN_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_uint64.c b/Source/Generated/GB_binop__min_uint64.c
index 1379a435d3..c26ee84f87 100644
--- a/Source/Generated/GB_binop__min_uint64.c
+++ b/Source/Generated/GB_binop__min_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_uint64
-// A.*B function (eWiseMult):  GB_AemultB__min_uint64
-// A*D function (colscale):    GB_AxD__min_uint64
-// D*A function (rowscale):    GB_DxB__min_uint64
+// A+B function (eWiseAdd):         GB_AaddB__min_uint64
+// A.*B function (eWiseMult):       GB_AemultB__min_uint64
+// A*D function (colscale):         GB_AxD__min_uint64
+// D*A function (rowscale):         GB_DxB__min_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__min_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__min_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT64 || GxB_NO_MIN_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__min_uint8.c b/Source/Generated/GB_binop__min_uint8.c
index fd94cca898..39adafbef4 100644
--- a/Source/Generated/GB_binop__min_uint8.c
+++ b/Source/Generated/GB_binop__min_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__min_uint8
-// A.*B function (eWiseMult):  GB_AemultB__min_uint8
-// A*D function (colscale):    GB_AxD__min_uint8
-// D*A function (rowscale):    GB_DxB__min_uint8
+// A+B function (eWiseAdd):         GB_AaddB__min_uint8
+// A.*B function (eWiseMult):       GB_AemultB__min_uint8
+// A*D function (colscale):         GB_AxD__min_uint8
+// D*A function (rowscale):         GB_DxB__min_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__min_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__min_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__min_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__min_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IMIN (x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT8 || GxB_NO_MIN_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__min_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__min_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__min_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__min_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__min_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__min_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_fp32.c b/Source/Generated/GB_binop__minus_fp32.c
index 5a2ca048a8..d6c17222e2 100644
--- a/Source/Generated/GB_binop__minus_fp32.c
+++ b/Source/Generated/GB_binop__minus_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_fp32
-// A.*B function (eWiseMult):  GB_AemultB__minus_fp32
-// A*D function (colscale):    GB_AxD__minus_fp32
-// D*A function (rowscale):    GB_DxB__minus_fp32
+// A+B function (eWiseAdd):         GB_AaddB__minus_fp32
+// A.*B function (eWiseMult):       GB_AemultB__minus_fp32
+// A*D function (colscale):         GB_AxD__minus_fp32
+// D*A function (rowscale):         GB_DxB__minus_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    1
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    GB_cblas_saxpy
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_FP32 || GxB_NO_MINUS_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_fp64.c b/Source/Generated/GB_binop__minus_fp64.c
index bd943f95ef..3039ff71a9 100644
--- a/Source/Generated/GB_binop__minus_fp64.c
+++ b/Source/Generated/GB_binop__minus_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_fp64
-// A.*B function (eWiseMult):  GB_AemultB__minus_fp64
-// A*D function (colscale):    GB_AxD__minus_fp64
-// D*A function (rowscale):    GB_DxB__minus_fp64
+// A+B function (eWiseAdd):         GB_AaddB__minus_fp64
+// A.*B function (eWiseMult):       GB_AemultB__minus_fp64
+// A*D function (colscale):         GB_AxD__minus_fp64
+// D*A function (rowscale):         GB_DxB__minus_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    1
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    GB_cblas_daxpy
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_FP64 || GxB_NO_MINUS_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_int16.c b/Source/Generated/GB_binop__minus_int16.c
index ce374c35bf..076d16499c 100644
--- a/Source/Generated/GB_binop__minus_int16.c
+++ b/Source/Generated/GB_binop__minus_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_int16
-// A.*B function (eWiseMult):  GB_AemultB__minus_int16
-// A*D function (colscale):    GB_AxD__minus_int16
-// D*A function (rowscale):    GB_DxB__minus_int16
+// A+B function (eWiseAdd):         GB_AaddB__minus_int16
+// A.*B function (eWiseMult):       GB_AemultB__minus_int16
+// A*D function (colscale):         GB_AxD__minus_int16
+// D*A function (rowscale):         GB_DxB__minus_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_INT16 || GxB_NO_MINUS_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_int32.c b/Source/Generated/GB_binop__minus_int32.c
index 55e6de0ead..3cedd344fb 100644
--- a/Source/Generated/GB_binop__minus_int32.c
+++ b/Source/Generated/GB_binop__minus_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_int32
-// A.*B function (eWiseMult):  GB_AemultB__minus_int32
-// A*D function (colscale):    GB_AxD__minus_int32
-// D*A function (rowscale):    GB_DxB__minus_int32
+// A+B function (eWiseAdd):         GB_AaddB__minus_int32
+// A.*B function (eWiseMult):       GB_AemultB__minus_int32
+// A*D function (colscale):         GB_AxD__minus_int32
+// D*A function (rowscale):         GB_DxB__minus_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_INT32 || GxB_NO_MINUS_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_int64.c b/Source/Generated/GB_binop__minus_int64.c
index 731d36ebc7..2c54b85d91 100644
--- a/Source/Generated/GB_binop__minus_int64.c
+++ b/Source/Generated/GB_binop__minus_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_int64
-// A.*B function (eWiseMult):  GB_AemultB__minus_int64
-// A*D function (colscale):    GB_AxD__minus_int64
-// D*A function (rowscale):    GB_DxB__minus_int64
+// A+B function (eWiseAdd):         GB_AaddB__minus_int64
+// A.*B function (eWiseMult):       GB_AemultB__minus_int64
+// A*D function (colscale):         GB_AxD__minus_int64
+// D*A function (rowscale):         GB_DxB__minus_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_INT64 || GxB_NO_MINUS_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_int8.c b/Source/Generated/GB_binop__minus_int8.c
index cde439d422..8bcd06fb0b 100644
--- a/Source/Generated/GB_binop__minus_int8.c
+++ b/Source/Generated/GB_binop__minus_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_int8
-// A.*B function (eWiseMult):  GB_AemultB__minus_int8
-// A*D function (colscale):    GB_AxD__minus_int8
-// D*A function (rowscale):    GB_DxB__minus_int8
+// A+B function (eWiseAdd):         GB_AaddB__minus_int8
+// A.*B function (eWiseMult):       GB_AemultB__minus_int8
+// A*D function (colscale):         GB_AxD__minus_int8
+// D*A function (rowscale):         GB_DxB__minus_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_INT8 || GxB_NO_MINUS_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Kernel for this file's GB_BINOP (z = x - y) on int8_t.  NOTE(review): this one
+// returns void and has no GB_DISABLE guard, unlike the GrB_Info kernels below.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_int8
+(
+    GrB_Matrix C,           // accumulated into
+    const GrB_Matrix A,     // first operand of A+B
+    const GrB_Matrix B,     // second operand of A+B
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c" // kernel body, included inline
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" presumably means GB_BINOP: z = x - y)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_int8  // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,           // receives the result
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE          // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c" // kernel body, included inline
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_int8   // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, presumably used by
+    const int64_t *GB_RESTRICT klast_slice,     // the included template and built
+    const int64_t *GB_RESTRICT pstart_slice,    // by GB_ek_slice -- TODO confirm
+    const int ntasks,                           // presumably the number of slices
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // kernel body, included inline
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_int8   // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int8_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c" // kernel body, included inline
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // redundant here: the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_uint16.c b/Source/Generated/GB_binop__minus_uint16.c
index 246aa86106..fae803586c 100644
--- a/Source/Generated/GB_binop__minus_uint16.c
+++ b/Source/Generated/GB_binop__minus_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_uint16
-// A.*B function (eWiseMult):  GB_AemultB__minus_uint16
-// A*D function (colscale):    GB_AxD__minus_uint16
-// D*A function (rowscale):    GB_DxB__minus_uint16
+// A+B function (eWiseAdd):         GB_AaddB__minus_uint16
+// A.*B function (eWiseMult):       GB_AemultB__minus_uint16
+// A*D function (colscale):         GB_AxD__minus_uint16
+// D*A function (rowscale):         GB_DxB__minus_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_UINT16 || GxB_NO_MINUS_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Kernel for this file's GB_BINOP (z = x - y) on uint16_t.  NOTE(review): this
+// returns void and has no GB_DISABLE guard, unlike the GrB_Info kernels below.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_uint16
+(
+    GrB_Matrix C,           // accumulated into
+    const GrB_Matrix A,     // first operand of A+B
+    const GrB_Matrix B,     // second operand of A+B
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c" // kernel body, included inline
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" presumably means GB_BINOP: z = x - y)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint16 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,           // receives the result
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE          // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c" // kernel body, included inline
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_uint16 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, presumably used by
+    const int64_t *GB_RESTRICT klast_slice,     // the included template and built
+    const int64_t *GB_RESTRICT pstart_slice,    // by GB_ek_slice -- TODO confirm
+    const int ntasks,                           // presumably the number of slices
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // kernel body, included inline
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_uint16 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint16_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ; // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c" // kernel body, included inline
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // redundant here: the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_uint32.c b/Source/Generated/GB_binop__minus_uint32.c
index 498839843e..04a32363f6 100644
--- a/Source/Generated/GB_binop__minus_uint32.c
+++ b/Source/Generated/GB_binop__minus_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_uint32
-// A.*B function (eWiseMult):  GB_AemultB__minus_uint32
-// A*D function (colscale):    GB_AxD__minus_uint32
-// D*A function (rowscale):    GB_DxB__minus_uint32
+// A+B function (eWiseAdd):         GB_AaddB__minus_uint32
+// A.*B function (eWiseMult):       GB_AemultB__minus_uint32
+// A*D function (colscale):         GB_AxD__minus_uint32
+// D*A function (rowscale):         GB_DxB__minus_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_UINT32 || GxB_NO_MINUS_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Kernel for this file's GB_BINOP (z = x - y) on uint32_t.  NOTE(review): this
+// returns void and has no GB_DISABLE guard, unlike the GrB_Info kernels below.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_uint32
+(
+    GrB_Matrix C,           // accumulated into
+    const GrB_Matrix A,     // first operand of A+B
+    const GrB_Matrix B,     // second operand of A+B
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c" // kernel body, included inline
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" presumably means GB_BINOP: z = x - y)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint32 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,           // receives the result
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE          // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c" // kernel body, included inline
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_uint32 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, presumably used by
+    const int64_t *GB_RESTRICT klast_slice,     // the included template and built
+    const int64_t *GB_RESTRICT pstart_slice,    // by GB_ek_slice -- TODO confirm
+    const int ntasks,                           // presumably the number of slices
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // kernel body, included inline
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_uint32 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint32_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ; // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c" // kernel body, included inline
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // redundant here: the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_uint64.c b/Source/Generated/GB_binop__minus_uint64.c
index 4cc88e80ca..4409cdd79e 100644
--- a/Source/Generated/GB_binop__minus_uint64.c
+++ b/Source/Generated/GB_binop__minus_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_uint64
-// A.*B function (eWiseMult):  GB_AemultB__minus_uint64
-// A*D function (colscale):    GB_AxD__minus_uint64
-// D*A function (rowscale):    GB_DxB__minus_uint64
+// A+B function (eWiseAdd):         GB_AaddB__minus_uint64
+// A.*B function (eWiseMult):       GB_AemultB__minus_uint64
+// A*D function (colscale):         GB_AxD__minus_uint64
+// D*A function (rowscale):         GB_DxB__minus_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_UINT64 || GxB_NO_MINUS_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Kernel for this file's GB_BINOP (z = x - y) on uint64_t.  NOTE(review): this
+// returns void and has no GB_DISABLE guard, unlike the GrB_Info kernels below.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_uint64
+(
+    GrB_Matrix C,           // accumulated into
+    const GrB_Matrix A,     // first operand of A+B
+    const GrB_Matrix B,     // second operand of A+B
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c" // kernel body, included inline
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" presumably means GB_BINOP: z = x - y)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint64 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,           // receives the result
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE          // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c" // kernel body, included inline
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_uint64 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, presumably used by
+    const int64_t *GB_RESTRICT klast_slice,     // the included template and built
+    const int64_t *GB_RESTRICT pstart_slice,    // by GB_ek_slice -- TODO confirm
+    const int ntasks,                           // presumably the number of slices
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // kernel body, included inline
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_uint64 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint64_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ; // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c" // kernel body, included inline
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // redundant here: the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__minus_uint8.c b/Source/Generated/GB_binop__minus_uint8.c
index b9e16e01e5..9b5b929b56 100644
--- a/Source/Generated/GB_binop__minus_uint8.c
+++ b/Source/Generated/GB_binop__minus_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__minus_uint8
-// A.*B function (eWiseMult):  GB_AemultB__minus_uint8
-// A*D function (colscale):    GB_AxD__minus_uint8
-// D*A function (rowscale):    GB_DxB__minus_uint8
+// A+B function (eWiseAdd):         GB_AaddB__minus_uint8
+// A.*B function (eWiseMult):       GB_AemultB__minus_uint8
+// A*D function (colscale):         GB_AxD__minus_uint8
+// D*A function (rowscale):         GB_DxB__minus_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__minus_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__minus_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__minus_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__minus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x - y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_MINUS || GxB_NO_UINT8 || GxB_NO_MINUS_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// Kernel for this file's GB_BINOP (z = x - y) on uint8_t.  NOTE(review): this
+// returns void and has no GB_DISABLE guard, unlike the GrB_Info kernels below.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__minus_uint8
+(
+    GrB_Matrix C,           // accumulated into
+    const GrB_Matrix A,     // first operand of A+B
+    const GrB_Matrix B,     // second operand of A+B
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c" // kernel body, included inline
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" presumably means GB_BINOP: z = x - y)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__minus_uint8 // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,           // receives the result
+    const GrB_Matrix A,     // first operand
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE          // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c" // kernel body, included inline
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__minus_uint8  // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays, presumably used by
+    const int64_t *GB_RESTRICT klast_slice,     // the included template and built
+    const int64_t *GB_RESTRICT pstart_slice,    // by GB_ek_slice -- TODO confirm
+    const int ntasks,                           // presumably the number of slices
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c" // kernel body, included inline
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__minus_uint8  // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,               // dense matrix accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint8_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c" // kernel body, included inline
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;  // redundant here: the block above already returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__minus_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__minus_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_fp32.c b/Source/Generated/GB_binop__ne_fp32.c
index 8b8bf918b4..b7af966348 100644
--- a/Source/Generated/GB_binop__ne_fp32.c
+++ b/Source/Generated/GB_binop__ne_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_fp32
-// A.*B function (eWiseMult):  GB_AemultB__ne_fp32
-// A*D function (colscale):    GB_AxD__ne_fp32
-// D*A function (rowscale):    GB_DxB__ne_fp32
+// A+B function (eWiseAdd):         GB_AaddB__ne_fp32
+// A.*B function (eWiseMult):       GB_AemultB__ne_fp32
+// A*D function (colscale):         GB_AxD__ne_fp32
+// D*A function (rowscale):         GB_DxB__ne_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_fp32
 
 // C type:   bool
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_FP32 || GxB_NO_NE_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0 // never compiled: this file lists the C+=A+B kernel for NE as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none) // placeholder where the kernel name would be; not valid C on its own
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" presumably means GB_BINOP: z = (x != y))
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_fp32  // GrB_NO_VALUE or GrB_SUCCESS
+(
+    GrB_Matrix C,           // receives the result (C type for this file is bool)
+    const GrB_Matrix A,     // first operand (A type for this file is float)
+    const GrB_Matrix B,     // second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE          // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c" // kernel body, included inline
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_fp32  // always GrB_NO_VALUE: no kernel for NE
+(
+    GrB_Matrix C,                               // dense matrix; left untouched here
+    const GrB_Matrix A,                         // sparse matrix; unused here
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays; unused because the
+    const int64_t *GB_RESTRICT klast_slice,     // kernel body below is disabled
+    const int64_t *GB_RESTRICT pstart_slice,    // with "#if 0"
+    const int ntasks,                           // unused here
+    const int nthreads                          // unused here
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0   // the C+=A kernel body is not generated for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_NO_VALUE) ; // no work was done, so do not report GrB_SUCCESS
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_fp32  // always GrB_NO_VALUE: no kernel for NE
+(
+    GrB_Matrix C,               // dense matrix; left untouched here
+    const GB_void *p_ywork,     // untyped pointer to the scalar; unused here
+    const int nthreads          // unused here
+)
+{
+    #if GB_DISABLE      // nonzero: this operator/type is compiled out
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0   // the C+=x kernel body is not generated for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_NO_VALUE) ; // no work was done, so do not report GrB_SUCCESS
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_fp64.c b/Source/Generated/GB_binop__ne_fp64.c
index 602b587eed..03637ddc74 100644
--- a/Source/Generated/GB_binop__ne_fp64.c
+++ b/Source/Generated/GB_binop__ne_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_fp64
-// A.*B function (eWiseMult):  GB_AemultB__ne_fp64
-// A*D function (colscale):    GB_AxD__ne_fp64
-// D*A function (rowscale):    GB_DxB__ne_fp64
+// A+B function (eWiseAdd):         GB_AaddB__ne_fp64
+// A.*B function (eWiseMult):       GB_AemultB__ne_fp64
+// A*D function (colscale):         GB_AxD__ne_fp64
+// D*A function (rowscale):         GB_DxB__ne_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_fp64
 
 // C type:   bool
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0, i.e. false: this file's op is ne)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_FP64 || GxB_NO_NE_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (not NE).
+
+void (none)                     // "(none)": placeholder where a name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" denotes the binary op, here ne)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_fp64
+(
+    GrB_Matrix C,               // result matrix ("C type" is bool in this file)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably thread count for the template
+)
+{ 
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;     // no hard-coded kernel was compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body pasted here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_fp64
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // task slicing; TODO confirm
+    const int ntasks,           // presumably the number of slice tasks
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template compiled out: C is left untouched
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_fp64
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // compiled out: the scalar is never read
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_int16.c b/Source/Generated/GB_binop__ne_int16.c
index 7944c756db..26e5094017 100644
--- a/Source/Generated/GB_binop__ne_int16.c
+++ b/Source/Generated/GB_binop__ne_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_int16
-// A.*B function (eWiseMult):  GB_AemultB__ne_int16
-// A*D function (colscale):    GB_AxD__ne_int16
-// D*A function (rowscale):    GB_DxB__ne_int16
+// A+B function (eWiseAdd):         GB_AaddB__ne_int16
+// A.*B function (eWiseMult):       GB_AemultB__ne_int16
+// A*D function (colscale):         GB_AxD__ne_int16
+// D*A function (rowscale):         GB_DxB__ne_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_int16
 
 // C type:   bool
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0, i.e. false: this file's op is ne)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_INT16 || GxB_NO_NE_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (not NE).
+
+void (none)                     // "(none)": placeholder where a name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" denotes the binary op, here ne)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int16
+(
+    GrB_Matrix C,               // result matrix ("C type" is bool in this file)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably thread count for the template
+)
+{ 
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;     // no hard-coded kernel was compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body pasted here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_int16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // task slicing; TODO confirm
+    const int ntasks,           // presumably the number of slice tasks
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template compiled out: C is left untouched
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_int16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // compiled out: the scalar is never read
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_int32.c b/Source/Generated/GB_binop__ne_int32.c
index 4cf63e6220..8086931d89 100644
--- a/Source/Generated/GB_binop__ne_int32.c
+++ b/Source/Generated/GB_binop__ne_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_int32
-// A.*B function (eWiseMult):  GB_AemultB__ne_int32
-// A*D function (colscale):    GB_AxD__ne_int32
-// D*A function (rowscale):    GB_DxB__ne_int32
+// A+B function (eWiseAdd):         GB_AaddB__ne_int32
+// A.*B function (eWiseMult):       GB_AemultB__ne_int32
+// A*D function (colscale):         GB_AxD__ne_int32
+// D*A function (rowscale):         GB_DxB__ne_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_int32
 
 // C type:   bool
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0, i.e. false: this file's op is ne)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_INT32 || GxB_NO_NE_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (not NE).
+
+void (none)                     // "(none)": placeholder where a name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" denotes the binary op, here ne)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int32
+(
+    GrB_Matrix C,               // result matrix ("C type" is bool in this file)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably thread count for the template
+)
+{ 
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;     // no hard-coded kernel was compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body pasted here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_int32
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // task slicing; TODO confirm
+    const int ntasks,           // presumably the number of slice tasks
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template compiled out: C is left untouched
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_int32
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // compiled out: the scalar is never read
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_int64.c b/Source/Generated/GB_binop__ne_int64.c
index 83b8cf36f6..360ad96570 100644
--- a/Source/Generated/GB_binop__ne_int64.c
+++ b/Source/Generated/GB_binop__ne_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_int64
-// A.*B function (eWiseMult):  GB_AemultB__ne_int64
-// A*D function (colscale):    GB_AxD__ne_int64
-// D*A function (rowscale):    GB_DxB__ne_int64
+// A+B function (eWiseAdd):         GB_AaddB__ne_int64
+// A.*B function (eWiseMult):       GB_AemultB__ne_int64
+// A*D function (colscale):         GB_AxD__ne_int64
+// D*A function (rowscale):         GB_DxB__ne_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_int64
 
 // C type:   bool
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0, i.e. false: this file's op is ne)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_INT64 || GxB_NO_NE_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (not NE).
+
+void (none)                     // "(none)": placeholder where a name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" denotes the binary op, here ne)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int64
+(
+    GrB_Matrix C,               // result matrix ("C type" is bool in this file)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably thread count for the template
+)
+{ 
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;     // no hard-coded kernel was compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body pasted here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_int64
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // task slicing; TODO confirm
+    const int ntasks,           // presumably the number of slice tasks
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template compiled out: C is left untouched
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_int64
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // compiled out: the scalar is never read
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_int8.c b/Source/Generated/GB_binop__ne_int8.c
index 35622ddd39..d41f65990b 100644
--- a/Source/Generated/GB_binop__ne_int8.c
+++ b/Source/Generated/GB_binop__ne_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_int8
-// A.*B function (eWiseMult):  GB_AemultB__ne_int8
-// A*D function (colscale):    GB_AxD__ne_int8
-// D*A function (rowscale):    GB_DxB__ne_int8
+// A+B function (eWiseAdd):         GB_AaddB__ne_int8
+// A.*B function (eWiseMult):       GB_AemultB__ne_int8
+// A*D function (colscale):         GB_AxD__ne_int8
+// D*A function (rowscale):         GB_DxB__ne_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_int8
 
 // C type:   bool
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0, i.e. false: this file's op is ne)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_INT8 || GxB_NO_NE_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (not NE).
+
+void (none)                     // "(none)": placeholder where a name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" denotes the binary op, here ne)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_int8
+(
+    GrB_Matrix C,               // result matrix ("C type" is bool in this file)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably thread count for the template
+)
+{ 
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;     // no hard-coded kernel was compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body pasted here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_int8
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // task slicing; TODO confirm
+    const int ntasks,           // presumably the number of slice tasks
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template compiled out: C is left untouched
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_int8
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // compiled out: the scalar is never read
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_uint16.c b/Source/Generated/GB_binop__ne_uint16.c
index 1ea5fc926d..3327e4e9b2 100644
--- a/Source/Generated/GB_binop__ne_uint16.c
+++ b/Source/Generated/GB_binop__ne_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_uint16
-// A.*B function (eWiseMult):  GB_AemultB__ne_uint16
-// A*D function (colscale):    GB_AxD__ne_uint16
-// D*A function (rowscale):    GB_DxB__ne_uint16
+// A+B function (eWiseAdd):         GB_AaddB__ne_uint16
+// A.*B function (eWiseMult):       GB_AemultB__ne_uint16
+// A*D function (colscale):         GB_AxD__ne_uint16
+// D*A function (rowscale):         GB_DxB__ne_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_uint16
 
 // C type:   bool
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (bool for this operator)
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// flag: op is second (0, i.e. false: this file's op is ne)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag: op is plus_fp32 or plus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag: op is minus_fp32 or minus_fp64 (0, i.e. false, in this file)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this op and type; (none) = no routine:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_UINT16 || GxB_NO_NE_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (not generated for this operator)
+//------------------------------------------------------------------------------
+
+#if 0                           // whole kernel is compiled out
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (not NE).
+
+void (none)                     // "(none)": placeholder where a name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense ("+" denotes the binary op, here ne)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint16
+(
+    GrB_Matrix C,               // result matrix ("C type" is bool in this file)
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably thread count for the template
+)
+{ 
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;     // no hard-coded kernel was compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // kernel body pasted here
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_uint16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GrB_Matrix A,         // sparse matrix that would be accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT klast_slice,     // task slicing; TODO confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // task slicing; TODO confirm
+    const int ntasks,           // presumably the number of slice tasks
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // template compiled out: C is left untouched
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (no-op for this op)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_uint16
+(
+    GrB_Matrix C,               // dense matrix that would be accumulated into
+    const GB_void *p_ywork,     // untyped pointer to the scalar x
+    const int nthreads          // presumably the thread count
+)
+{
+    #if GB_DISABLE              // any of this file's GxB_NO_* flags is set
+    return (GrB_NO_VALUE) ;
+    #else
+    #if 0                       // compiled out: the scalar is never read
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read x as a bool
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE: reports success although nothing ran
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_uint32.c b/Source/Generated/GB_binop__ne_uint32.c
index b483508db9..45a97708fc 100644
--- a/Source/Generated/GB_binop__ne_uint32.c
+++ b/Source/Generated/GB_binop__ne_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_uint32
-// A.*B function (eWiseMult):  GB_AemultB__ne_uint32
-// A*D function (colscale):    GB_AxD__ne_uint32
-// D*A function (rowscale):    GB_DxB__ne_uint32
+// A+B function (eWiseAdd):         GB_AaddB__ne_uint32
+// A.*B function (eWiseMult):       GB_AemultB__ne_uint32
+// A*D function (colscale):         GB_AxD__ne_uint32
+// D*B function (rowscale):         GB_DxB__ne_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_uint32
 
 // C type:   bool
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_UINT32 || GxB_NO_NE_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: NE is not one of the ops listed below, so no C+=A+B kernel exists
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the placeholder emitted where a kernel name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably written by the template
+    const GrB_Matrix A,         // not referenced below; presumably read by the template
+    const GrB_Matrix B,         // not referenced below; presumably read by the template
+    const int nthreads          // presumably the template's thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_uint32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // unused here: the template below is compiled out
+    const GrB_Matrix A,         // unused here, for the same reason
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused here
+    const int64_t *GB_RESTRICT klast_slice,     // unused here
+    const int64_t *GB_RESTRICT pstart_slice,    // unused here
+    const int ntasks,           // unused here
+    const int nthreads          // unused here
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else
+    #if 0                       // the template is never compiled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without modifying C -- confirm callers never dispatch here
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_uint32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // unused here: the body below is compiled out
+    const GB_void *p_ywork,     // untyped pointer to the scalar; only read in the #if 0 body
+    const int nthreads          // unused here
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else
+    #if 0                       // scalar load and template are never compiled for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read the scalar through a bool pointer
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without modifying C -- confirm callers never dispatch here
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_uint64.c b/Source/Generated/GB_binop__ne_uint64.c
index 95cc785c19..e20689764b 100644
--- a/Source/Generated/GB_binop__ne_uint64.c
+++ b/Source/Generated/GB_binop__ne_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_uint64
-// A.*B function (eWiseMult):  GB_AemultB__ne_uint64
-// A*D function (colscale):    GB_AxD__ne_uint64
-// D*A function (rowscale):    GB_DxB__ne_uint64
+// A+B function (eWiseAdd):         GB_AaddB__ne_uint64
+// A.*B function (eWiseMult):       GB_AemultB__ne_uint64
+// A*D function (colscale):         GB_AxD__ne_uint64
+// D*B function (rowscale):         GB_DxB__ne_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_uint64
 
 // C type:   bool
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_UINT64 || GxB_NO_NE_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: NE is not one of the ops listed below, so no C+=A+B kernel exists
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the placeholder emitted where a kernel name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint64    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably written by the template
+    const GrB_Matrix A,         // not referenced below; presumably read by the template
+    const GrB_Matrix B,         // not referenced below; presumably read by the template
+    const int nthreads          // presumably the template's thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_uint64    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // unused here: the template below is compiled out
+    const GrB_Matrix A,         // unused here, for the same reason
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused here
+    const int64_t *GB_RESTRICT klast_slice,     // unused here
+    const int64_t *GB_RESTRICT pstart_slice,    // unused here
+    const int ntasks,           // unused here
+    const int nthreads          // unused here
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else
+    #if 0                       // the template is never compiled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without modifying C -- confirm callers never dispatch here
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_uint64    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // unused here: the body below is compiled out
+    const GB_void *p_ywork,     // untyped pointer to the scalar; only read in the #if 0 body
+    const int nthreads          // unused here
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else
+    #if 0                       // scalar load and template are never compiled for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read the scalar through a bool pointer
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without modifying C -- confirm callers never dispatch here
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__ne_uint8.c b/Source/Generated/GB_binop__ne_uint8.c
index 54c978d77b..087d5a2e2f 100644
--- a/Source/Generated/GB_binop__ne_uint8.c
+++ b/Source/Generated/GB_binop__ne_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__ne_uint8
-// A.*B function (eWiseMult):  GB_AemultB__ne_uint8
-// A*D function (colscale):    GB_AxD__ne_uint8
-// D*A function (rowscale):    GB_DxB__ne_uint8
+// A+B function (eWiseAdd):         GB_AaddB__ne_uint8
+// A.*B function (eWiseMult):       GB_AemultB__ne_uint8
+// A*D function (colscale):         GB_AxD__ne_uint8
+// D*B function (rowscale):         GB_DxB__ne_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__ne_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__ne_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__ne_uint8
 
 // C type:   bool
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x != y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_NE || GxB_NO_UINT8 || GxB_NO_NE_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: NE is not one of the ops listed below, so no C+=A+B kernel exists
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the placeholder emitted where a kernel name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__ne_uint8     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably written by the template
+    const GrB_Matrix A,         // not referenced below; presumably read by the template
+    const GrB_Matrix B,         // not referenced below; presumably read by the template
+    const int nthreads          // presumably the template's thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__ne_uint8     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // unused here: the template below is compiled out
+    const GrB_Matrix A,         // unused here, for the same reason
+    const int64_t *GB_RESTRICT kfirst_slice,    // unused here
+    const int64_t *GB_RESTRICT klast_slice,     // unused here
+    const int64_t *GB_RESTRICT pstart_slice,    // unused here
+    const int ntasks,           // unused here
+    const int nthreads          // unused here
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else
+    #if 0                       // the template is never compiled for this operator
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without modifying C -- confirm callers never dispatch here
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__ne_uint8     // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // unused here: the body below is compiled out
+    const GB_void *p_ywork,     // untyped pointer to the scalar; only read in the #if 0 body
+    const int nthreads          // unused here
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: this kernel is compiled out
+    #else
+    #if 0                       // scalar load and template are never compiled for this operator
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // would read the scalar through a bool pointer
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    #endif
+    return (GrB_SUCCESS) ;      // NOTE(review): success without modifying C -- confirm callers never dispatch here
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__ne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__ne_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__pair_bool.c b/Source/Generated/GB_binop__pair_bool.c
new file mode 100644
index 0000000000..7b0ee85485
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_bool.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_bool
+// A.*B function (eWiseMult):       GB_AemultB__pair_bool
+// A*D function (colscale):         GB_AxD__pair_bool
+// D*B function (rowscale):         GB_DxB__pair_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_bool
+
+// C type:   bool
+// A type:   bool
+// B type:   bool
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    bool
+
+#define GB_BTYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// aij = Ax [pA] (expands to an empty statement here: cij = 1 never reads aij)
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB] (expands to an empty statement here: cij = 1 never reads bij)
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_BOOL || GxB_NO_PAIR_BOOL)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: PAIR is not one of the ops listed below, so no C+=A+B kernel exists
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the placeholder emitted where a kernel name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_bool    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably written by the template
+    const GrB_Matrix A,         // not referenced below; presumably read by the template
+    const GrB_Matrix B,         // not referenced below; presumably read by the template
+    const int nthreads          // presumably the template's thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_bool    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably updated by the template
+    const GrB_Matrix A,         // not referenced below; presumably read by the template
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not referenced below,
+    const int64_t *GB_RESTRICT klast_slice,     // presumably consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the braced template below is the entire body
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_bool    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably updated by the template
+    const GB_void *p_ywork,     // untyped pointer to the scalar; dereferenced as bool below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else
+    
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;    // load the scalar through a bool pointer
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable: the block above always returns first
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_bool      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // its value array C->x is exposed below as Cx
+    const GrB_Matrix A, bool A_is_pattern,  // presumably: only A's pattern is needed -- confirm
+    const GrB_Matrix D, bool D_is_pattern,  // presumably: only D's pattern is needed -- confirm
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not referenced below,
+    const int64_t *GB_RESTRICT klast_slice,     // presumably consumed by the included
+    const int64_t *GB_RESTRICT pstart_slice,    // meta file -- TODO confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else
+    bool *GB_RESTRICT Cx = C->x ;   // typed alias of C->x; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_colscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_bool      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // its value array C->x is exposed below as Cx
+    const GrB_Matrix D, bool D_is_pattern,  // presumably: only D's pattern is needed -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // presumably: only B's pattern is needed -- confirm
+    int nthreads                // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else
+    bool *GB_RESTRICT Cx = C->x ;   // typed alias of C->x; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_rowscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_bool    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably the eWiseAdd result
+    const GrB_Matrix M,         // presumably the mask of C<M>=A+B -- TODO confirm
+    const bool Mask_struct,     // presumably: use only the mask's structure -- TODO confirm
+    const GrB_Matrix A,         // not referenced below; presumably the first operand
+    const GrB_Matrix B,         // not referenced below; presumably the second operand
+    const bool Ch_is_Mh,        // not referenced below; meaning is defined by the template
+    const int64_t *GB_RESTRICT C_to_M,  // mapping arrays: not referenced below,
+    const int64_t *GB_RESTRICT C_to_A,  // presumably consumed by the template
+    const int64_t *GB_RESTRICT C_to_B,  // -- TODO confirm
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably one entry per task -- confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_add_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_bool  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably the eWiseMult result
+    const GrB_Matrix M,         // presumably the mask of C<M>=A.*B -- TODO confirm
+    const bool Mask_struct,     // presumably: use only the mask's structure -- TODO confirm
+    const GrB_Matrix A,         // not referenced below; presumably the first operand
+    const GrB_Matrix B,         // not referenced below; presumably the second operand
+    const int64_t *GB_RESTRICT C_to_M,  // mapping arrays: not referenced below,
+    const int64_t *GB_RESTRICT C_to_A,  // presumably consumed by the template
+    const int64_t *GB_RESTRICT C_to_B,  // -- TODO confirm
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably one entry per task -- confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_emult_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_fp32.c b/Source/Generated/GB_binop__pair_fp32.c
new file mode 100644
index 0000000000..d772938b78
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_fp32.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_fp32
+// A.*B function (eWiseMult):       GB_AemultB__pair_fp32
+// A*D function (colscale):         GB_AxD__pair_fp32
+// D*B function (rowscale):         GB_DxB__pair_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_fp32
+
+// C type:   float
+// A type:   float
+// B type:   float
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    float
+
+#define GB_BTYPE \
+    float
+
+#define GB_CTYPE \
+    float
+
+// aij = Ax [pA] (expands to an empty statement here: cij = 1 never reads aij)
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB] (expands to an empty statement here: cij = 1 never reads bij)
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_FP32 || GxB_NO_PAIR_FP32)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: PAIR is not one of the ops listed below, so no C+=A+B kernel exists
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" is the placeholder emitted where a kernel name would go
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C+=A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_fp32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably written by the template
+    const GrB_Matrix A,         // not referenced below; presumably read by the template
+    const GrB_Matrix B,         // not referenced below; presumably read by the template
+    const int nthreads          // presumably the template's thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_fp32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably updated by the template
+    const GrB_Matrix A,         // not referenced below; presumably read by the template
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not referenced below,
+    const int64_t *GB_RESTRICT klast_slice,     // presumably consumed by the template
+    const int64_t *GB_RESTRICT pstart_slice,    // -- TODO confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the braced template below is the entire body
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_fp32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably updated by the template
+    const GB_void *p_ywork,     // untyped pointer to the scalar; dereferenced as float below
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;  // load the scalar through a float pointer
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;      // unreachable: the block above always returns first
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_fp32      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // its value array C->x is exposed below as Cx
+    const GrB_Matrix A, bool A_is_pattern,  // presumably: only A's pattern is needed -- confirm
+    const GrB_Matrix D, bool D_is_pattern,  // presumably: only D's pattern is needed -- confirm
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays: not referenced below,
+    const int64_t *GB_RESTRICT klast_slice,     // presumably consumed by the included
+    const int64_t *GB_RESTRICT pstart_slice,    // meta file -- TODO confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else
+    float *GB_RESTRICT Cx = C->x ;  // typed alias of C->x; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_colscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_fp32      // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // its value array C->x is exposed below as Cx
+    const GrB_Matrix D, bool D_is_pattern,  // presumably: only D's pattern is needed -- confirm
+    const GrB_Matrix B, bool B_is_pattern,  // presumably: only B's pattern is needed -- confirm
+    int nthreads                // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else
+    float *GB_RESTRICT Cx = C->x ;  // typed alias of C->x; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_rowscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_fp32    // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably the eWiseAdd result
+    const GrB_Matrix M,         // presumably the mask of C<M>=A+B -- TODO confirm
+    const bool Mask_struct,     // presumably: use only the mask's structure -- TODO confirm
+    const GrB_Matrix A,         // not referenced below; presumably the first operand
+    const GrB_Matrix B,         // not referenced below; presumably the second operand
+    const bool Ch_is_Mh,        // not referenced below; meaning is defined by the template
+    const int64_t *GB_RESTRICT C_to_M,  // mapping arrays: not referenced below,
+    const int64_t *GB_RESTRICT C_to_A,  // presumably consumed by the template
+    const int64_t *GB_RESTRICT C_to_B,  // -- TODO confirm
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably one entry per task -- confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_add_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_fp32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,               // not referenced below; presumably the eWiseMult result
+    const GrB_Matrix M,         // presumably the mask of C<M>=A.*B -- TODO confirm
+    const bool Mask_struct,     // presumably: use only the mask's structure -- TODO confirm
+    const GrB_Matrix A,         // not referenced below; presumably the first operand
+    const GrB_Matrix B,         // not referenced below; presumably the second operand
+    const int64_t *GB_RESTRICT C_to_M,  // mapping arrays: not referenced below,
+    const int64_t *GB_RESTRICT C_to_A,  // presumably consumed by the template
+    const int64_t *GB_RESTRICT C_to_B,  // -- TODO confirm
+    const GB_task_struct *GB_RESTRICT TaskList, // presumably one entry per task -- confirm
+    const int ntasks,           // presumably the number of tasks -- TODO confirm
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is true: the generic case is used instead
+    #else                       // kernel enabled: the included template is the entire body
+    #include "GB_emult_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_fp64.c b/Source/Generated/GB_binop__pair_fp64.c
new file mode 100644
index 0000000000..4f7841462f
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_fp64.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_fp64
+// A.*B function (eWiseMult):       GB_AemultB__pair_fp64
+// A*D function (colscale):         GB_AxD__pair_fp64
+// D*B function (rowscale):         GB_DxB__pair_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_fp64
+
+// C type:   double
+// A type:   double
+// B type:   double
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    double
+
+#define GB_BTYPE \
+    double
+
+#define GB_CTYPE \
+    double
+
+// aij = Ax [pA] (an empty statement here: cij = 1 does not depend on aij)
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB] (an empty statement here: cij = 1 does not depend on bij)
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_FP64 || GxB_NO_PAIR_FP64)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_fp64   // C = A+B, all dense, pair_fp64 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_fp64   // C += A with the pair_fp64 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"   // the work is done by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_fp64   // C += x with the pair_fp64 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // untyped pointer to the scalar; read as double below
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;    // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"   // the work is done by this template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;          // not reached: the block above already returns
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_fp64         // C = A*D, column scale, pair_fp64 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A, bool A_is_pattern,  // input; flag presumably means values unused -- confirm
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; same flag convention assumed
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    double *GB_RESTRICT Cx = C->x ; // C->x typed as double; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_colscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_fp64         // C = D*B, row scale, pair_fp64 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; flag presumably means values unused
+    const GrB_Matrix B, bool B_is_pattern,  // input; same flag convention assumed -- confirm
+    int nthreads                    // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    double *GB_RESTRICT Cx = C->x ; // C->x typed as double; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_rowscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_fp64       // C = A+B or C<M> = A+B with the pair_fp64 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const bool Ch_is_Mh,            // meaning is set by GB_add_template.c (not visible here)
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_add_template.c"    // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_fp64     // C = A.*B or C<M> = A.*B with the pair_fp64 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_emult_template.c"  // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_int16.c b/Source/Generated/GB_binop__pair_int16.c
new file mode 100644
index 0000000000..b64c592172
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_int16.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_int16
+// A.*B function (eWiseMult):       GB_AemultB__pair_int16
+// A*D function (colscale):         GB_AxD__pair_int16
+// D*B function (rowscale):         GB_DxB__pair_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_int16
+
+// C type:   int16_t
+// A type:   int16_t
+// B type:   int16_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    int16_t
+
+#define GB_BTYPE \
+    int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+// aij = Ax [pA] (an empty statement here: cij = 1 does not depend on aij)
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB] (an empty statement here: cij = 1 does not depend on bij)
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_INT16 || GxB_NO_PAIR_INT16)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int16  // C = A+B, all dense, pair_int16 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_int16  // C += A with the pair_int16 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"   // the work is done by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_int16  // C += x with the pair_int16 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // untyped pointer to the scalar; read as int16_t below
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"   // the work is done by this template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;          // not reached: the block above already returns
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_int16        // C = A*D, column scale, pair_int16 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A, bool A_is_pattern,  // input; flag presumably means values unused -- confirm
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; same flag convention assumed
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    int16_t *GB_RESTRICT Cx = C->x ;    // C->x typed as int16_t; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_colscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_int16        // C = D*B, row scale, pair_int16 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; flag presumably means values unused
+    const GrB_Matrix B, bool B_is_pattern,  // input; same flag convention assumed -- confirm
+    int nthreads                    // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    int16_t *GB_RESTRICT Cx = C->x ;    // C->x typed as int16_t; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_rowscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_int16      // C = A+B or C<M> = A+B with the pair_int16 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const bool Ch_is_Mh,            // meaning is set by GB_add_template.c (not visible here)
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_add_template.c"    // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_int16    // C = A.*B or C<M> = A.*B with the pair_int16 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_emult_template.c"  // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_int32.c b/Source/Generated/GB_binop__pair_int32.c
new file mode 100644
index 0000000000..9d4e8d533c
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_int32.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_int32
+// A.*B function (eWiseMult):       GB_AemultB__pair_int32
+// A*D function (colscale):         GB_AxD__pair_int32
+// D*B function (rowscale):         GB_DxB__pair_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_int32
+
+// C type:   int32_t
+// A type:   int32_t
+// B type:   int32_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    int32_t
+
+#define GB_BTYPE \
+    int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+// aij = Ax [pA] (an empty statement here: cij = 1 does not depend on aij)
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB] (an empty statement here: cij = 1 does not depend on bij)
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_INT32 || GxB_NO_PAIR_INT32)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int32  // C = A+B, all dense, pair_int32 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_int32  // C += A with the pair_int32 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"   // the work is done by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_int32  // C += x with the pair_int32 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // untyped pointer to the scalar; read as int32_t below
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"   // the work is done by this template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;          // not reached: the block above already returns
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_int32        // C = A*D, column scale, pair_int32 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A, bool A_is_pattern,  // input; flag presumably means values unused -- confirm
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; same flag convention assumed
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    int32_t *GB_RESTRICT Cx = C->x ;    // C->x typed as int32_t; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_colscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_int32        // C = D*B, row scale, pair_int32 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; flag presumably means values unused
+    const GrB_Matrix B, bool B_is_pattern,  // input; same flag convention assumed -- confirm
+    int nthreads                    // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    int32_t *GB_RESTRICT Cx = C->x ;    // C->x typed as int32_t; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_rowscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_int32      // C = A+B or C<M> = A+B with the pair_int32 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const bool Ch_is_Mh,            // meaning is set by GB_add_template.c (not visible here)
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_add_template.c"    // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_int32    // C = A.*B or C<M> = A.*B with the pair_int32 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_emult_template.c"  // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_int64.c b/Source/Generated/GB_binop__pair_int64.c
new file mode 100644
index 0000000000..f926b2f97a
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_int64.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_int64
+// A.*B function (eWiseMult):       GB_AemultB__pair_int64
+// A*D function (colscale):         GB_AxD__pair_int64
+// D*B function (rowscale):         GB_DxB__pair_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_int64
+
+// C type:   int64_t
+// A type:   int64_t
+// B type:   int64_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    int64_t
+
+#define GB_BTYPE \
+    int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+// aij = Ax [pA] (an empty statement here: cij = 1 does not depend on aij)
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB] (an empty statement here: cij = 1 does not depend on bij)
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_INT64 || GxB_NO_PAIR_INT64)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int64  // C = A+B, all dense, pair_int64 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole body comes from this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_int64  // C += A with the pair_int64 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GrB_Matrix A,             // sparse matrix being accumulated
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"   // the work is done by this template
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_int64  // C += x with the pair_int64 op (cij = 1)
+(
+    GrB_Matrix C,                   // dense matrix accumulated into
+    const GB_void *p_ywork,         // untyped pointer to the scalar; read as int64_t below
+    const int nthreads              // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;  // typed copy of the scalar
+        #include "GB_dense_subassign_22_template.c"   // the work is done by this template
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;          // not reached: the block above already returns
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_int64        // C = A*D, column scale, pair_int64 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix A, bool A_is_pattern,  // input; flag presumably means values unused -- confirm
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; same flag convention assumed
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays consumed by the template;
+    const int64_t *GB_RESTRICT klast_slice,     // presumably built via GB_ek_slice.h
+    const int64_t *GB_RESTRICT pstart_slice,    // -- confirm against the caller
+    const int ntasks,               // presumably the number of tasks -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    int64_t *GB_RESTRICT Cx = C->x ;    // C->x typed as int64_t; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_colscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_int64        // C = D*B, row scale, pair_int64 op (cij = 1)
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix D, bool D_is_pattern,  // diagonal matrix; flag presumably means values unused
+    const GrB_Matrix B, bool B_is_pattern,  // input; same flag convention assumed -- confirm
+    int nthreads                    // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    int64_t *GB_RESTRICT Cx = C->x ;    // C->x typed as int64_t; GB_CX(p) expands to Cx [p]
+    #include "GB_AxB_rowscale_meta.c"   // the work is done by this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_int64      // C = A+B or C<M> = A+B with the pair_int64 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const bool Ch_is_Mh,            // meaning is set by GB_add_template.c (not visible here)
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_add_template.c"    // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_int64    // C = A.*B or C<M> = A.*B with the pair_int64 op
+(
+    GrB_Matrix C,                   // output matrix
+    const GrB_Matrix M,             // mask matrix for the C<M> form
+    const bool Mask_struct,         // presumably: use only the pattern of M -- confirm
+    const GrB_Matrix A,             // first input matrix
+    const GrB_Matrix B,             // second input matrix
+    const int64_t *GB_RESTRICT C_to_M,          // index arrays consumed by the template;
+    const int64_t *GB_RESTRICT C_to_A,          // their contents are not visible here
+    const int64_t *GB_RESTRICT C_to_B,          // -- confirm against the caller
+    const GB_task_struct *GB_RESTRICT TaskList, // task descriptors used by the template
+    const int ntasks,               // presumably the length of TaskList -- confirm
+    const int nthreads              // presumably the thread count -- confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out; see the comment on GB_DISABLE
+    #else
+    #include "GB_emult_template.c"  // included with GB_PHASE_2_OF_2 defined (numerical phase)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_int8.c b/Source/Generated/GB_binop__pair_int8.c
new file mode 100644
index 0000000000..2f3cb66b6a
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_int8.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_int8
+// A.*B function (eWiseMult):       GB_AemultB__pair_int8
+// A*D function (colscale):         GB_AxD__pair_int8
+// D*B function (rowscale):         GB_DxB__pair_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_int8
+
+// C type:   int8_t
+// A type:   int8_t
+// B type:   int8_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_BTYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// aij = Ax [pA]; a no-op here since GB_BINOP for PAIR ignores aij
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB]; a no-op here since GB_BINOP for PAIR ignores bij
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator: PAIR sets z = 1 and ignores both x and y
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// 1 if the op is SECOND, 0 otherwise
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_INT8 || GxB_NO_PAIR_INT8)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (compiled out by #if 0: none for PAIR)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // enabled: expand the hard-coded template inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork holds the scalar x, read by typecasting the p_ywork pointer
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): looks unreachable; the block above already returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    int8_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_colscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    int8_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_rowscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_add_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_emult_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_uint16.c b/Source/Generated/GB_binop__pair_uint16.c
new file mode 100644
index 0000000000..2cf7b33dd7
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_uint16.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_uint16
+// A.*B function (eWiseMult):       GB_AemultB__pair_uint16
+// A*D function (colscale):         GB_AxD__pair_uint16
+// D*B function (rowscale):         GB_DxB__pair_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_uint16
+
+// C type:   uint16_t
+// A type:   uint16_t
+// B type:   uint16_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_BTYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// aij = Ax [pA]; a no-op here since GB_BINOP for PAIR ignores aij
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB]; a no-op here since GB_BINOP for PAIR ignores bij
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator: PAIR sets z = 1 and ignores both x and y
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// 1 if the op is SECOND, 0 otherwise
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_UINT16 || GxB_NO_PAIR_UINT16)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (compiled out by #if 0: none for PAIR)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // enabled: expand the hard-coded template inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork holds the scalar x, read by typecasting the p_ywork pointer
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): looks unreachable; the block above already returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    uint16_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_colscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    uint16_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_rowscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_add_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_emult_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_uint32.c b/Source/Generated/GB_binop__pair_uint32.c
new file mode 100644
index 0000000000..7f5215d635
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_uint32.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_uint32
+// A.*B function (eWiseMult):       GB_AemultB__pair_uint32
+// A*D function (colscale):         GB_AxD__pair_uint32
+// D*B function (rowscale):         GB_DxB__pair_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_uint32
+
+// C type:   uint32_t
+// A type:   uint32_t
+// B type:   uint32_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_BTYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// aij = Ax [pA]; a no-op here since GB_BINOP for PAIR ignores aij
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB]; a no-op here since GB_BINOP for PAIR ignores bij
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator: PAIR sets z = 1 and ignores both x and y
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// 1 if the op is SECOND, 0 otherwise
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_UINT32 || GxB_NO_PAIR_UINT32)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (compiled out by #if 0: none for PAIR)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // enabled: expand the hard-coded template inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork holds the scalar x, read by typecasting the p_ywork pointer
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): looks unreachable; the block above already returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    uint32_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_colscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    uint32_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_rowscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_add_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_emult_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_uint64.c b/Source/Generated/GB_binop__pair_uint64.c
new file mode 100644
index 0000000000..091fcd95c6
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_uint64.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_uint64
+// A.*B function (eWiseMult):       GB_AemultB__pair_uint64
+// A*D function (colscale):         GB_AxD__pair_uint64
+// D*B function (rowscale):         GB_DxB__pair_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_uint64
+
+// C type:   uint64_t
+// A type:   uint64_t
+// B type:   uint64_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_BTYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// aij = Ax [pA]; a no-op here since GB_BINOP for PAIR ignores aij
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB]; a no-op here since GB_BINOP for PAIR ignores bij
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
+// cij = Ax [pA]
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator: PAIR sets z = 1 and ignores both x and y
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// 1 if the op is SECOND, 0 otherwise
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the op is plus_fp32 or plus_fp64, 0 otherwise
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the op is minus_fp32 or minus_fp64, 0 otherwise
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_UINT64 || GxB_NO_PAIR_UINT64)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (compiled out by #if 0: none for PAIR)
+//------------------------------------------------------------------------------
+
+#if 0
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense; returns GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // enabled: expand the hard-coded template inside its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // ywork holds the scalar x, read by typecasting the p_ywork pointer
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): looks unreachable; the block above already returns
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    uint64_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_colscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    uint64_t *GB_RESTRICT Cx = C->x ;
+    #include "GB_AxB_rowscale_meta.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_add_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B; GrB_NO_VALUE if GB_DISABLE is true
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_emult_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__pair_uint8.c b/Source/Generated/GB_binop__pair_uint8.c
new file mode 100644
index 0000000000..e97398f6db
--- /dev/null
+++ b/Source/Generated/GB_binop__pair_uint8.c
@@ -0,0 +1,289 @@
+//------------------------------------------------------------------------------
+// GB_binop:  hard-coded functions for each built-in binary operator
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_dense.h"
+#include "GB_binop__include.h"
+
+// C=binop(A,B) is defined by the following types and operators:
+
+// A+B function (eWiseAdd):         GB_AaddB__pair_uint8
+// A.*B function (eWiseMult):       GB_AemultB__pair_uint8
+// A*D function (colscale):         GB_AxD__pair_uint8
+// D*B function (rowscale):         GB_DxB__pair_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__pair_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__pair_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__pair_uint8
+
+// C type:   uint8_t
+// A type:   uint8_t
+// B type:   uint8_t
+// BinaryOp: cij = 1
+
+#define GB_ATYPE \
+    uint8_t     /* "A type" from the header of this file */
+
+#define GB_BTYPE \
+    uint8_t     /* "B type" from the header of this file */
+
+#define GB_CTYPE \
+    uint8_t     /* "C type" from the header of this file */
+
+// aij = Ax [pA]: expands to an empty statement here, since GB_BINOP below never reads x
+#define GB_GETA(aij,Ax,pA)  \
+    ;
+
+// bij = Bx [pB]: expands to an empty statement here, since GB_BINOP below never reads y
+#define GB_GETB(bij,Bx,pB)  \
+    ;
+
+// declare a scalar named t of the C type (uint8_t here; no trailing semicolon)
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
+// cij = Ax [pA]: plain copy of an A entry (the operator is not applied)
+#define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
+
+// cij = Bx [pB]: plain copy of a B entry (the operator is not applied)
+#define GB_COPY_B_TO_C(cij,Bx,pB) cij = Bx [pB] ;
+
+#define GB_CX(p) Cx [p]
+
+// binary operator: z = 1 regardless of x and y (the "cij = 1" listed in the header)
+#define GB_BINOP(z, x, y)   \
+    z = 1 ;
+
+// 1 if the operator is SECOND, else 0 (0 here)
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the operator is plus_fp32 or plus_fp64, else 0 (0 here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the operator is minus_fp32 or minus_fp64, else 0 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; (none) is the placeholder used here
+#define GB_CBLAS_AXPY \
+    (none)
+
+// do the numerical phases of GB_add and GB_emult (defined with no value)
+#define GB_PHASE_2_OF_2
+
+// hard-coded loops can be vectorized: GB_PRAGMA_VECTORIZE is an alias for GB_PRAGMA_SIMD
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if any of these flags is nonzero
+#define GB_DISABLE \
+    (GxB_NO_PAIR || GxB_NO_UINT8 || GxB_NO_PAIR_UINT8)
+
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: the header of this file lists the C+=A+B kernel for this operator as "(none)"
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // "(none)" stands where a kernel name would be, matching the header listing
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"     // entire body would come from this template
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__pair_uint8   // C = A+B, all 3 matrices dense (see the banner above)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole kernel body is textually included
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__pair_uint8   // C += A: sparse A accumulated into dense C (see the banner above)
+(                               // no parameter is referenced in this file; only the included template can use them
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body, included inside its own brace scope
+    }
+    
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__pair_uint8   // C += x: scalar accumulated into dense C (see the banner above)
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as a uint8_t
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;      // load the scalar through a uint8_t view of p_ywork
+        #include "GB_dense_subassign_22_template.c"     // kernel body, included after ywork is declared
+        return (GrB_SUCCESS) ;  // returns from inside the block
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached in this file: the block above always returns first
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = A*D, column scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AxD__pair_uint8     // C = A*D, column scale with diagonal D matrix (see the banner above)
+(                               // apart from C, no parameter is referenced here; only the included file can use them
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    const GrB_Matrix D, bool D_is_pattern,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    uint8_t *GB_RESTRICT Cx = C->x ;    // uint8_t view of C->x; GB_CX(p) above expands to Cx [p]
+    #include "GB_AxB_colscale_meta.c"   // kernel body, included after Cx is declared
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C = D*B, row scale with diagonal D matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_DxB__pair_uint8     // C = D*B, row scale with diagonal D matrix (see the banner above)
+(                               // apart from C, no parameter is referenced here; only the included file can use them
+    GrB_Matrix C,
+    const GrB_Matrix D, bool D_is_pattern,
+    const GrB_Matrix B, bool B_is_pattern,
+    int nthreads                // declared without const, unlike nthreads in the other kernels of this file
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    uint8_t *GB_RESTRICT Cx = C->x ;    // uint8_t view of C->x; GB_CX(p) above expands to Cx [p]
+    #include "GB_AxB_rowscale_meta.c"   // kernel body, included after Cx is declared
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseAdd: C = A+B or C<M> = A+B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AaddB__pair_uint8   // eWiseAdd kernel: C = A+B or C<M> = A+B (see the banner above)
+(                               // no parameter is referenced in this file; only the included template can use them
+    GrB_Matrix C,
+    const GrB_Matrix M,         // the M of C<M> in the banner above
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const bool Ch_is_Mh,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    #include "GB_add_template.c"    // the whole kernel body is textually included from this template
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// eWiseMult: C = A.*B or C<M> = A.*B
+//------------------------------------------------------------------------------
+
+GrB_Info GB_AemultB__pair_uint8     // eWiseMult kernel: C = A.*B or C<M> = A.*B (see the banner above)
+(                               // no parameter is referenced in this file; only the included template can use them
+    GrB_Matrix C,
+    const GrB_Matrix M,         // the M of C<M> in the banner above
+    const bool Mask_struct,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int64_t *GB_RESTRICT C_to_M,
+    const int64_t *GB_RESTRICT C_to_A,
+    const int64_t *GB_RESTRICT C_to_B,
+    const GB_task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    #include "GB_emult_template.c"  // the whole kernel body is textually included from this template
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_binop__plus_fp32.c b/Source/Generated/GB_binop__plus_fp32.c
index 11af21736b..c5f2e81280 100644
--- a/Source/Generated/GB_binop__plus_fp32.c
+++ b/Source/Generated/GB_binop__plus_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_fp32
-// A.*B function (eWiseMult):  GB_AemultB__plus_fp32
-// A*D function (colscale):    GB_AxD__plus_fp32
-// D*A function (rowscale):    GB_DxB__plus_fp32
+// A+B function (eWiseAdd):         GB_AaddB__plus_fp32
+// A.*B function (eWiseMult):       GB_AemultB__plus_fp32
+// A*D function (colscale):         GB_AxD__plus_fp32
+// D*B function (rowscale):         GB_DxB__plus_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t of the C type (float in this file; no trailing semicolon)
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// 1 if the operator is SECOND, else 0 (0 here: GB_BINOP above is z = (x + y))
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the operator is plus_fp32 or plus_fp64, else 0 (1 here: this file is plus_fp32)
+#define GB_OP_IS_PLUS_REAL \
+    1
+
+// 1 if the operator is minus_fp32 or minus_fp64, else 0 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type (GB_cblas_saxpy for plus_fp32)
+#define GB_CBLAS_AXPY \
+    GB_cblas_saxpy
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FP32 || GxB_NO_PLUS_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_fp32  // C += A+B, all 3 matrices dense; returns void and has no GB_DISABLE guard
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"     // the entire body comes from this template
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_fp32    // C = A+B, all 3 matrices dense (see the banner above)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole kernel body is textually included
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_fp32    // C += A: sparse A accumulated into dense C (see the banner above)
+(                               // no parameter is referenced in this file; only the included template can use them
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body, included inside its own brace scope
+    }
+    
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_fp32    // C += x: scalar accumulated into dense C (see the banner above)
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as a float
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;          // load the scalar through a float view of p_ywork
+        #include "GB_dense_subassign_22_template.c"     // kernel body, included after ywork is declared
+        return (GrB_SUCCESS) ;  // returns from inside the block
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached in this file: the block above always returns first
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_fp64.c b/Source/Generated/GB_binop__plus_fp64.c
index 8afbd8e1fe..ebe574d3ab 100644
--- a/Source/Generated/GB_binop__plus_fp64.c
+++ b/Source/Generated/GB_binop__plus_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_fp64
-// A.*B function (eWiseMult):  GB_AemultB__plus_fp64
-// A*D function (colscale):    GB_AxD__plus_fp64
-// D*A function (rowscale):    GB_DxB__plus_fp64
+// A+B function (eWiseAdd):         GB_AaddB__plus_fp64
+// A.*B function (eWiseMult):       GB_AemultB__plus_fp64
+// A*D function (colscale):         GB_AxD__plus_fp64
+// D*B function (rowscale):         GB_DxB__plus_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar named t of the C type (double in this file; no trailing semicolon)
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// 1 if the operator is SECOND, else 0 (0 here: GB_BINOP above is z = (x + y))
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the operator is plus_fp32 or plus_fp64, else 0 (1 here: this file is plus_fp64)
+#define GB_OP_IS_PLUS_REAL \
+    1
+
+// 1 if the operator is minus_fp32 or minus_fp64, else 0 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine for this operator and type (GB_cblas_daxpy for plus_fp64)
+#define GB_CBLAS_AXPY \
+    GB_cblas_daxpy
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FP64 || GxB_NO_PLUS_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_fp64  // C += A+B, all 3 matrices dense; returns void and has no GB_DISABLE guard
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"     // the entire body comes from this template
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_fp64    // C = A+B, all 3 matrices dense (see the banner above)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole kernel body is textually included
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_fp64    // C += A: sparse A accumulated into dense C (see the banner above)
+(                               // no parameter is referenced in this file; only the included template can use them
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body, included inside its own brace scope
+    }
+    
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_fp64    // C += x: scalar accumulated into dense C (see the banner above)
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as a double
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;        // load the scalar through a double view of p_ywork
+        #include "GB_dense_subassign_22_template.c"     // kernel body, included after ywork is declared
+        return (GrB_SUCCESS) ;  // returns from inside the block
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached in this file: the block above always returns first
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_int16.c b/Source/Generated/GB_binop__plus_int16.c
index 930a7adc93..f61e88a315 100644
--- a/Source/Generated/GB_binop__plus_int16.c
+++ b/Source/Generated/GB_binop__plus_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_int16
-// A.*B function (eWiseMult):  GB_AemultB__plus_int16
-// A*D function (colscale):    GB_AxD__plus_int16
-// D*A function (rowscale):    GB_DxB__plus_int16
+// A+B function (eWiseAdd):         GB_AaddB__plus_int16
+// A.*B function (eWiseMult):       GB_AemultB__plus_int16
+// A*D function (colscale):         GB_AxD__plus_int16
+// D*B function (rowscale):         GB_DxB__plus_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar named t of the C type (int16_t in this file; no trailing semicolon)
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// 1 if the operator is SECOND, else 0 (0 here: GB_BINOP above is z = (x + y))
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the operator is plus_fp32 or plus_fp64, else 0 (0 here: this file is plus_int16)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the operator is minus_fp32 or minus_fp64, else 0 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; (none) is the placeholder used here
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT16 || GxB_NO_PLUS_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_int16 // C += A+B, all 3 matrices dense; returns void and has no GB_DISABLE guard
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"     // the entire body comes from this template
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int16   // C = A+B, all 3 matrices dense (see the banner above)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole kernel body is textually included
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_int16   // C += A: sparse A accumulated into dense C (see the banner above)
+(                               // no parameter is referenced in this file; only the included template can use them
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body, included inside its own brace scope
+    }
+    
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_int16   // C += x: scalar accumulated into dense C (see the banner above)
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as an int16_t
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;      // load the scalar through an int16_t view of p_ywork
+        #include "GB_dense_subassign_22_template.c"     // kernel body, included after ywork is declared
+        return (GrB_SUCCESS) ;  // returns from inside the block
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached in this file: the block above always returns first
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_int32.c b/Source/Generated/GB_binop__plus_int32.c
index bd4fe0d60f..7ce251ed9e 100644
--- a/Source/Generated/GB_binop__plus_int32.c
+++ b/Source/Generated/GB_binop__plus_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_int32
-// A.*B function (eWiseMult):  GB_AemultB__plus_int32
-// A*D function (colscale):    GB_AxD__plus_int32
-// D*A function (rowscale):    GB_DxB__plus_int32
+// A+B function (eWiseAdd):         GB_AaddB__plus_int32
+// A.*B function (eWiseMult):       GB_AemultB__plus_int32
+// A*D function (colscale):         GB_AxD__plus_int32
+// D*B function (rowscale):         GB_DxB__plus_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar named t of the C type (int32_t in this file; no trailing semicolon)
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// 1 if the operator is SECOND, else 0 (0 here: GB_BINOP above is z = (x + y))
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the operator is plus_fp32 or plus_fp64, else 0 (0 here: this file is plus_int32)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the operator is minus_fp32 or minus_fp64, else 0 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; (none) is the placeholder used here
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT32 || GxB_NO_PLUS_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_int32 // C += A+B, all 3 matrices dense; returns void and has no GB_DISABLE guard
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"     // the entire body comes from this template
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int32   // C = A+B, all 3 matrices dense (see the banner above)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{ 
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the whole kernel body is textually included
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_int32   // C += A: sparse A accumulated into dense C (see the banner above)
+(                               // no parameter is referenced in this file; only the included template can use them
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"     // kernel body, included inside its own brace scope
+    }
+    
+    return (GrB_SUCCESS) ;      // reported once the included body falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_int32   // C += x: scalar accumulated into dense C (see the banner above)
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read below as an int32_t
+    const int nthreads          // not referenced here; only the included template can use it
+)
+{
+    #if GB_DISABLE              // GB_DISABLE is defined above in this file
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed here
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;      // load the scalar through an int32_t view of p_ywork
+        #include "GB_dense_subassign_22_template.c"     // kernel body, included after ywork is declared
+        return (GrB_SUCCESS) ;  // returns from inside the block
+    }
+    
+    return (GrB_SUCCESS) ;      // not reached in this file: the block above always returns first
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_int64.c b/Source/Generated/GB_binop__plus_int64.c
index 08c97f7b72..43cb4564e5 100644
--- a/Source/Generated/GB_binop__plus_int64.c
+++ b/Source/Generated/GB_binop__plus_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_int64
-// A.*B function (eWiseMult):  GB_AemultB__plus_int64
-// A*D function (colscale):    GB_AxD__plus_int64
-// D*A function (rowscale):    GB_DxB__plus_int64
+// A+B function (eWiseAdd):         GB_AaddB__plus_int64
+// A.*B function (eWiseMult):       GB_AemultB__plus_int64
+// A*D function (colscale):         GB_AxD__plus_int64
+// D*B function (rowscale):         GB_DxB__plus_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar named t of the C type (int64_t in this file; no trailing semicolon)
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// 1 if the operator is SECOND, else 0 (0 here: GB_BINOP above is z = (x + y))
+#define GB_OP_IS_SECOND \
+    0
+
+// 1 if the operator is plus_fp32 or plus_fp64, else 0 (0 here: this file is plus_int64)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 if the operator is minus_fp32 or minus_fp64, else 0 (0 here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type; (none) is the placeholder used here
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT64 || GxB_NO_PLUS_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_int8.c b/Source/Generated/GB_binop__plus_int8.c
index 96fe744bce..7c09eb902c 100644
--- a/Source/Generated/GB_binop__plus_int8.c
+++ b/Source/Generated/GB_binop__plus_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_int8
-// A.*B function (eWiseMult):  GB_AemultB__plus_int8
-// A*D function (colscale):    GB_AxD__plus_int8
-// D*A function (rowscale):    GB_DxB__plus_int8
+// A+B function (eWiseAdd):         GB_AaddB__plus_int8
+// A.*B function (eWiseMult):       GB_AemultB__plus_int8
+// A*D function (colscale):         GB_AxD__plus_int8
+// D*A function (rowscale):         GB_DxB__plus_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT8 || GxB_NO_PLUS_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_uint16.c b/Source/Generated/GB_binop__plus_uint16.c
index 4193c3227a..6db771cdc3 100644
--- a/Source/Generated/GB_binop__plus_uint16.c
+++ b/Source/Generated/GB_binop__plus_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_uint16
-// A.*B function (eWiseMult):  GB_AemultB__plus_uint16
-// A*D function (colscale):    GB_AxD__plus_uint16
-// D*A function (rowscale):    GB_DxB__plus_uint16
+// A+B function (eWiseAdd):         GB_AaddB__plus_uint16
+// A.*B function (eWiseMult):       GB_AemultB__plus_uint16
+// A*D function (colscale):         GB_AxD__plus_uint16
+// D*A function (rowscale):         GB_DxB__plus_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_uint32.c b/Source/Generated/GB_binop__plus_uint32.c
index 03b8aa7ce5..067dd8df13 100644
--- a/Source/Generated/GB_binop__plus_uint32.c
+++ b/Source/Generated/GB_binop__plus_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_uint32
-// A.*B function (eWiseMult):  GB_AemultB__plus_uint32
-// A*D function (colscale):    GB_AxD__plus_uint32
-// D*A function (rowscale):    GB_DxB__plus_uint32
+// A+B function (eWiseAdd):         GB_AaddB__plus_uint32
+// A.*B function (eWiseMult):       GB_AemultB__plus_uint32
+// A*D function (colscale):         GB_AxD__plus_uint32
+// D*A function (rowscale):         GB_DxB__plus_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_uint64.c b/Source/Generated/GB_binop__plus_uint64.c
index abd6c9204d..57f515dfd1 100644
--- a/Source/Generated/GB_binop__plus_uint64.c
+++ b/Source/Generated/GB_binop__plus_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_uint64
-// A.*B function (eWiseMult):  GB_AemultB__plus_uint64
-// A*D function (colscale):    GB_AxD__plus_uint64
-// D*A function (rowscale):    GB_DxB__plus_uint64
+// A+B function (eWiseAdd):         GB_AaddB__plus_uint64
+// A.*B function (eWiseMult):       GB_AemultB__plus_uint64
+// A*D function (colscale):         GB_AxD__plus_uint64
+// D*A function (rowscale):         GB_DxB__plus_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__plus_uint8.c b/Source/Generated/GB_binop__plus_uint8.c
index a922ed3cb3..66588df04f 100644
--- a/Source/Generated/GB_binop__plus_uint8.c
+++ b/Source/Generated/GB_binop__plus_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__plus_uint8
-// A.*B function (eWiseMult):  GB_AemultB__plus_uint8
-// A*D function (colscale):    GB_AxD__plus_uint8
-// D*A function (rowscale):    GB_DxB__plus_uint8
+// A+B function (eWiseAdd):         GB_AaddB__plus_uint8
+// A.*B function (eWiseMult):       GB_AemultB__plus_uint8
+// A*D function (colscale):         GB_AxD__plus_uint8
+// D*A function (rowscale):         GB_DxB__plus_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__plus_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__plus_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__plus_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__plus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x + y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__plus_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__plus_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__plus_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__plus_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_fp32.c b/Source/Generated/GB_binop__rdiv_fp32.c
index e3ecbcbbc0..525aea069c 100644
--- a/Source/Generated/GB_binop__rdiv_fp32.c
+++ b/Source/Generated/GB_binop__rdiv_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_fp32
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_fp32
-// A*D function (colscale):    GB_AxD__rdiv_fp32
-// D*A function (rowscale):    GB_DxB__rdiv_fp32
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_fp32
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_fp32
+// A*D function (colscale):         GB_AxD__rdiv_fp32
+// D*A function (rowscale):         GB_DxB__rdiv_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y / x) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_FP32 || GxB_NO_RDIV_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_fp64.c b/Source/Generated/GB_binop__rdiv_fp64.c
index 34634dbcc7..1560136411 100644
--- a/Source/Generated/GB_binop__rdiv_fp64.c
+++ b/Source/Generated/GB_binop__rdiv_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_fp64
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_fp64
-// A*D function (colscale):    GB_AxD__rdiv_fp64
-// D*A function (rowscale):    GB_DxB__rdiv_fp64
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_fp64
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_fp64
+// A*D function (colscale):         GB_AxD__rdiv_fp64
+// D*A function (rowscale):         GB_DxB__rdiv_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y / x) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_FP64 || GxB_NO_RDIV_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_int16.c b/Source/Generated/GB_binop__rdiv_int16.c
index dc6f4bc987..5be2e6e876 100644
--- a/Source/Generated/GB_binop__rdiv_int16.c
+++ b/Source/Generated/GB_binop__rdiv_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_int16
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_int16
-// A*D function (colscale):    GB_AxD__rdiv_int16
-// D*A function (rowscale):    GB_DxB__rdiv_int16
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_int16
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_int16
+// A*D function (colscale):         GB_AxD__rdiv_int16
+// D*A function (rowscale):         GB_DxB__rdiv_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (y, x, 16) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_INT16 || GxB_NO_RDIV_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_int32.c b/Source/Generated/GB_binop__rdiv_int32.c
index 8ef93430c1..b8ba4473bc 100644
--- a/Source/Generated/GB_binop__rdiv_int32.c
+++ b/Source/Generated/GB_binop__rdiv_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_int32
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_int32
-// A*D function (colscale):    GB_AxD__rdiv_int32
-// D*A function (rowscale):    GB_DxB__rdiv_int32
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_int32
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_int32
+// A*D function (colscale):         GB_AxD__rdiv_int32
+// D*A function (rowscale):         GB_DxB__rdiv_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (y, x, 32) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_INT32 || GxB_NO_RDIV_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_int64.c b/Source/Generated/GB_binop__rdiv_int64.c
index 7772e45977..fed70f57f6 100644
--- a/Source/Generated/GB_binop__rdiv_int64.c
+++ b/Source/Generated/GB_binop__rdiv_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_int64
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_int64
-// A*D function (colscale):    GB_AxD__rdiv_int64
-// D*A function (rowscale):    GB_DxB__rdiv_int64
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_int64
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_int64
+// A*D function (colscale):         GB_AxD__rdiv_int64
+// D*A function (rowscale):         GB_DxB__rdiv_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (y, x, 64) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_INT64 || GxB_NO_RDIV_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_int8.c b/Source/Generated/GB_binop__rdiv_int8.c
index 97d3e99cd4..a9669c13ec 100644
--- a/Source/Generated/GB_binop__rdiv_int8.c
+++ b/Source/Generated/GB_binop__rdiv_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_int8
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_int8
-// A*D function (colscale):    GB_AxD__rdiv_int8
-// D*A function (rowscale):    GB_DxB__rdiv_int8
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_int8
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_int8
+// A*D function (colscale):         GB_AxD__rdiv_int8
+// D*A function (rowscale):         GB_DxB__rdiv_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_SIGNED (y, x, 8) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_INT8 || GxB_NO_RDIV_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_uint16.c b/Source/Generated/GB_binop__rdiv_uint16.c
index 52f6047b21..7eb3a54e52 100644
--- a/Source/Generated/GB_binop__rdiv_uint16.c
+++ b/Source/Generated/GB_binop__rdiv_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_uint16
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_uint16
-// A*D function (colscale):    GB_AxD__rdiv_uint16
-// D*A function (rowscale):    GB_DxB__rdiv_uint16
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_uint16
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_uint16
+// A*D function (colscale):         GB_AxD__rdiv_uint16
+// D*A function (rowscale):         GB_DxB__rdiv_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (y, x, 16) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_UINT16 || GxB_NO_RDIV_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_uint32.c b/Source/Generated/GB_binop__rdiv_uint32.c
index 3a46816558..de9f41cf34 100644
--- a/Source/Generated/GB_binop__rdiv_uint32.c
+++ b/Source/Generated/GB_binop__rdiv_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_uint32
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_uint32
-// A*D function (colscale):    GB_AxD__rdiv_uint32
-// D*A function (rowscale):    GB_DxB__rdiv_uint32
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_uint32
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_uint32
+// A*D function (colscale):         GB_AxD__rdiv_uint32
+// D*A function (rowscale):         GB_DxB__rdiv_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (y, x, 32) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_UINT32 || GxB_NO_RDIV_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_uint64.c b/Source/Generated/GB_binop__rdiv_uint64.c
index e1e9b61d96..38f6bc9acf 100644
--- a/Source/Generated/GB_binop__rdiv_uint64.c
+++ b/Source/Generated/GB_binop__rdiv_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_uint64
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_uint64
-// A*D function (colscale):    GB_AxD__rdiv_uint64
-// D*A function (rowscale):    GB_DxB__rdiv_uint64
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_uint64
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_uint64
+// A*D function (colscale):         GB_AxD__rdiv_uint64
+// D*A function (rowscale):         GB_DxB__rdiv_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (y, x, 64) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_UINT64 || GxB_NO_RDIV_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_uint64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rdiv_uint8.c b/Source/Generated/GB_binop__rdiv_uint8.c
index 8ae96f74c1..113313e9bc 100644
--- a/Source/Generated/GB_binop__rdiv_uint8.c
+++ b/Source/Generated/GB_binop__rdiv_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rdiv_uint8
-// A.*B function (eWiseMult):  GB_AemultB__rdiv_uint8
-// A*D function (colscale):    GB_AxD__rdiv_uint8
-// D*A function (rowscale):    GB_DxB__rdiv_uint8
+// A+B function (eWiseAdd):         GB_AaddB__rdiv_uint8
+// A.*B function (eWiseMult):       GB_AemultB__rdiv_uint8
+// A*D function (colscale):         GB_AxD__rdiv_uint8
+// D*A function (rowscale):         GB_DxB__rdiv_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__rdiv_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__rdiv_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rdiv_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rdiv_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = GB_IDIV_UNSIGNED (y, x, 8) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RDIV || GxB_NO_UINT8 || GxB_NO_RDIV_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rdiv_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rdiv_uint8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rdiv_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rdiv_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_fp32.c b/Source/Generated/GB_binop__rminus_fp32.c
index 8d18f8c671..edbbffe2ef 100644
--- a/Source/Generated/GB_binop__rminus_fp32.c
+++ b/Source/Generated/GB_binop__rminus_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_fp32
-// A.*B function (eWiseMult):  GB_AemultB__rminus_fp32
-// A*D function (colscale):    GB_AxD__rminus_fp32
-// D*A function (rowscale):    GB_DxB__rminus_fp32
+// A+B function (eWiseAdd):         GB_AaddB__rminus_fp32
+// A.*B function (eWiseMult):       GB_AemultB__rminus_fp32
+// A*D function (colscale):         GB_AxD__rminus_fp32
+// D*A function (rowscale):         GB_DxB__rminus_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_FP32 || GxB_NO_RMINUS_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rminus_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rminus_fp32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_fp64.c b/Source/Generated/GB_binop__rminus_fp64.c
index 31d3bd74cb..4c81fd2a31 100644
--- a/Source/Generated/GB_binop__rminus_fp64.c
+++ b/Source/Generated/GB_binop__rminus_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_fp64
-// A.*B function (eWiseMult):  GB_AemultB__rminus_fp64
-// A*D function (colscale):    GB_AxD__rminus_fp64
-// D*A function (rowscale):    GB_DxB__rminus_fp64
+// A+B function (eWiseAdd):         GB_AaddB__rminus_fp64
+// A.*B function (eWiseMult):       GB_AemultB__rminus_fp64
+// A*D function (colscale):         GB_AxD__rminus_fp64
+// D*A function (rowscale):         GB_DxB__rminus_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_FP64 || GxB_NO_RMINUS_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rminus_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rminus_fp64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_int16.c b/Source/Generated/GB_binop__rminus_int16.c
index fae93967fd..91a908690b 100644
--- a/Source/Generated/GB_binop__rminus_int16.c
+++ b/Source/Generated/GB_binop__rminus_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_int16
-// A.*B function (eWiseMult):  GB_AemultB__rminus_int16
-// A*D function (colscale):    GB_AxD__rminus_int16
-// D*A function (rowscale):    GB_DxB__rminus_int16
+// A+B function (eWiseAdd):         GB_AaddB__rminus_int16
+// A.*B function (eWiseMult):       GB_AemultB__rminus_int16
+// A*D function (colscale):         GB_AxD__rminus_int16
+// D*A function (rowscale):         GB_DxB__rminus_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (int16_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second (flag is 0 here: this file's operator is rminus)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (flag is 0 here: this is rminus_int16)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (flag is 0 here: this is rminus_int16)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway, if any (presumably "(none)" = no gateway; confirm):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_INT16 || GxB_NO_RMINUS_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// rminus kernel for int16_t.  NOTE(review): "+" in the banner presumably
+// stands for GB_BINOP, z = (y - x), not literal addition -- confirm.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// Returns void and has no GB_DISABLE guard; the template is the whole body.
+void GB_Cdense_ewise3_accum__rminus_int16
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (rminus kernel for int16_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int16
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // the template is not compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (rminus, int16_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__rminus_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the three *_slice arrays and
+    const int64_t *GB_RESTRICT klast_slice,     // ntasks are not used here
+    const int64_t *GB_RESTRICT pstart_slice,    // directly; presumably the
+    const int ntasks,                           // template reads them (confirm)
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // braces give the included template its own scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__rminus_int16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar operand, read via a (int16_t *) cast
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // typed copy of the scalar, made before the template is included
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled path
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_int32.c b/Source/Generated/GB_binop__rminus_int32.c
index 7681236ee0..5a8d6d8044 100644
--- a/Source/Generated/GB_binop__rminus_int32.c
+++ b/Source/Generated/GB_binop__rminus_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_int32
-// A.*B function (eWiseMult):  GB_AemultB__rminus_int32
-// A*D function (colscale):    GB_AxD__rminus_int32
-// D*A function (rowscale):    GB_DxB__rminus_int32
+// A+B function (eWiseAdd):         GB_AaddB__rminus_int32
+// A.*B function (eWiseMult):       GB_AemultB__rminus_int32
+// A*D function (colscale):         GB_AxD__rminus_int32
+// D*A function (rowscale):         GB_DxB__rminus_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (int32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second (flag is 0 here: this file's operator is rminus)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (flag is 0 here: this is rminus_int32)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (flag is 0 here: this is rminus_int32)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway, if any (presumably "(none)" = no gateway; confirm):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_INT32 || GxB_NO_RMINUS_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// rminus kernel for int32_t.  NOTE(review): "+" in the banner presumably
+// stands for GB_BINOP, z = (y - x), not literal addition -- confirm.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// Returns void and has no GB_DISABLE guard; the template is the whole body.
+void GB_Cdense_ewise3_accum__rminus_int32
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (rminus kernel for int32_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int32
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // the template is not compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (rminus, int32_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__rminus_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the three *_slice arrays and
+    const int64_t *GB_RESTRICT klast_slice,     // ntasks are not used here
+    const int64_t *GB_RESTRICT pstart_slice,    // directly; presumably the
+    const int ntasks,                           // template reads them (confirm)
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // braces give the included template its own scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__rminus_int32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar operand, read via a (int32_t *) cast
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // typed copy of the scalar, made before the template is included
+        int32_t ywork = (*((int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled path
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_int64.c b/Source/Generated/GB_binop__rminus_int64.c
index a13be7fe1b..3f629fea87 100644
--- a/Source/Generated/GB_binop__rminus_int64.c
+++ b/Source/Generated/GB_binop__rminus_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_int64
-// A.*B function (eWiseMult):  GB_AemultB__rminus_int64
-// A*D function (colscale):    GB_AxD__rminus_int64
-// D*A function (rowscale):    GB_DxB__rminus_int64
+// A+B function (eWiseAdd):         GB_AaddB__rminus_int64
+// A.*B function (eWiseMult):       GB_AemultB__rminus_int64
+// A*D function (colscale):         GB_AxD__rminus_int64
+// D*A function (rowscale):         GB_DxB__rminus_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (int64_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second (flag is 0 here: this file's operator is rminus)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (flag is 0 here: this is rminus_int64)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (flag is 0 here: this is rminus_int64)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway, if any (presumably "(none)" = no gateway; confirm):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_INT64 || GxB_NO_RMINUS_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// rminus kernel for int64_t.  NOTE(review): "+" in the banner presumably
+// stands for GB_BINOP, z = (y - x), not literal addition -- confirm.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// Returns void and has no GB_DISABLE guard; the template is the whole body.
+void GB_Cdense_ewise3_accum__rminus_int64
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (rminus kernel for int64_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int64
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // the template is not compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (rminus, int64_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__rminus_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the three *_slice arrays and
+    const int64_t *GB_RESTRICT klast_slice,     // ntasks are not used here
+    const int64_t *GB_RESTRICT pstart_slice,    // directly; presumably the
+    const int ntasks,                           // template reads them (confirm)
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // braces give the included template its own scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__rminus_int64
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar operand, read via a (int64_t *) cast
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // typed copy of the scalar, made before the template is included
+        int64_t ywork = (*((int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled path
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_int8.c b/Source/Generated/GB_binop__rminus_int8.c
index 1943d8be56..a6d3ad36ba 100644
--- a/Source/Generated/GB_binop__rminus_int8.c
+++ b/Source/Generated/GB_binop__rminus_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_int8
-// A.*B function (eWiseMult):  GB_AemultB__rminus_int8
-// A*D function (colscale):    GB_AxD__rminus_int8
-// D*A function (rowscale):    GB_DxB__rminus_int8
+// A+B function (eWiseAdd):         GB_AaddB__rminus_int8
+// A.*B function (eWiseMult):       GB_AemultB__rminus_int8
+// A*D function (colscale):         GB_AxD__rminus_int8
+// D*A function (rowscale):         GB_DxB__rminus_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (int8_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second (flag is 0 here: this file's operator is rminus)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (flag is 0 here: this is rminus_int8)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (flag is 0 here: this is rminus_int8)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway, if any (presumably "(none)" = no gateway; confirm):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_INT8 || GxB_NO_RMINUS_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// rminus kernel for int8_t.  NOTE(review): "+" in the banner presumably
+// stands for GB_BINOP, z = (y - x), not literal addition -- confirm.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// Returns void and has no GB_DISABLE guard; the template is the whole body.
+void GB_Cdense_ewise3_accum__rminus_int8
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (rminus kernel for int8_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_int8
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // the template is not compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (rminus, int8_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__rminus_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the three *_slice arrays and
+    const int64_t *GB_RESTRICT klast_slice,     // ntasks are not used here
+    const int64_t *GB_RESTRICT pstart_slice,    // directly; presumably the
+    const int ntasks,                           // template reads them (confirm)
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // braces give the included template its own scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__rminus_int8
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar operand, read via a (int8_t *) cast
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // typed copy of the scalar, made before the template is included
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled path
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_uint16.c b/Source/Generated/GB_binop__rminus_uint16.c
index 92f2c42eef..c9eb81277b 100644
--- a/Source/Generated/GB_binop__rminus_uint16.c
+++ b/Source/Generated/GB_binop__rminus_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_uint16
-// A.*B function (eWiseMult):  GB_AemultB__rminus_uint16
-// A*D function (colscale):    GB_AxD__rminus_uint16
-// D*A function (rowscale):    GB_DxB__rminus_uint16
+// A+B function (eWiseAdd):         GB_AaddB__rminus_uint16
+// A.*B function (eWiseMult):       GB_AemultB__rminus_uint16
+// A*D function (colscale):         GB_AxD__rminus_uint16
+// D*A function (rowscale):         GB_DxB__rminus_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (uint16_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second (flag is 0 here: this file's operator is rminus)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (flag is 0 here: this is rminus_uint16)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (flag is 0 here: this is rminus_uint16)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway, if any (presumably "(none)" = no gateway; confirm):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_UINT16 || GxB_NO_RMINUS_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// rminus kernel for uint16_t.  NOTE(review): "+" in the banner presumably
+// stands for GB_BINOP, z = (y - x), not literal addition -- confirm.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// Returns void and has no GB_DISABLE guard; the template is the whole body.
+void GB_Cdense_ewise3_accum__rminus_uint16
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (rminus kernel for uint16_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint16
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // the template is not compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (rminus, uint16_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__rminus_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the three *_slice arrays and
+    const int64_t *GB_RESTRICT klast_slice,     // ntasks are not used here
+    const int64_t *GB_RESTRICT pstart_slice,    // directly; presumably the
+    const int ntasks,                           // template reads them (confirm)
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // braces give the included template its own scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__rminus_uint16
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar operand, read via a (uint16_t *) cast
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // typed copy of the scalar, made before the template is included
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled path
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_uint32.c b/Source/Generated/GB_binop__rminus_uint32.c
index c5be1016ad..9be86cb833 100644
--- a/Source/Generated/GB_binop__rminus_uint32.c
+++ b/Source/Generated/GB_binop__rminus_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_uint32
-// A.*B function (eWiseMult):  GB_AemultB__rminus_uint32
-// A*D function (colscale):    GB_AxD__rminus_uint32
-// D*A function (rowscale):    GB_DxB__rminus_uint32
+// A+B function (eWiseAdd):         GB_AaddB__rminus_uint32
+// A.*B function (eWiseMult):       GB_AemultB__rminus_uint32
+// A*D function (colscale):         GB_AxD__rminus_uint32
+// D*A function (rowscale):         GB_DxB__rminus_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar t of the same type as C (uint32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// op is second (flag is 0 here: this file's operator is rminus)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (flag is 0 here: this is rminus_uint32)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (flag is 0 here: this is rminus_uint32)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway, if any (presumably "(none)" = no gateway; confirm):
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_UINT32 || GxB_NO_RMINUS_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+// rminus kernel for uint32_t.  NOTE(review): "+" in the banner presumably
+// stands for GB_BINOP, z = (y - x), not literal addition -- confirm.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// Returns void and has no GB_DISABLE guard; the template is the whole body.
+void GB_Cdense_ewise3_accum__rminus_uint32
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (rminus kernel for uint32_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint32
+(
+    GrB_Matrix C,               // not const-qualified, unlike A and B
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // the template is not compiled in
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (rminus, uint32_t)
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumA__rminus_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,    // the three *_slice arrays and
+    const int64_t *GB_RESTRICT klast_slice,     // ntasks are not used here
+    const int64_t *GB_RESTRICT pstart_slice,    // directly; presumably the
+    const int ntasks,                           // template reads them (confirm)
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    // braces give the included template its own scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+// Returns GrB_NO_VALUE if GB_DISABLE is set, otherwise GrB_SUCCESS.
+GrB_Info GB_Cdense_accumX__rminus_uint32
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,     // scalar operand, read via a (uint32_t *) cast
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // typed copy of the scalar, made before the template is included
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // single exit for the enabled path
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_uint64.c b/Source/Generated/GB_binop__rminus_uint64.c
index 78b7ce7fdf..5fa30e9cce 100644
--- a/Source/Generated/GB_binop__rminus_uint64.c
+++ b/Source/Generated/GB_binop__rminus_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_uint64
-// A.*B function (eWiseMult):  GB_AemultB__rminus_uint64
-// A*D function (colscale):    GB_AxD__rminus_uint64
-// D*A function (rowscale):    GB_DxB__rminus_uint64
+// A+B function (eWiseAdd):         GB_AaddB__rminus_uint64
+// A.*B function (eWiseMult):       GB_AemultB__rminus_uint64
+// A*D function (colscale):         GB_AxD__rminus_uint64
+// D*A function (rowscale):         GB_DxB__rminus_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint64_t here); the expansion has no trailing ';'
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// flag for "op is second"; 0 in this file (the operator is RMINUS)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for "op is plus_fp32 or plus_fp64"; 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for "op is minus_fp32 or minus_fp64"; 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+// NOTE(review): "(none)" below is a placeholder, not a callable routine; presumably never expanded for this op -- confirm in the templates
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_UINT64 || GxB_NO_RMINUS_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rminus_uint64
+(
+    GrB_Matrix C,               // matrix accumulated into (C += A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ // NOTE(review): no GB_DISABLE guard and a void return, unlike the sibling dense kernels -- confirm intended
+    #include "GB_dense_ewise3_accum_template.c"     // the entire body is this template
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint64
+(
+    GrB_Matrix C,               // result matrix (C = A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the entire computation is this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rminus_uint64
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A per the banner above)
+    const GrB_Matrix A,                         // matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays handed to the included template;
+    const int64_t *GB_RESTRICT klast_slice,     //   presumably a per-task partition of A --
+    const int64_t *GB_RESTRICT pstart_slice,    //   TODO confirm against the caller
+    const int ntasks,                           // presumably the number of slice tasks -- TODO confirm
+    const int nthreads                          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // the whole computation is the textually included template below
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // this branch is built only when GB_DISABLE evaluates to zero
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rminus_uint64
+(
+    GrB_Matrix C,               // matrix accumulated into (C += x per the banner above)
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read below as uint64_t
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // typed copy of the scalar is declared just before the template is included
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): the return inside the braces above appears to make the one below redundant
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__rminus_uint8.c b/Source/Generated/GB_binop__rminus_uint8.c
index c442e60f7c..7bc712b6c7 100644
--- a/Source/Generated/GB_binop__rminus_uint8.c
+++ b/Source/Generated/GB_binop__rminus_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__rminus_uint8
-// A.*B function (eWiseMult):  GB_AemultB__rminus_uint8
-// A*D function (colscale):    GB_AxD__rminus_uint8
-// D*A function (rowscale):    GB_DxB__rminus_uint8
+// A+B function (eWiseAdd):         GB_AaddB__rminus_uint8
+// A.*B function (eWiseMult):       GB_AemultB__rminus_uint8
+// A*D function (colscale):         GB_AxD__rminus_uint8
+// D*A function (rowscale):         GB_DxB__rminus_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__rminus_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__rminus_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__rminus_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__rminus_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (uint8_t here); the expansion has no trailing ';'
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (y - x) ;
 
+// flag for "op is second"; 0 in this file (the operator is RMINUS)
+#define GB_OP_IS_SECOND \
+    0
+
+// flag for "op is plus_fp32 or plus_fp64"; 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for "op is minus_fp32 or minus_fp64"; 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+// NOTE(review): "(none)" below is a placeholder, not a callable routine; presumably never expanded for this op -- confirm in the templates
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_RMINUS || GxB_NO_UINT8 || GxB_NO_RMINUS_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__rminus_uint8
+(
+    GrB_Matrix C,               // matrix accumulated into (C += A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ // NOTE(review): no GB_DISABLE guard and a void return, unlike the sibling dense kernels -- confirm intended
+    #include "GB_dense_ewise3_accum_template.c"     // the entire body is this template
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__rminus_uint8
+(
+    GrB_Matrix C,               // result matrix (C = A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the entire computation is this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__rminus_uint8
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A per the banner above)
+    const GrB_Matrix A,                         // matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays handed to the included template;
+    const int64_t *GB_RESTRICT klast_slice,     //   presumably a per-task partition of A --
+    const int64_t *GB_RESTRICT pstart_slice,    //   TODO confirm against the caller
+    const int ntasks,                           // presumably the number of slice tasks -- TODO confirm
+    const int nthreads                          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // the whole computation is the textually included template below
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // this branch is built only when GB_DISABLE evaluates to zero
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__rminus_uint8
+(
+    GrB_Matrix C,               // matrix accumulated into (C += x per the banner above)
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read below as uint8_t
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // typed copy of the scalar is declared just before the template is included
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): the return inside the braces above appears to make the one below redundant
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__rminus_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__rminus_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_bool.c b/Source/Generated/GB_binop__second_bool.c
index a0223cb8e8..cab4024f4d 100644
--- a/Source/Generated/GB_binop__second_bool.c
+++ b/Source/Generated/GB_binop__second_bool.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_bool
-// A.*B function (eWiseMult):  GB_AemultB__second_bool
-// A*D function (colscale):    GB_AxD__second_bool
-// D*A function (rowscale):    GB_DxB__second_bool
+// A+B function (eWiseAdd):         GB_AaddB__second_bool
+// A.*B function (eWiseMult):       GB_AemultB__second_bool
+// A*D function (colscale):         GB_AxD__second_bool
+// D*A function (rowscale):         GB_DxB__second_bool
+// C+=A function (dense accum):     GB_Cdense_accumA__second_bool
+// C+=x function (dense accum):     GB_Cdense_accumX__second_bool
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_bool
 
 // C type:   bool
 // A type:   bool
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     bool bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (bool here); the expansion has no trailing ';'
+#define GB_CTYPE_SCALAR(t)  \
+    bool t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// flag for "op is second"; 1 in this file (the operator is SECOND)
+#define GB_OP_IS_SECOND \
+    1
+
+// flag for "op is plus_fp32 or plus_fp64"; 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for "op is minus_fp32 or minus_fp64"; 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+// NOTE(review): "(none)" below is a placeholder, not a callable routine; presumably never expanded for this op -- confirm in the templates
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_BOOL || GxB_NO_SECOND_BOOL)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: SECOND is not in the operator list below, so this file has no C += A+B kernel.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// "(none)" below stands where the function name would be; it is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_bool
+(
+    GrB_Matrix C,               // result matrix (C = A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the entire computation is this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_bool
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A per the banner above)
+    const GrB_Matrix A,                         // matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays handed to the included template;
+    const int64_t *GB_RESTRICT klast_slice,     //   presumably a per-task partition of A --
+    const int64_t *GB_RESTRICT pstart_slice,    //   TODO confirm against the caller
+    const int ntasks,                           // presumably the number of slice tasks -- TODO confirm
+    const int nthreads                          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // the whole computation is the textually included template below
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // this branch is built only when GB_DISABLE evaluates to zero
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_bool
+(
+    GrB_Matrix C,               // matrix accumulated into (C += x per the banner above)
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read below as bool
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // typed copy of the scalar is declared just before the template is included
+    { 
+        bool ywork = (*((bool *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): the return inside the braces above appears to make the one below redundant
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_bool
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_fp32.c b/Source/Generated/GB_binop__second_fp32.c
index 5c8c360a33..5b2c550ce1 100644
--- a/Source/Generated/GB_binop__second_fp32.c
+++ b/Source/Generated/GB_binop__second_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_fp32
-// A.*B function (eWiseMult):  GB_AemultB__second_fp32
-// A*D function (colscale):    GB_AxD__second_fp32
-// D*A function (rowscale):    GB_DxB__second_fp32
+// A+B function (eWiseAdd):         GB_AaddB__second_fp32
+// A.*B function (eWiseMult):       GB_AemultB__second_fp32
+// A*D function (colscale):         GB_AxD__second_fp32
+// D*A function (rowscale):         GB_DxB__second_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__second_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__second_fp32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (float here); the expansion has no trailing ';'
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// flag for "op is second"; 1 in this file (the operator is SECOND)
+#define GB_OP_IS_SECOND \
+    1
+
+// flag for "op is plus_fp32 or plus_fp64"; 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for "op is minus_fp32 or minus_fp64"; 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+// NOTE(review): "(none)" below is a placeholder, not a callable routine; presumably never expanded for this op -- confirm in the templates
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_FP32 || GxB_NO_SECOND_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: SECOND is not in the operator list below, so this file has no C += A+B kernel.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// "(none)" below stands where the function name would be; it is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_fp32
+(
+    GrB_Matrix C,               // result matrix (C = A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the entire computation is this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_fp32
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A per the banner above)
+    const GrB_Matrix A,                         // matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays handed to the included template;
+    const int64_t *GB_RESTRICT klast_slice,     //   presumably a per-task partition of A --
+    const int64_t *GB_RESTRICT pstart_slice,    //   TODO confirm against the caller
+    const int ntasks,                           // presumably the number of slice tasks -- TODO confirm
+    const int nthreads                          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // the whole computation is the textually included template below
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // this branch is built only when GB_DISABLE evaluates to zero
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_fp32
+(
+    GrB_Matrix C,               // matrix accumulated into (C += x per the banner above)
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read below as float
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // typed copy of the scalar is declared just before the template is included
+    { 
+        float ywork = (*((float *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): the return inside the braces above appears to make the one below redundant
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_fp64.c b/Source/Generated/GB_binop__second_fp64.c
index e43217b78c..d8275da7ad 100644
--- a/Source/Generated/GB_binop__second_fp64.c
+++ b/Source/Generated/GB_binop__second_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_fp64
-// A.*B function (eWiseMult):  GB_AemultB__second_fp64
-// A*D function (colscale):    GB_AxD__second_fp64
-// D*A function (rowscale):    GB_DxB__second_fp64
+// A+B function (eWiseAdd):         GB_AaddB__second_fp64
+// A.*B function (eWiseMult):       GB_AemultB__second_fp64
+// A*D function (colscale):         GB_AxD__second_fp64
+// D*A function (rowscale):         GB_DxB__second_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__second_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__second_fp64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (double here); the expansion has no trailing ';'
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// flag for "op is second"; 1 in this file (the operator is SECOND)
+#define GB_OP_IS_SECOND \
+    1
+
+// flag for "op is plus_fp32 or plus_fp64"; 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for "op is minus_fp32 or minus_fp64"; 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+// NOTE(review): "(none)" below is a placeholder, not a callable routine; presumably never expanded for this op -- confirm in the templates
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_FP64 || GxB_NO_SECOND_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: SECOND is not in the operator list below, so this file has no C += A+B kernel.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// "(none)" below stands where the function name would be; it is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_fp64
+(
+    GrB_Matrix C,               // result matrix (C = A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the entire computation is this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_fp64
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A per the banner above)
+    const GrB_Matrix A,                         // matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays handed to the included template;
+    const int64_t *GB_RESTRICT klast_slice,     //   presumably a per-task partition of A --
+    const int64_t *GB_RESTRICT pstart_slice,    //   TODO confirm against the caller
+    const int ntasks,                           // presumably the number of slice tasks -- TODO confirm
+    const int nthreads                          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // the whole computation is the textually included template below
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // this branch is built only when GB_DISABLE evaluates to zero
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_fp64
+(
+    GrB_Matrix C,               // matrix accumulated into (C += x per the banner above)
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read below as double
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // typed copy of the scalar is declared just before the template is included
+    { 
+        double ywork = (*((double *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): the return inside the braces above appears to make the one below redundant
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_int16.c b/Source/Generated/GB_binop__second_int16.c
index 48bf87b24b..ba2f7cfa07 100644
--- a/Source/Generated/GB_binop__second_int16.c
+++ b/Source/Generated/GB_binop__second_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_int16
-// A.*B function (eWiseMult):  GB_AemultB__second_int16
-// A*D function (colscale):    GB_AxD__second_int16
-// D*A function (rowscale):    GB_DxB__second_int16
+// A+B function (eWiseAdd):         GB_AaddB__second_int16
+// A.*B function (eWiseMult):       GB_AemultB__second_int16
+// A*D function (colscale):         GB_AxD__second_int16
+// D*A function (rowscale):         GB_DxB__second_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__second_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__second_int16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// declare a scalar named t with the same type as C (int16_t here); the expansion has no trailing ';'
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// flag for "op is second"; 1 in this file (the operator is SECOND)
+#define GB_OP_IS_SECOND \
+    1
+
+// flag for "op is plus_fp32 or plus_fp64"; 0 in this file
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// flag for "op is minus_fp32 or minus_fp64"; 0 in this file
+#define GB_OP_IS_MINUS_REAL \
+    0
+// NOTE(review): "(none)" below is a placeholder, not a callable routine; presumably never expanded for this op -- confirm in the templates
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT16 || GxB_NO_SECOND_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0
+// Compiled out: SECOND is not in the operator list below, so this file has no C += A+B kernel.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+// "(none)" below stands where the function name would be; it is never compiled.
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int16
+(
+    GrB_Matrix C,               // result matrix (C = A+B per the banner above; all three dense)
+    const GrB_Matrix A,         // first input matrix
+    const GrB_Matrix B,         // second input matrix
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // the entire computation is this template
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_int16
+(
+    GrB_Matrix C,                               // matrix accumulated into (C += A per the banner above)
+    const GrB_Matrix A,                         // matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays handed to the included template;
+    const int64_t *GB_RESTRICT klast_slice,     //   presumably a per-task partition of A --
+    const int64_t *GB_RESTRICT pstart_slice,    //   TODO confirm against the caller
+    const int ntasks,                           // presumably the number of slice tasks -- TODO confirm
+    const int nthreads                          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // the whole computation is the textually included template below
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    // this branch is built only when GB_DISABLE evaluates to zero
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_int16
+(
+    GrB_Matrix C,               // matrix accumulated into (C += x per the banner above)
+    const GB_void *p_ywork,     // type-erased pointer to the scalar; read below as int16_t
+    const int nthreads          // presumably the thread count -- TODO confirm
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out; the caller receives GrB_NO_VALUE
+    #else
+    // typed copy of the scalar is declared just before the template is included
+    { 
+        int16_t ywork = (*((int16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): the return inside the braces above appears to make the one below redundant
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_int32.c b/Source/Generated/GB_binop__second_int32.c
index f6901fd3b4..d272216835 100644
--- a/Source/Generated/GB_binop__second_int32.c
+++ b/Source/Generated/GB_binop__second_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_int32
-// A.*B function (eWiseMult):  GB_AemultB__second_int32
-// A*D function (colscale):    GB_AxD__second_int32
-// D*A function (rowscale):    GB_DxB__second_int32
+// A+B function (eWiseAdd):         GB_AaddB__second_int32
+// A.*B function (eWiseMult):       GB_AemultB__second_int32
+// A*D function (colscale):         GB_AxD__second_int32
+// D*A function (rowscale):         GB_DxB__second_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__second_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__second_int32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (int32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// 1 because this operator is SECOND (GB_BINOP above is z = y)
+#define GB_OP_IS_SECOND \
+    1
+
+// 1 only for plus_fp32 or plus_fp64; 0 here since this is second_int32
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 only for minus_fp32 or minus_fp64; 0 here since this is second_int32
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway for this operator/type; (none) means there is none:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT32 || GxB_NO_SECOND_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: SECOND has no C += A+B kernel, hence the "(none)" name
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would appear; not valid C
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int32
+(
+    GrB_Matrix C,           // dense result matrix
+    const GrB_Matrix A,     // dense first operand
+    const GrB_Matrix B,     // dense second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if !(GB_DISABLE)
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_int32
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being added
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays and ntasks:
+    const int64_t *GB_RESTRICT klast_slice,     // presumably the task split
+    const int64_t *GB_RESTRICT pstart_slice,    // that the template reads
+    const int ntasks,                           // (TODO confirm)
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_int32
+(
+    GrB_Matrix C,               // dense matrix the scalar is accumulated into
+    const GB_void *p_ywork,     // points to the scalar, read here as int32_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // read the scalar without casting away the const on p_ywork
+        int32_t ywork = (*((const int32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_int64.c b/Source/Generated/GB_binop__second_int64.c
index f6b25a1e82..0e718e6b50 100644
--- a/Source/Generated/GB_binop__second_int64.c
+++ b/Source/Generated/GB_binop__second_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_int64
-// A.*B function (eWiseMult):  GB_AemultB__second_int64
-// A*D function (colscale):    GB_AxD__second_int64
-// D*A function (rowscale):    GB_DxB__second_int64
+// A+B function (eWiseAdd):         GB_AaddB__second_int64
+// A.*B function (eWiseMult):       GB_AemultB__second_int64
+// A*D function (colscale):         GB_AxD__second_int64
+// D*A function (rowscale):         GB_DxB__second_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__second_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__second_int64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (int64_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// 1 because this operator is SECOND (GB_BINOP above is z = y)
+#define GB_OP_IS_SECOND \
+    1
+
+// 1 only for plus_fp32 or plus_fp64; 0 here since this is second_int64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 only for minus_fp32 or minus_fp64; 0 here since this is second_int64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway for this operator/type; (none) means there is none:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT64 || GxB_NO_SECOND_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: SECOND has no C += A+B kernel, hence the "(none)" name
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would appear; not valid C
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int64
+(
+    GrB_Matrix C,           // dense result matrix
+    const GrB_Matrix A,     // dense first operand
+    const GrB_Matrix B,     // dense second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if !(GB_DISABLE)
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_int64
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being added
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays and ntasks:
+    const int64_t *GB_RESTRICT klast_slice,     // presumably the task split
+    const int64_t *GB_RESTRICT pstart_slice,    // that the template reads
+    const int ntasks,                           // (TODO confirm)
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_int64
+(
+    GrB_Matrix C,               // dense matrix the scalar is accumulated into
+    const GB_void *p_ywork,     // points to the scalar, read here as int64_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // read the scalar without casting away the const on p_ywork
+        int64_t ywork = (*((const int64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_int8.c b/Source/Generated/GB_binop__second_int8.c
index 33aeaa37f0..1111b2b9b3 100644
--- a/Source/Generated/GB_binop__second_int8.c
+++ b/Source/Generated/GB_binop__second_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_int8
-// A.*B function (eWiseMult):  GB_AemultB__second_int8
-// A*D function (colscale):    GB_AxD__second_int8
-// D*A function (rowscale):    GB_DxB__second_int8
+// A+B function (eWiseAdd):         GB_AaddB__second_int8
+// A.*B function (eWiseMult):       GB_AemultB__second_int8
+// A*D function (colscale):         GB_AxD__second_int8
+// D*A function (rowscale):         GB_DxB__second_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__second_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__second_int8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (int8_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// 1 because this operator is SECOND (GB_BINOP above is z = y)
+#define GB_OP_IS_SECOND \
+    1
+
+// 1 only for plus_fp32 or plus_fp64; 0 here since this is second_int8
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 only for minus_fp32 or minus_fp64; 0 here since this is second_int8
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway for this operator/type; (none) means there is none:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT8 || GxB_NO_SECOND_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: SECOND has no C += A+B kernel, hence the "(none)" name
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would appear; not valid C
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_int8
+(
+    GrB_Matrix C,           // dense result matrix
+    const GrB_Matrix A,     // dense first operand
+    const GrB_Matrix B,     // dense second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if !(GB_DISABLE)
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_int8
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being added
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays and ntasks:
+    const int64_t *GB_RESTRICT klast_slice,     // presumably the task split
+    const int64_t *GB_RESTRICT pstart_slice,    // that the template reads
+    const int ntasks,                           // (TODO confirm)
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_int8
+(
+    GrB_Matrix C,               // dense matrix the scalar is accumulated into
+    const GB_void *p_ywork,     // points to the scalar, read here as int8_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // read the scalar without casting away the const on p_ywork
+        int8_t ywork = (*((const int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_uint16.c b/Source/Generated/GB_binop__second_uint16.c
index fde983a396..7e9527c712 100644
--- a/Source/Generated/GB_binop__second_uint16.c
+++ b/Source/Generated/GB_binop__second_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_uint16
-// A.*B function (eWiseMult):  GB_AemultB__second_uint16
-// A*D function (colscale):    GB_AxD__second_uint16
-// D*A function (rowscale):    GB_DxB__second_uint16
+// A+B function (eWiseAdd):         GB_AaddB__second_uint16
+// A.*B function (eWiseMult):       GB_AemultB__second_uint16
+// A*D function (colscale):         GB_AxD__second_uint16
+// D*A function (rowscale):         GB_DxB__second_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__second_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__second_uint16
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (uint16_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// 1 because this operator is SECOND (GB_BINOP above is z = y)
+#define GB_OP_IS_SECOND \
+    1
+
+// 1 only for plus_fp32 or plus_fp64; 0 here since this is second_uint16
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 only for minus_fp32 or minus_fp64; 0 here since this is second_uint16
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway for this operator/type; (none) means there is none:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT16 || GxB_NO_SECOND_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: SECOND has no C += A+B kernel, hence the "(none)" name
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would appear; not valid C
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint16
+(
+    GrB_Matrix C,           // dense result matrix
+    const GrB_Matrix A,     // dense first operand
+    const GrB_Matrix B,     // dense second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if !(GB_DISABLE)
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_uint16
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being added
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays and ntasks:
+    const int64_t *GB_RESTRICT klast_slice,     // presumably the task split
+    const int64_t *GB_RESTRICT pstart_slice,    // that the template reads
+    const int ntasks,                           // (TODO confirm)
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_uint16
+(
+    GrB_Matrix C,               // dense matrix the scalar is accumulated into
+    const GB_void *p_ywork,     // points to the scalar, read here as uint16_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // read the scalar without casting away the const on p_ywork
+        uint16_t ywork = (*((const uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_uint32.c b/Source/Generated/GB_binop__second_uint32.c
index 2eadfd0078..b8227e2343 100644
--- a/Source/Generated/GB_binop__second_uint32.c
+++ b/Source/Generated/GB_binop__second_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_uint32
-// A.*B function (eWiseMult):  GB_AemultB__second_uint32
-// A*D function (colscale):    GB_AxD__second_uint32
-// D*A function (rowscale):    GB_DxB__second_uint32
+// A+B function (eWiseAdd):         GB_AaddB__second_uint32
+// A.*B function (eWiseMult):       GB_AemultB__second_uint32
+// A*D function (colscale):         GB_AxD__second_uint32
+// D*A function (rowscale):         GB_DxB__second_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__second_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__second_uint32
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (uint32_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// 1 because this operator is SECOND (GB_BINOP above is z = y)
+#define GB_OP_IS_SECOND \
+    1
+
+// 1 only for plus_fp32 or plus_fp64; 0 here since this is second_uint32
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 only for minus_fp32 or minus_fp64; 0 here since this is second_uint32
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway for this operator/type; (none) means there is none:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT32 || GxB_NO_SECOND_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: SECOND has no C += A+B kernel, hence the "(none)" name
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would appear; not valid C
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint32
+(
+    GrB_Matrix C,           // dense result matrix
+    const GrB_Matrix A,     // dense first operand
+    const GrB_Matrix B,     // dense second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if !(GB_DISABLE)
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_uint32
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being added
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays and ntasks:
+    const int64_t *GB_RESTRICT klast_slice,     // presumably the task split
+    const int64_t *GB_RESTRICT pstart_slice,    // that the template reads
+    const int ntasks,                           // (TODO confirm)
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_uint32
+(
+    GrB_Matrix C,               // dense matrix the scalar is accumulated into
+    const GB_void *p_ywork,     // points to the scalar, read here as uint32_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // read the scalar without casting away the const on p_ywork
+        uint32_t ywork = (*((const uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_uint64.c b/Source/Generated/GB_binop__second_uint64.c
index d6a79eb529..5e8bcb088d 100644
--- a/Source/Generated/GB_binop__second_uint64.c
+++ b/Source/Generated/GB_binop__second_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_uint64
-// A.*B function (eWiseMult):  GB_AemultB__second_uint64
-// A*D function (colscale):    GB_AxD__second_uint64
-// D*A function (rowscale):    GB_DxB__second_uint64
+// A+B function (eWiseAdd):         GB_AaddB__second_uint64
+// A.*B function (eWiseMult):       GB_AemultB__second_uint64
+// A*D function (colscale):         GB_AxD__second_uint64
+// D*A function (rowscale):         GB_DxB__second_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__second_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__second_uint64
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare a scalar t with the same type as C (uint64_t in this file)
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// 1 because this operator is SECOND (GB_BINOP above is z = y)
+#define GB_OP_IS_SECOND \
+    1
+
+// 1 only for plus_fp32 or plus_fp64; 0 here since this is second_uint64
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// 1 only for minus_fp32 or minus_fp64; 0 here since this is second_uint64
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway for this operator/type; (none) means there is none:
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT64 || GxB_NO_SECOND_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0   // compiled out: SECOND has no C += A+B kernel, hence the "(none)" name
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void (none)     // placeholder where a kernel name would appear; not valid C
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif  // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint64
+(
+    GrB_Matrix C,           // dense result matrix
+    const GrB_Matrix A,     // dense first operand
+    const GrB_Matrix B,     // dense second operand
+    const int nthreads      // presumably the thread count for the template
+)
+{ 
+    #if !(GB_DISABLE)
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_uint64
+(
+    GrB_Matrix C,                               // dense matrix accumulated into
+    const GrB_Matrix A,                         // sparse matrix being added
+    const int64_t *GB_RESTRICT kfirst_slice,    // slice arrays and ntasks:
+    const int64_t *GB_RESTRICT klast_slice,     // presumably the task split
+    const int64_t *GB_RESTRICT pstart_slice,    // that the template reads
+    const int ntasks,                           // (TODO confirm)
+    const int nthreads
+)
+{
+    #if !(GB_DISABLE)
+    
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // kernel was compiled in and has run
+    #else
+    return (GrB_NO_VALUE) ;     // GB_DISABLE is nonzero: kernel compiled out
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_uint64
+(
+    GrB_Matrix C,               // dense matrix the scalar is accumulated into
+    const GB_void *p_ywork,     // points to the scalar, read here as uint64_t
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    { 
+        // read the scalar without casting away the const on p_ywork
+        uint64_t ywork = (*((const uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__second_uint8.c b/Source/Generated/GB_binop__second_uint8.c
index cbf8d12f98..a700a88f5e 100644
--- a/Source/Generated/GB_binop__second_uint8.c
+++ b/Source/Generated/GB_binop__second_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__second_uint8
-// A.*B function (eWiseMult):  GB_AemultB__second_uint8
-// A*D function (colscale):    GB_AxD__second_uint8
-// D*A function (rowscale):    GB_DxB__second_uint8
+// A+B function (eWiseAdd):         GB_AaddB__second_uint8
+// A.*B function (eWiseMult):       GB_AemultB__second_uint8
+// A*D function (colscale):         GB_AxD__second_uint8
+// D*A function (rowscale):         GB_DxB__second_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__second_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__second_uint8
+// C+=A+B function (dense ewise3):  (none)
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__second_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// GB_CTYPE_SCALAR(t): declare a scalar variable t with the same type as C (uint8_t)
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = y ;
 
+// GB_OP_IS_SECOND: 1 only when the operator is SECOND; 1 here (GB_BINOP is z = y)
+#define GB_OP_IS_SECOND \
+    1
+
+// GB_OP_IS_PLUS_REAL: 1 only for plus_fp32 or plus_fp64; 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL: 1 only for minus_fp32 or minus_fp64; 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_CBLAS_AXPY: GB_cblas_*axpy gateway routine for this operator and type, if any; "(none)" is the placeholder when there is none
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT8 || GxB_NO_SECOND_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+#if 0          // disabled: no C += A+B kernel exists for this operator, so "(none)" stands in for its name
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV (SECOND is not one of them).
+
+void (none)
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+#endif         // end of the disabled C += A+B kernel
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__second_uint8   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // dense result of C = A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE          // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: the included template is the whole body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__second_uint8   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably describes how A is split across tasks -- confirm
+    const int ntasks,                           // presumably the number of tasks -- confirm
+    const int nthreads                          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // the template is expanded inside its own brace scope
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // reached once the template block falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__second_uint8   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint8_t below
+    const int nthreads          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // scope holding ywork and the expanded template
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;      // typed copy of the scalar; NOTE(review): the cast drops const
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit once the template code falls through
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): not reachable by fall-through; the block above always returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__second_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__second_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_fp32.c b/Source/Generated/GB_binop__times_fp32.c
index 63b57b947b..d8b764184b 100644
--- a/Source/Generated/GB_binop__times_fp32.c
+++ b/Source/Generated/GB_binop__times_fp32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_fp32
-// A.*B function (eWiseMult):  GB_AemultB__times_fp32
-// A*D function (colscale):    GB_AxD__times_fp32
-// D*A function (rowscale):    GB_DxB__times_fp32
+// A+B function (eWiseAdd):         GB_AaddB__times_fp32
+// A.*B function (eWiseMult):       GB_AemultB__times_fp32
+// A*D function (colscale):         GB_AxD__times_fp32
+// D*A function (rowscale):         GB_DxB__times_fp32
+// C+=A function (dense accum):     GB_Cdense_accumA__times_fp32
+// C+=x function (dense accum):     GB_Cdense_accumX__times_fp32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_fp32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_fp32
 
 // C type:   float
 // A type:   float
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     float bij = Bx [pB]
 
+// GB_CTYPE_SCALAR(t): declare a scalar variable t with the same type as C (float)
+#define GB_CTYPE_SCALAR(t)  \
+    float t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// GB_OP_IS_SECOND: 1 only when the operator is SECOND; 0 here (GB_BINOP is z = x * y)
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL: 1 only for plus_fp32 or plus_fp64; 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL: 1 only for minus_fp32 or minus_fp64; 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_CBLAS_AXPY: GB_cblas_*axpy gateway routine for this operator and type, if any; "(none)" is the placeholder when there is none
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FP32 || GxB_NO_TIMES_FP32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_fp32    // C += A+B kernel; returns no status
+(
+    GrB_Matrix C,           // dense matrix updated by C += A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{   // NOTE(review): no GB_DISABLE guard here, unlike the GrB_Info kernels in this file
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_fp32   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // dense result of C = A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE          // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: the included template is the whole body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_fp32   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably describes how A is split across tasks -- confirm
+    const int ntasks,                           // presumably the number of tasks -- confirm
+    const int nthreads                          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // the template is expanded inside its own brace scope
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // reached once the template block falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_fp32   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as float below
+    const int nthreads          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // scope holding ywork and the expanded template
+        float ywork = (*((float *) p_ywork)) ;          // typed copy of the scalar; NOTE(review): the cast drops const
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit once the template code falls through
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): not reachable by fall-through; the block above always returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_fp32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_fp64.c b/Source/Generated/GB_binop__times_fp64.c
index 3985af1d76..254d81d5e6 100644
--- a/Source/Generated/GB_binop__times_fp64.c
+++ b/Source/Generated/GB_binop__times_fp64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_fp64
-// A.*B function (eWiseMult):  GB_AemultB__times_fp64
-// A*D function (colscale):    GB_AxD__times_fp64
-// D*A function (rowscale):    GB_DxB__times_fp64
+// A+B function (eWiseAdd):         GB_AaddB__times_fp64
+// A.*B function (eWiseMult):       GB_AemultB__times_fp64
+// A*D function (colscale):         GB_AxD__times_fp64
+// D*A function (rowscale):         GB_DxB__times_fp64
+// C+=A function (dense accum):     GB_Cdense_accumA__times_fp64
+// C+=x function (dense accum):     GB_Cdense_accumX__times_fp64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_fp64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_fp64
 
 // C type:   double
 // A type:   double
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     double bij = Bx [pB]
 
+// GB_CTYPE_SCALAR(t): declare a scalar variable t with the same type as C (double)
+#define GB_CTYPE_SCALAR(t)  \
+    double t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// GB_OP_IS_SECOND: 1 only when the operator is SECOND; 0 here (GB_BINOP is z = x * y)
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL: 1 only for plus_fp32 or plus_fp64; 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL: 1 only for minus_fp32 or minus_fp64; 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_CBLAS_AXPY: GB_cblas_*axpy gateway routine for this operator and type, if any; "(none)" is the placeholder when there is none
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FP64 || GxB_NO_TIMES_FP64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_fp64    // C += A+B kernel; returns no status
+(
+    GrB_Matrix C,           // dense matrix updated by C += A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{   // NOTE(review): no GB_DISABLE guard here, unlike the GrB_Info kernels in this file
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_fp64   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // dense result of C = A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE          // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: the included template is the whole body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_fp64   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably describes how A is split across tasks -- confirm
+    const int ntasks,                           // presumably the number of tasks -- confirm
+    const int nthreads                          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // the template is expanded inside its own brace scope
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // reached once the template block falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_fp64   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as double below
+    const int nthreads          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // scope holding ywork and the expanded template
+        double ywork = (*((double *) p_ywork)) ;        // typed copy of the scalar; NOTE(review): the cast drops const
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit once the template code falls through
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): not reachable by fall-through; the block above always returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_fp64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_int16.c b/Source/Generated/GB_binop__times_int16.c
index 9c6c13cd60..6d9dc137d6 100644
--- a/Source/Generated/GB_binop__times_int16.c
+++ b/Source/Generated/GB_binop__times_int16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_int16
-// A.*B function (eWiseMult):  GB_AemultB__times_int16
-// A*D function (colscale):    GB_AxD__times_int16
-// D*A function (rowscale):    GB_DxB__times_int16
+// A+B function (eWiseAdd):         GB_AaddB__times_int16
+// A.*B function (eWiseMult):       GB_AemultB__times_int16
+// A*D function (colscale):         GB_AxD__times_int16
+// D*A function (rowscale):         GB_DxB__times_int16
+// C+=A function (dense accum):     GB_Cdense_accumA__times_int16
+// C+=x function (dense accum):     GB_Cdense_accumX__times_int16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_int16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_int16
 
 // C type:   int16_t
 // A type:   int16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int16_t bij = Bx [pB]
 
+// GB_CTYPE_SCALAR(t): declare a scalar variable t with the same type as C (int16_t)
+#define GB_CTYPE_SCALAR(t)  \
+    int16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// GB_OP_IS_SECOND: 1 only when the operator is SECOND; 0 here (GB_BINOP is z = x * y)
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL: 1 only for plus_fp32 or plus_fp64; 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL: 1 only for minus_fp32 or minus_fp64; 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_CBLAS_AXPY: GB_cblas_*axpy gateway routine for this operator and type, if any; "(none)" is the placeholder when there is none
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT16 || GxB_NO_TIMES_INT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_int16    // C += A+B kernel; returns no status
+(
+    GrB_Matrix C,           // dense matrix updated by C += A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{   // NOTE(review): no GB_DISABLE guard here, unlike the GrB_Info kernels in this file
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_int16   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // dense result of C = A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE          // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: the included template is the whole body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_int16   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably describes how A is split across tasks -- confirm
+    const int ntasks,                           // presumably the number of tasks -- confirm
+    const int nthreads                          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // the template is expanded inside its own brace scope
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // reached once the template block falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_int16   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int16_t below
+    const int nthreads          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // scope holding ywork and the expanded template
+        int16_t ywork = (*((int16_t *) p_ywork)) ;      // typed copy of the scalar; NOTE(review): the cast drops const
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit once the template code falls through
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): not reachable by fall-through; the block above always returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_int16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_int32.c b/Source/Generated/GB_binop__times_int32.c
index 81241b05bc..c65eb61933 100644
--- a/Source/Generated/GB_binop__times_int32.c
+++ b/Source/Generated/GB_binop__times_int32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_int32
-// A.*B function (eWiseMult):  GB_AemultB__times_int32
-// A*D function (colscale):    GB_AxD__times_int32
-// D*A function (rowscale):    GB_DxB__times_int32
+// A+B function (eWiseAdd):         GB_AaddB__times_int32
+// A.*B function (eWiseMult):       GB_AemultB__times_int32
+// A*D function (colscale):         GB_AxD__times_int32
+// D*A function (rowscale):         GB_DxB__times_int32
+// C+=A function (dense accum):     GB_Cdense_accumA__times_int32
+// C+=x function (dense accum):     GB_Cdense_accumX__times_int32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_int32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_int32
 
 // C type:   int32_t
 // A type:   int32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int32_t bij = Bx [pB]
 
+// GB_CTYPE_SCALAR(t): declare a scalar variable t with the same type as C (int32_t)
+#define GB_CTYPE_SCALAR(t)  \
+    int32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// GB_OP_IS_SECOND: 1 only when the operator is SECOND; 0 here (GB_BINOP is z = x * y)
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL: 1 only for plus_fp32 or plus_fp64; 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL: 1 only for minus_fp32 or minus_fp64; 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_CBLAS_AXPY: GB_cblas_*axpy gateway routine for this operator and type, if any; "(none)" is the placeholder when there is none
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT32 || GxB_NO_TIMES_INT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_int32    // C += A+B kernel; returns no status
+(
+    GrB_Matrix C,           // dense matrix updated by C += A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{   // NOTE(review): no GB_DISABLE guard here, unlike the GrB_Info kernels in this file
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_int32   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // dense result of C = A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE          // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: the included template is the whole body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_int32   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably describes how A is split across tasks -- confirm
+    const int ntasks,                           // presumably the number of tasks -- confirm
+    const int nthreads                          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // the template is expanded inside its own brace scope
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // reached once the template block falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_int32   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int32_t below
+    const int nthreads          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // scope holding ywork and the expanded template
+        int32_t ywork = (*((int32_t *) p_ywork)) ;      // typed copy of the scalar; NOTE(review): the cast drops const
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit once the template code falls through
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): not reachable by fall-through; the block above always returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_int32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_int64.c b/Source/Generated/GB_binop__times_int64.c
index 6297eaff0b..7c88850972 100644
--- a/Source/Generated/GB_binop__times_int64.c
+++ b/Source/Generated/GB_binop__times_int64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_int64
-// A.*B function (eWiseMult):  GB_AemultB__times_int64
-// A*D function (colscale):    GB_AxD__times_int64
-// D*A function (rowscale):    GB_DxB__times_int64
+// A+B function (eWiseAdd):         GB_AaddB__times_int64
+// A.*B function (eWiseMult):       GB_AemultB__times_int64
+// A*D function (colscale):         GB_AxD__times_int64
+// D*A function (rowscale):         GB_DxB__times_int64
+// C+=A function (dense accum):     GB_Cdense_accumA__times_int64
+// C+=x function (dense accum):     GB_Cdense_accumX__times_int64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_int64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_int64
 
 // C type:   int64_t
 // A type:   int64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int64_t bij = Bx [pB]
 
+// GB_CTYPE_SCALAR(t): declare a scalar variable t with the same type as C (int64_t)
+#define GB_CTYPE_SCALAR(t)  \
+    int64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// GB_OP_IS_SECOND: 1 only when the operator is SECOND; 0 here (GB_BINOP is z = x * y)
+#define GB_OP_IS_SECOND \
+    0
+
+// GB_OP_IS_PLUS_REAL: 1 only for plus_fp32 or plus_fp64; 0 for this operator
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// GB_OP_IS_MINUS_REAL: 1 only for minus_fp32 or minus_fp64; 0 for this operator
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_CBLAS_AXPY: GB_cblas_*axpy gateway routine for this operator and type, if any; "(none)" is the placeholder when there is none
+#define GB_CBLAS_AXPY \
+    (none)
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT64 || GxB_NO_TIMES_INT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_int64    // C += A+B kernel; returns no status
+(
+    GrB_Matrix C,           // dense matrix updated by C += A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{   // NOTE(review): no GB_DISABLE guard here, unlike the GrB_Info kernels in this file
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_int64   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,           // dense result of C = A+B
+    const GrB_Matrix A,     // dense left operand
+    const GrB_Matrix B,     // dense right operand
+    const int nthreads      // presumably the thread count used by the template -- confirm
+)
+{ 
+    #if GB_DISABLE          // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else                   // kernel enabled: the included template is the whole body
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;  // reached once the template code falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_int64   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,                               // dense matrix accumulated into (C += A)
+    const GrB_Matrix A,                         // sparse matrix accumulated into C
+    const int64_t *GB_RESTRICT kfirst_slice,    // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describes how A is split across tasks -- confirm
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably describes how A is split across tasks -- confirm
+    const int ntasks,                           // presumably the number of tasks -- confirm
+    const int nthreads                          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // the template is expanded inside its own brace scope
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;      // reached once the template block falls through
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_int64   // GrB_SUCCESS, or GrB_NO_VALUE if compiled out
+(
+    GrB_Matrix C,               // dense matrix accumulated into (C += x)
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int64_t below
+    const int nthreads          // presumably the thread count -- confirm
+)
+{
+    #if GB_DISABLE              // kernel compiled out: do nothing and report GrB_NO_VALUE
+    return (GrB_NO_VALUE) ;
+    #else
+    
+    {   // scope holding ywork and the expanded template
+        int64_t ywork = (*((int64_t *) p_ywork)) ;      // typed copy of the scalar; NOTE(review): the cast drops const
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;  // normal exit once the template code falls through
+    }
+    
+    return (GrB_SUCCESS) ;      // NOTE(review): not reachable by fall-through; the block above always returns
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_int64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_int8.c b/Source/Generated/GB_binop__times_int8.c
index 2a95efc9a7..b19be2d582 100644
--- a/Source/Generated/GB_binop__times_int8.c
+++ b/Source/Generated/GB_binop__times_int8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_int8
-// A.*B function (eWiseMult):  GB_AemultB__times_int8
-// A*D function (colscale):    GB_AxD__times_int8
-// D*A function (rowscale):    GB_DxB__times_int8
+// A+B function (eWiseAdd):         GB_AaddB__times_int8
+// A.*B function (eWiseMult):       GB_AemultB__times_int8
+// A*D function (colscale):         GB_AxD__times_int8
+// D*A function (rowscale):         GB_DxB__times_int8
+// C+=A function (dense accum):     GB_Cdense_accumA__times_int8
+// C+=x function (dense accum):     GB_Cdense_accumX__times_int8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_int8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_int8
 
 // C type:   int8_t
 // A type:   int8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     int8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    int8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// op is second (0: this file's op is TIMES)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder text, not a callable name; presumably never expanded
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT8 || GxB_NO_TIMES_INT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (this variant: TIMES operator, int8_t)
+//------------------------------------------------------------------------------
+
+// NOTE: returns void and contains no GB_DISABLE check, so there is no
+// GrB_NO_VALUE path here; the whole body is the included template text.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_int8
+(
+    GrB_Matrix C,               // dense matrix, updated by C += A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (this variant: TIMES operator, int8_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_int8
+(
+    GrB_Matrix C,               // result matrix of C = A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (TIMES, int8_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_int8
+(
+    GrB_Matrix C,                               // dense matrix, updated
+    const GrB_Matrix A,                         // sparse input
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is split across
+    const int64_t *GB_RESTRICT pstart_slice,    // tasks -- TODO confirm
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // the braces give the included template text its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (TIMES, int8_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_int8
+(
+    GrB_Matrix C,               // dense matrix, updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as int8_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // typed copy of the scalar; the included template presumably reads ywork
+    { 
+        int8_t ywork = (*((int8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): unreachable -- the block above always returns first
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_int8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_uint16.c b/Source/Generated/GB_binop__times_uint16.c
index 27eda9e43c..3aa3c4f098 100644
--- a/Source/Generated/GB_binop__times_uint16.c
+++ b/Source/Generated/GB_binop__times_uint16.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_uint16
-// A.*B function (eWiseMult):  GB_AemultB__times_uint16
-// A*D function (colscale):    GB_AxD__times_uint16
-// D*A function (rowscale):    GB_DxB__times_uint16
+// A+B function (eWiseAdd):         GB_AaddB__times_uint16
+// A.*B function (eWiseMult):       GB_AemultB__times_uint16
+// A*D function (colscale):         GB_AxD__times_uint16
+// D*A function (rowscale):         GB_DxB__times_uint16
+// C+=A function (dense accum):     GB_Cdense_accumA__times_uint16
+// C+=x function (dense accum):     GB_Cdense_accumX__times_uint16
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_uint16
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_uint16
 
 // C type:   uint16_t
 // A type:   uint16_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint16_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint16_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// op is second (0: this file's op is TIMES)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder text, not a callable name; presumably never expanded
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (this variant: TIMES operator, uint16_t)
+//------------------------------------------------------------------------------
+
+// NOTE: returns void and contains no GB_DISABLE check, so there is no
+// GrB_NO_VALUE path here; the whole body is the included template text.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_uint16
+(
+    GrB_Matrix C,               // dense matrix, updated by C += A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (this variant: TIMES operator, uint16_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint16
+(
+    GrB_Matrix C,               // result matrix of C = A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (TIMES, uint16_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_uint16
+(
+    GrB_Matrix C,                               // dense matrix, updated
+    const GrB_Matrix A,                         // sparse input
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is split across
+    const int64_t *GB_RESTRICT pstart_slice,    // tasks -- TODO confirm
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // the braces give the included template text its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (TIMES, uint16_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_uint16
+(
+    GrB_Matrix C,               // dense matrix, updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint16_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // typed copy of the scalar; the included template presumably reads ywork
+    { 
+        uint16_t ywork = (*((uint16_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): unreachable -- the block above always returns first
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_uint16
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_uint32.c b/Source/Generated/GB_binop__times_uint32.c
index 1547bc6534..f25c7ab6d0 100644
--- a/Source/Generated/GB_binop__times_uint32.c
+++ b/Source/Generated/GB_binop__times_uint32.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_uint32
-// A.*B function (eWiseMult):  GB_AemultB__times_uint32
-// A*D function (colscale):    GB_AxD__times_uint32
-// D*A function (rowscale):    GB_DxB__times_uint32
+// A+B function (eWiseAdd):         GB_AaddB__times_uint32
+// A.*B function (eWiseMult):       GB_AemultB__times_uint32
+// A*D function (colscale):         GB_AxD__times_uint32
+// D*A function (rowscale):         GB_DxB__times_uint32
+// C+=A function (dense accum):     GB_Cdense_accumA__times_uint32
+// C+=x function (dense accum):     GB_Cdense_accumX__times_uint32
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_uint32
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_uint32
 
 // C type:   uint32_t
 // A type:   uint32_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint32_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint32_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// op is second (0: this file's op is TIMES)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder text, not a callable name; presumably never expanded
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (this variant: TIMES operator, uint32_t)
+//------------------------------------------------------------------------------
+
+// NOTE: returns void and contains no GB_DISABLE check, so there is no
+// GrB_NO_VALUE path here; the whole body is the included template text.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_uint32
+(
+    GrB_Matrix C,               // dense matrix, updated by C += A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (this variant: TIMES operator, uint32_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint32
+(
+    GrB_Matrix C,               // result matrix of C = A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (TIMES, uint32_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_uint32
+(
+    GrB_Matrix C,                               // dense matrix, updated
+    const GrB_Matrix A,                         // sparse input
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is split across
+    const int64_t *GB_RESTRICT pstart_slice,    // tasks -- TODO confirm
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // the braces give the included template text its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (TIMES, uint32_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_uint32
+(
+    GrB_Matrix C,               // dense matrix, updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint32_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // typed copy of the scalar; the included template presumably reads ywork
+    { 
+        uint32_t ywork = (*((uint32_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): unreachable -- the block above always returns first
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_uint32
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_uint64.c b/Source/Generated/GB_binop__times_uint64.c
index 0f2d428560..0fec1fd1e0 100644
--- a/Source/Generated/GB_binop__times_uint64.c
+++ b/Source/Generated/GB_binop__times_uint64.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_uint64
-// A.*B function (eWiseMult):  GB_AemultB__times_uint64
-// A*D function (colscale):    GB_AxD__times_uint64
-// D*A function (rowscale):    GB_DxB__times_uint64
+// A+B function (eWiseAdd):         GB_AaddB__times_uint64
+// A.*B function (eWiseMult):       GB_AemultB__times_uint64
+// A*D function (colscale):         GB_AxD__times_uint64
+// D*A function (rowscale):         GB_DxB__times_uint64
+// C+=A function (dense accum):     GB_Cdense_accumA__times_uint64
+// C+=x function (dense accum):     GB_Cdense_accumX__times_uint64
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_uint64
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_uint64
 
 // C type:   uint64_t
 // A type:   uint64_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint64_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint64_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// op is second (0: this file's op is TIMES)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder text, not a callable name; presumably never expanded
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (this variant: TIMES operator, uint64_t)
+//------------------------------------------------------------------------------
+
+// NOTE: returns void and contains no GB_DISABLE check, so there is no
+// GrB_NO_VALUE path here; the whole body is the included template text.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_uint64
+(
+    GrB_Matrix C,               // dense matrix, updated by C += A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (this variant: TIMES operator, uint64_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint64
+(
+    GrB_Matrix C,               // result matrix of C = A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (TIMES, uint64_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_uint64
+(
+    GrB_Matrix C,                               // dense matrix, updated
+    const GrB_Matrix A,                         // sparse input
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is split across
+    const int64_t *GB_RESTRICT pstart_slice,    // tasks -- TODO confirm
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // the braces give the included template text its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (TIMES, uint64_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_uint64
+(
+    GrB_Matrix C,               // dense matrix, updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint64_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // typed copy of the scalar; the included template presumably reads ywork
+    { 
+        uint64_t ywork = (*((uint64_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): unreachable -- the block above always returns first
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_uint64
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_binop__times_uint8.c b/Source/Generated/GB_binop__times_uint8.c
index 2e9f76ddc6..8582ba7ea3 100644
--- a/Source/Generated/GB_binop__times_uint8.c
+++ b/Source/Generated/GB_binop__times_uint8.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB__times_uint8
-// A.*B function (eWiseMult):  GB_AemultB__times_uint8
-// A*D function (colscale):    GB_AxD__times_uint8
-// D*A function (rowscale):    GB_DxB__times_uint8
+// A+B function (eWiseAdd):         GB_AaddB__times_uint8
+// A.*B function (eWiseMult):       GB_AemultB__times_uint8
+// A*D function (colscale):         GB_AxD__times_uint8
+// D*A function (rowscale):         GB_DxB__times_uint8
+// C+=A function (dense accum):     GB_Cdense_accumA__times_uint8
+// C+=x function (dense accum):     GB_Cdense_accumX__times_uint8
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum__times_uint8
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum__times_uint8
 
 // C type:   uint8_t
 // A type:   uint8_t
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     uint8_t bij = Bx [pB]
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    uint8_t t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     z = (x * y) ;
 
+// op is second (0: this file's op is TIMES)
+#define GB_OP_IS_SECOND \
+    0
+
+// op is plus_fp32 or plus_fp64 (0: not the case here)
+#define GB_OP_IS_PLUS_REAL \
+    0
+
+// op is minus_fp32 or minus_fp64 (0: not the case here)
+#define GB_OP_IS_MINUS_REAL \
+    0
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    (none)  // placeholder text, not a callable name; presumably never expanded
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8)
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense (this variant: TIMES operator, uint8_t)
+//------------------------------------------------------------------------------
+
+// NOTE: returns void and contains no GB_DISABLE check, so there is no
+// GrB_NO_VALUE path here; the whole body is the included template text.
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum__times_uint8
+(
+    GrB_Matrix C,               // dense matrix, updated by C += A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense (this variant: TIMES operator, uint8_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum__times_uint8
+(
+    GrB_Matrix C,               // result matrix of C = A+B
+    const GrB_Matrix A,         // first operand
+    const GrB_Matrix B,         // second operand
+    const int nthreads          // presumably the thread count for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: nothing is computed
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"   // body comes from this file
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix (TIMES, uint8_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA__times_uint8
+(
+    GrB_Matrix C,                               // dense matrix, updated
+    const GrB_Matrix A,                         // sparse input
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is split across
+    const int64_t *GB_RESTRICT pstart_slice,    // tasks -- TODO confirm
+    const int ntasks,                           // presumably the number of tasks
+    const int nthreads                          // presumably the thread count
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // the braces give the included template text its own block scope
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix (TIMES, uint8_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX__times_uint8
+(
+    GrB_Matrix C,               // dense matrix, updated
+    const GB_void *p_ywork,     // untyped pointer to the scalar; read as uint8_t
+    const int nthreads          // presumably the thread count for the template
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // kernel compiled out: C is left untouched
+    #else
+    // typed copy of the scalar; the included template presumably reads ywork
+    { 
+        uint8_t ywork = (*((uint8_t *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    // NOTE(review): unreachable -- the block above always returns first
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB__times_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB__times_uint8
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generated/GB_red__any_bool.c b/Source/Generated/GB_red__any_bool.c
new file mode 100644
index 0000000000..787f2467ae
--- /dev/null
+++ b/Source/Generated/GB_red__any_bool.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_bool
+// Reduce to scalar:   GB_red_scalar__any_bool
+// Reduce each vector: GB_red_eachvec__any_bool
+// Reduce each index:  GB_red_eachindex__any_bool
+
+// A type:   bool
+// C type:   bool
+
+// Reduce:   s = (aij)
+// Identity: false
+// Terminal: break ;
+
+#define GB_ATYPE \
+    bool
+
+#define GB_CTYPE \
+    bool
+
+// declare scalar
+
+    #define GB_SCALAR(s)                            \
+        bool s
+
+// Array to array ("+=" forms below are plain overwrites for the ANY monoid)
+
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast (ANY: expands to an overwrite)
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = (S [i])
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast (ANY: expands to an overwrite)
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = (S [i])
+
+// Array to scalar
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast (ANY: expands to an overwrite)
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = (Ax [p])
+
+    // s += S [i], no typecast (ANY: expands to an overwrite)
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = (S [i])
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast (ANY: expands to an overwrite)
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = (s)
+
+// break the loop if terminal condition reached (here: always; t is ignored)
+
+    #define GB_HAS_TERMINAL                         \
+        1
+
+    #define GB_TERMINAL_VALUE                       \
+        (any value)     // placeholder text, not a C expression
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators ("(no panel)" is placeholder text here)
+
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_BOOL || GxB_NO_ANY_BOOL)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+// *result seeds the local s; the included GB_reduce_panel.c text presumably
+// updates s, which is then stored back.  GrB_NO_VALUE if GB_DISABLE is set.
+GrB_Info GB_red_scalar__any_bool
+(
+    bool *result,                   // in: starting value; out: reduced value
+    const GrB_Matrix A,             // matrix being reduced
+    GB_void *GB_RESTRICT W_space,   // presumably workspace for the template
+    int ntasks,                     // presumably the number of tasks
+    int nthreads                    // presumably the thread count
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: *result is untouched
+    #else
+    bool s = (*result) ;
+    #include "GB_reduce_panel.c"
+    (*result) = s ;
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+// The whole body is the text of GB_reduce_each_vector.c, compiled with the
+// GB_* macros defined earlier in this file.  GrB_NO_VALUE if GB_DISABLE is set.
+GrB_Info GB_red_eachvec__any_bool
+(
+    bool *GB_RESTRICT Tx,                       // output: one scalar per vector
+    GrB_Matrix A,                               // matrix being reduced
+    const int64_t *GB_RESTRICT kfirst_slice,    // these three arrays presumably
+    const int64_t *GB_RESTRICT klast_slice,     // describe how A is split across
+    const int64_t *GB_RESTRICT pstart_slice,    // tasks -- TODO confirm
+    GB_void *Wfirst_space,                      // presumably per-task workspace
+    GB_void *Wlast_space,                       // presumably per-task workspace
+    int ntasks,                                 // presumably the number of tasks
+    int nthreads                                // presumably the thread count
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: Tx is untouched
+    #else
+    #include "GB_reduce_each_vector.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+// Unless disabled, *Thandle is set to NULL up front and to T only when control
+// reaches the end of the function; the returned status is the local info.
+GrB_Info GB_red_eachindex__any_bool
+(
+    GrB_Matrix *Thandle,                        // output handle, receives T
+    GrB_Type ttype,                             // presumably the type of T
+    GrB_Matrix A,                               // matrix being reduced
+    const int64_t *GB_RESTRICT pstart_slice,    // presumably task slicing of A
+    int nth,                                    // NOTE(review): meaning not
+    int nthreads,                               // visible here; see the template
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: *Thandle is untouched
+    #else
+    GrB_Info info = GrB_SUCCESS ;
+    GrB_Matrix T = NULL ;
+    (*Thandle) = NULL ;
+    #define GB_FREE_ALL ;           // cleanup macro is an empty statement here
+    #include "GB_reduce_each_index.c"
+    (*Thandle) = T ;
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix (body is the text of GB_reduce_build_template.c)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_red_build__any_bool
+(
+    bool *GB_RESTRICT Tx,                       // presumably output values
+    int64_t  *GB_RESTRICT Ti,                   // presumably output indices
+    const bool *GB_RESTRICT S,                  // presumably input tuple values
+    int64_t nvals,                              // NOTE(review): the roles of the
+    int64_t ndupl,                              // remaining arguments are not
+    const int64_t *GB_RESTRICT I_work,          // visible in this file; they are
+    const int64_t *GB_RESTRICT K_work,          // consumed only by the included
+    const int64_t *GB_RESTRICT tstart_slice,    // template -- confirm there
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: outputs are untouched
+    #else
+    #include "GB_reduce_build_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_fp32.c b/Source/Generated/GB_red__any_fp32.c
new file mode 100644
index 0000000000..eed1d2f58c
--- /dev/null
+++ b/Source/Generated/GB_red__any_fp32.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_fp32
+// Reduce to scalar:   GB_red_scalar__any_fp32
+// Reduce each vector: GB_red_eachvec__any_fp32
+// Reduce each index:  GB_red_eachindex__any_fp32
+
+// A type:   float
+// C type:   float
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    float
+// GB_ATYPE (above) and GB_CTYPE (below) are both float in this file.
+#define GB_CTYPE \
+    float
+
+// declare scalar: GB_SCALAR(s) expands to the declaration "float s"
+
+    #define GB_SCALAR(s)                            \
+        float s
+
+// Array to array
+// (every "add" macro in this file is a plain assignment: Reduce is s = aij)
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast (ANY: overwrites W [k])
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast (ANY: overwrites W [k])
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast (ANY: overwrites s)
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast (ANY: overwrites s)
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast (ANY: overwrites W [k])
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached
+// (here GB_BREAK_IF_TERMINAL ignores its argument t and always breaks)
+    #define GB_HAS_TERMINAL                         \
+        1
+    // "(any value)" is placeholder text, not a usable C expression
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators
+// NOTE(review): "(no panel)" is placeholder text; presumably never expanded
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid
+// (presumably tested by the included templates -- confirm there)
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FP32 || GxB_NO_ANY_FP32)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+// Seeds s from (*result), expands the GB_reduce_panel.c template, then stores
+// s back into (*result).  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+GrB_Info GB_red_scalar__any_fp32
+(
+    float *result,                  // in: initial s; out: final s
+    const GrB_Matrix A,             // not referenced here; for the template
+    GB_void *GB_RESTRICT W_space,   // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: result untouched
+    #else
+    float s = (*result) ;           // presumably updated by the template
+    #include "GB_reduce_panel.c"
+    (*result) = s ;
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+// Wrapper only: the body is the GB_reduce_each_vector.c template, expanded
+// with the GB_* macros defined above.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachvec__any_fp32
+(
+    float *GB_RESTRICT Tx,          // Tx (k) receives the result for A(:,k)
+    GrB_Matrix A,                   // matrix whose vectors are reduced
+    const int64_t *GB_RESTRICT kfirst_slice,   // for the template
+    const int64_t *GB_RESTRICT klast_slice,    // for the template
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    GB_void *Wfirst_space,          // not referenced here; for the template
+    GB_void *Wlast_space,           // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_each_vector.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+// Clears (*Thandle), expands the GB_reduce_each_index.c template, then returns
+// T through (*Thandle) along with info.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachindex__any_fp32
+(
+    GrB_Matrix *Thandle,            // out: set to NULL first, then to T
+    GrB_Type ttype,                 // not referenced here; for the template
+    GrB_Matrix A,                   // matrix whose A(i,:) are reduced
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    int nth,                        // not referenced here; for the template
+    int nthreads,                   // not referenced here; for the template
+    GB_Context Context              // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // presumably updated by the template
+    GrB_Matrix T = NULL ;           // presumably built by the template
+    (*Thandle) = NULL ;             // next: GB_FREE_ALL is an empty statement
+    #define GB_FREE_ALL ;
+    #include "GB_reduce_each_index.c"
+    (*Thandle) = T ;
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+// Wrapper only: the body is the GB_reduce_build_template.c template.
+GrB_Info GB_red_build__any_fp32
+(
+    float *GB_RESTRICT Tx,          // presumably output values (confirm)
+    int64_t  *GB_RESTRICT Ti,       // presumably output indices (confirm)
+    const float *GB_RESTRICT S,     // presumably input values (confirm)
+    int64_t nvals,                  // not referenced here; for the template
+    int64_t ndupl,                  // not referenced here; for the template
+    const int64_t *GB_RESTRICT I_work,         // for the template
+    const int64_t *GB_RESTRICT K_work,         // for the template
+    const int64_t *GB_RESTRICT tstart_slice,   // for the template
+    const int64_t *GB_RESTRICT tnz_slice,      // for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_build_template.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_fp64.c b/Source/Generated/GB_red__any_fp64.c
new file mode 100644
index 0000000000..cb820c8aa2
--- /dev/null
+++ b/Source/Generated/GB_red__any_fp64.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_fp64
+// Reduce to scalar:   GB_red_scalar__any_fp64
+// Reduce each vector: GB_red_eachvec__any_fp64
+// Reduce each index:  GB_red_eachindex__any_fp64
+
+// A type:   double
+// C type:   double
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    double
+// GB_ATYPE (above) and GB_CTYPE (below) are both double in this file.
+#define GB_CTYPE \
+    double
+
+// declare scalar: GB_SCALAR(s) expands to the declaration "double s"
+
+    #define GB_SCALAR(s)                            \
+        double s
+
+// Array to array
+// (every "add" macro in this file is a plain assignment: Reduce is s = aij)
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast (ANY: overwrites W [k])
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast (ANY: overwrites W [k])
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast (ANY: overwrites s)
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast (ANY: overwrites s)
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast (ANY: overwrites W [k])
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached
+// (here GB_BREAK_IF_TERMINAL ignores its argument t and always breaks)
+    #define GB_HAS_TERMINAL                         \
+        1
+    // "(any value)" is placeholder text, not a usable C expression
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators
+// NOTE(review): "(no panel)" is placeholder text; presumably never expanded
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid
+// (presumably tested by the included templates -- confirm there)
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_FP64 || GxB_NO_ANY_FP64)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+// Seeds s from (*result), expands the GB_reduce_panel.c template, then stores
+// s back into (*result).  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+GrB_Info GB_red_scalar__any_fp64
+(
+    double *result,                 // in: initial s; out: final s
+    const GrB_Matrix A,             // not referenced here; for the template
+    GB_void *GB_RESTRICT W_space,   // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: result untouched
+    #else
+    double s = (*result) ;          // presumably updated by the template
+    #include "GB_reduce_panel.c"
+    (*result) = s ;
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+// Wrapper only: the body is the GB_reduce_each_vector.c template, expanded
+// with the GB_* macros defined above.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachvec__any_fp64
+(
+    double *GB_RESTRICT Tx,         // Tx (k) receives the result for A(:,k)
+    GrB_Matrix A,                   // matrix whose vectors are reduced
+    const int64_t *GB_RESTRICT kfirst_slice,   // for the template
+    const int64_t *GB_RESTRICT klast_slice,    // for the template
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    GB_void *Wfirst_space,          // not referenced here; for the template
+    GB_void *Wlast_space,           // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_each_vector.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+// Clears (*Thandle), expands the GB_reduce_each_index.c template, then returns
+// T through (*Thandle) along with info.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachindex__any_fp64
+(
+    GrB_Matrix *Thandle,            // out: set to NULL first, then to T
+    GrB_Type ttype,                 // not referenced here; for the template
+    GrB_Matrix A,                   // matrix whose A(i,:) are reduced
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    int nth,                        // not referenced here; for the template
+    int nthreads,                   // not referenced here; for the template
+    GB_Context Context              // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // presumably updated by the template
+    GrB_Matrix T = NULL ;           // presumably built by the template
+    (*Thandle) = NULL ;             // next: GB_FREE_ALL is an empty statement
+    #define GB_FREE_ALL ;
+    #include "GB_reduce_each_index.c"
+    (*Thandle) = T ;
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+// Wrapper only: the body is the GB_reduce_build_template.c template.
+GrB_Info GB_red_build__any_fp64
+(
+    double *GB_RESTRICT Tx,         // presumably output values (confirm)
+    int64_t  *GB_RESTRICT Ti,       // presumably output indices (confirm)
+    const double *GB_RESTRICT S,    // presumably input values (confirm)
+    int64_t nvals,                  // not referenced here; for the template
+    int64_t ndupl,                  // not referenced here; for the template
+    const int64_t *GB_RESTRICT I_work,         // for the template
+    const int64_t *GB_RESTRICT K_work,         // for the template
+    const int64_t *GB_RESTRICT tstart_slice,   // for the template
+    const int64_t *GB_RESTRICT tnz_slice,      // for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_build_template.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_int16.c b/Source/Generated/GB_red__any_int16.c
new file mode 100644
index 0000000000..3937e61120
--- /dev/null
+++ b/Source/Generated/GB_red__any_int16.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_int16
+// Reduce to scalar:   GB_red_scalar__any_int16
+// Reduce each vector: GB_red_eachvec__any_int16
+// Reduce each index:  GB_red_eachindex__any_int16
+
+// A type:   int16_t
+// C type:   int16_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int16_t
+// GB_ATYPE (above) and GB_CTYPE (below) are both int16_t in this file.
+#define GB_CTYPE \
+    int16_t
+
+// declare scalar: GB_SCALAR(s) expands to the declaration "int16_t s"
+
+    #define GB_SCALAR(s)                            \
+        int16_t s
+
+// Array to array
+// (every "add" macro in this file is a plain assignment: Reduce is s = aij)
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast (ANY: overwrites W [k])
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast (ANY: overwrites W [k])
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast (ANY: overwrites s)
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast (ANY: overwrites s)
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast (ANY: overwrites W [k])
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached
+// (here GB_BREAK_IF_TERMINAL ignores its argument t and always breaks)
+    #define GB_HAS_TERMINAL                         \
+        1
+    // "(any value)" is placeholder text, not a usable C expression
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators
+// NOTE(review): "(no panel)" is placeholder text; presumably never expanded
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid
+// (presumably tested by the included templates -- confirm there)
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_INT16 || GxB_NO_ANY_INT16)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+// Seeds s from (*result), expands the GB_reduce_panel.c template, then stores
+// s back into (*result).  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+GrB_Info GB_red_scalar__any_int16
+(
+    int16_t *result,                // in: initial s; out: final s
+    const GrB_Matrix A,             // not referenced here; for the template
+    GB_void *GB_RESTRICT W_space,   // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: result untouched
+    #else
+    int16_t s = (*result) ;         // presumably updated by the template
+    #include "GB_reduce_panel.c"
+    (*result) = s ;
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+// Wrapper only: the body is the GB_reduce_each_vector.c template, expanded
+// with the GB_* macros defined above.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachvec__any_int16
+(
+    int16_t *GB_RESTRICT Tx,        // Tx (k) receives the result for A(:,k)
+    GrB_Matrix A,                   // matrix whose vectors are reduced
+    const int64_t *GB_RESTRICT kfirst_slice,   // for the template
+    const int64_t *GB_RESTRICT klast_slice,    // for the template
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    GB_void *Wfirst_space,          // not referenced here; for the template
+    GB_void *Wlast_space,           // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_each_vector.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+// Clears (*Thandle), expands the GB_reduce_each_index.c template, then returns
+// T through (*Thandle) along with info.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachindex__any_int16
+(
+    GrB_Matrix *Thandle,            // out: set to NULL first, then to T
+    GrB_Type ttype,                 // not referenced here; for the template
+    GrB_Matrix A,                   // matrix whose A(i,:) are reduced
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    int nth,                        // not referenced here; for the template
+    int nthreads,                   // not referenced here; for the template
+    GB_Context Context              // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // presumably updated by the template
+    GrB_Matrix T = NULL ;           // presumably built by the template
+    (*Thandle) = NULL ;             // next: GB_FREE_ALL is an empty statement
+    #define GB_FREE_ALL ;
+    #include "GB_reduce_each_index.c"
+    (*Thandle) = T ;
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+// Wrapper only: the body is the GB_reduce_build_template.c template.
+GrB_Info GB_red_build__any_int16
+(
+    int16_t *GB_RESTRICT Tx,        // presumably output values (confirm)
+    int64_t  *GB_RESTRICT Ti,       // presumably output indices (confirm)
+    const int16_t *GB_RESTRICT S,   // presumably input values (confirm)
+    int64_t nvals,                  // not referenced here; for the template
+    int64_t ndupl,                  // not referenced here; for the template
+    const int64_t *GB_RESTRICT I_work,         // for the template
+    const int64_t *GB_RESTRICT K_work,         // for the template
+    const int64_t *GB_RESTRICT tstart_slice,   // for the template
+    const int64_t *GB_RESTRICT tnz_slice,      // for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_build_template.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_int32.c b/Source/Generated/GB_red__any_int32.c
new file mode 100644
index 0000000000..a5e565af96
--- /dev/null
+++ b/Source/Generated/GB_red__any_int32.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_int32
+// Reduce to scalar:   GB_red_scalar__any_int32
+// Reduce each vector: GB_red_eachvec__any_int32
+// Reduce each index:  GB_red_eachindex__any_int32
+
+// A type:   int32_t
+// C type:   int32_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int32_t
+// GB_ATYPE (above) and GB_CTYPE (below) are both int32_t in this file.
+#define GB_CTYPE \
+    int32_t
+
+// declare scalar: GB_SCALAR(s) expands to the declaration "int32_t s"
+
+    #define GB_SCALAR(s)                            \
+        int32_t s
+
+// Array to array
+// (every "add" macro in this file is a plain assignment: Reduce is s = aij)
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast (ANY: overwrites W [k])
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast (ANY: overwrites W [k])
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast (ANY: overwrites s)
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast (ANY: overwrites s)
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast (ANY: overwrites W [k])
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached
+// (here GB_BREAK_IF_TERMINAL ignores its argument t and always breaks)
+    #define GB_HAS_TERMINAL                         \
+        1
+    // "(any value)" is placeholder text, not a usable C expression
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators
+// NOTE(review): "(no panel)" is placeholder text; presumably never expanded
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid
+// (presumably tested by the included templates -- confirm there)
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_INT32 || GxB_NO_ANY_INT32)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+// Seeds s from (*result), expands the GB_reduce_panel.c template, then stores
+// s back into (*result).  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+GrB_Info GB_red_scalar__any_int32
+(
+    int32_t *result,                // in: initial s; out: final s
+    const GrB_Matrix A,             // not referenced here; for the template
+    GB_void *GB_RESTRICT W_space,   // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: result untouched
+    #else
+    int32_t s = (*result) ;         // presumably updated by the template
+    #include "GB_reduce_panel.c"
+    (*result) = s ;
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+// Wrapper only: the body is the GB_reduce_each_vector.c template, expanded
+// with the GB_* macros defined above.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachvec__any_int32
+(
+    int32_t *GB_RESTRICT Tx,        // Tx (k) receives the result for A(:,k)
+    GrB_Matrix A,                   // matrix whose vectors are reduced
+    const int64_t *GB_RESTRICT kfirst_slice,   // for the template
+    const int64_t *GB_RESTRICT klast_slice,    // for the template
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    GB_void *Wfirst_space,          // not referenced here; for the template
+    GB_void *Wlast_space,           // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_each_vector.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+// Clears (*Thandle), expands the GB_reduce_each_index.c template, then returns
+// T through (*Thandle) along with info.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachindex__any_int32
+(
+    GrB_Matrix *Thandle,            // out: set to NULL first, then to T
+    GrB_Type ttype,                 // not referenced here; for the template
+    GrB_Matrix A,                   // matrix whose A(i,:) are reduced
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    int nth,                        // not referenced here; for the template
+    int nthreads,                   // not referenced here; for the template
+    GB_Context Context              // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // presumably updated by the template
+    GrB_Matrix T = NULL ;           // presumably built by the template
+    (*Thandle) = NULL ;             // next: GB_FREE_ALL is an empty statement
+    #define GB_FREE_ALL ;
+    #include "GB_reduce_each_index.c"
+    (*Thandle) = T ;
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+// Wrapper only: the body is the GB_reduce_build_template.c template.
+GrB_Info GB_red_build__any_int32
+(
+    int32_t *GB_RESTRICT Tx,        // presumably output values (confirm)
+    int64_t  *GB_RESTRICT Ti,       // presumably output indices (confirm)
+    const int32_t *GB_RESTRICT S,   // presumably input values (confirm)
+    int64_t nvals,                  // not referenced here; for the template
+    int64_t ndupl,                  // not referenced here; for the template
+    const int64_t *GB_RESTRICT I_work,         // for the template
+    const int64_t *GB_RESTRICT K_work,         // for the template
+    const int64_t *GB_RESTRICT tstart_slice,   // for the template
+    const int64_t *GB_RESTRICT tnz_slice,      // for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_build_template.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_int64.c b/Source/Generated/GB_red__any_int64.c
new file mode 100644
index 0000000000..70ccc212ab
--- /dev/null
+++ b/Source/Generated/GB_red__any_int64.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_int64
+// Reduce to scalar:   GB_red_scalar__any_int64
+// Reduce each vector: GB_red_eachvec__any_int64
+// Reduce each index:  GB_red_eachindex__any_int64
+
+// A type:   int64_t
+// C type:   int64_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int64_t
+// GB_ATYPE (above) and GB_CTYPE (below) are both int64_t in this file.
+#define GB_CTYPE \
+    int64_t
+
+// declare scalar: GB_SCALAR(s) expands to the declaration "int64_t s"
+
+    #define GB_SCALAR(s)                            \
+        int64_t s
+
+// Array to array
+// (every "add" macro in this file is a plain assignment: Reduce is s = aij)
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast (ANY: overwrites W [k])
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast (ANY: overwrites W [k])
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast (ANY: overwrites s)
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast (ANY: overwrites s)
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast (ANY: overwrites W [k])
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached
+// (here GB_BREAK_IF_TERMINAL ignores its argument t and always breaks)
+    #define GB_HAS_TERMINAL                         \
+        1
+    // "(any value)" is placeholder text, not a usable C expression
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators
+// NOTE(review): "(no panel)" is placeholder text; presumably never expanded
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid
+// (presumably tested by the included templates -- confirm there)
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_INT64 || GxB_NO_ANY_INT64)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+// Seeds s from (*result), expands the GB_reduce_panel.c template, then stores
+// s back into (*result).  Returns GrB_NO_VALUE if GB_DISABLE is nonzero.
+GrB_Info GB_red_scalar__any_int64
+(
+    int64_t *result,                // in: initial s; out: final s
+    const GrB_Matrix A,             // not referenced here; for the template
+    GB_void *GB_RESTRICT W_space,   // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: result untouched
+    #else
+    int64_t s = (*result) ;         // presumably updated by the template
+    #include "GB_reduce_panel.c"
+    (*result) = s ;
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+// Wrapper only: the body is the GB_reduce_each_vector.c template, expanded
+// with the GB_* macros defined above.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachvec__any_int64
+(
+    int64_t *GB_RESTRICT Tx,        // Tx (k) receives the result for A(:,k)
+    GrB_Matrix A,                   // matrix whose vectors are reduced
+    const int64_t *GB_RESTRICT kfirst_slice,   // for the template
+    const int64_t *GB_RESTRICT klast_slice,    // for the template
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    GB_void *Wfirst_space,          // not referenced here; for the template
+    GB_void *Wlast_space,           // not referenced here; for the template
+    int ntasks,                     // not referenced here; for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_each_vector.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+// Clears (*Thandle), expands the GB_reduce_each_index.c template, then returns
+// T through (*Thandle) along with info.  Returns GrB_NO_VALUE if GB_DISABLE.
+GrB_Info GB_red_eachindex__any_int64
+(
+    GrB_Matrix *Thandle,            // out: set to NULL first, then to T
+    GrB_Type ttype,                 // not referenced here; for the template
+    GrB_Matrix A,                   // matrix whose A(i,:) are reduced
+    const int64_t *GB_RESTRICT pstart_slice,   // for the template
+    int nth,                        // not referenced here; for the template
+    int nthreads,                   // not referenced here; for the template
+    GB_Context Context              // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // presumably updated by the template
+    GrB_Matrix T = NULL ;           // presumably built by the template
+    (*Thandle) = NULL ;             // next: GB_FREE_ALL is an empty statement
+    #define GB_FREE_ALL ;
+    #include "GB_reduce_each_index.c"
+    (*Thandle) = T ;
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+// Wrapper only: the body is the GB_reduce_build_template.c template.
+GrB_Info GB_red_build__any_int64
+(
+    int64_t *GB_RESTRICT Tx,        // presumably output values (confirm)
+    int64_t  *GB_RESTRICT Ti,       // presumably output indices (confirm)
+    const int64_t *GB_RESTRICT S,   // presumably input values (confirm)
+    int64_t nvals,                  // not referenced here; for the template
+    int64_t ndupl,                  // not referenced here; for the template
+    const int64_t *GB_RESTRICT I_work,         // for the template
+    const int64_t *GB_RESTRICT K_work,         // for the template
+    const int64_t *GB_RESTRICT tstart_slice,   // for the template
+    const int64_t *GB_RESTRICT tnz_slice,      // for the template
+    int nthreads                    // not referenced here; for the template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel compiled out: nothing is done
+    #else
+    #include "GB_reduce_build_template.c"
+    return (GrB_SUCCESS) ;          // reached after the template code runs
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_int8.c b/Source/Generated/GB_red__any_int8.c
new file mode 100644
index 0000000000..41de8479ee
--- /dev/null
+++ b/Source/Generated/GB_red__any_int8.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_int8
+// Reduce to scalar:   GB_red_scalar__any_int8
+// Reduce each vector: GB_red_eachvec__any_int8
+// Reduce each index:  GB_red_eachindex__any_int8
+
+// A type:   int8_t
+// C type:   int8_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+// declare scalar: GB_SCALAR (s) expands to a declaration of s as int8_t
+
+    #define GB_SCALAR(s)                            \
+        int8_t s
+
+// Array to array (in this file every "+=" variant expands to a plain assignment)
+
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast; for ANY the result is just S [i]
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar ("+=" again expands to a plain assignment)
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast; for ANY the result is just Ax [p]
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast; for ANY the result is just s
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached; here GB_BREAK_IF_TERMINAL ignores t and always breaks
+
+    #define GB_HAS_TERMINAL                         \
+        1
+
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators; "(no panel)" is placeholder text, not a C expression
+
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid: set to 1 in this file
+
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold (tested by #if in each function below)
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_INT8 || GxB_NO_ANY_INT8)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_scalar__any_int8    // reduce the entries of A to one scalar
+(
+    int8_t *result,                 // in: starting value of s; out: final value of s
+    const GrB_Matrix A,             // matrix being reduced
+    GB_void *GB_RESTRICT W_space,   // NOTE(review): presumably workspace for the template
+    int ntasks,                     // NOTE(review): presumably # of parallel tasks -- confirm
+    int nthreads                    // NOTE(review): presumably # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *result is not modified on this path
+    #else
+    int8_t s = (*result) ;          // s starts from the caller-supplied value
+    #include "GB_reduce_panel.c"    // template body (no other code here touches A or W_space)
+    (*result) = s ;                 // hand s back to the caller
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachvec__any_int8   // Tx (k) = reduction of vector A(:,k)
+(
+    int8_t *GB_RESTRICT Tx,         // output array of reduced scalars
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): the *_slice arrays
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describe how A is split
+    const int64_t *GB_RESTRICT pstart_slice,    // across tasks -- confirm in GB_ek_slice.h
+    GB_void *Wfirst_space,          // NOTE(review): presumably workspace for the template
+    GB_void *Wlast_space,           // NOTE(review): presumably workspace for the template
+    int ntasks,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // Tx is not modified on this path
+    #else
+    #include "GB_reduce_each_vector.c"  // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachindex__any_int8 // T (i) = reduction of row A(i,:)
+(
+    GrB_Matrix *Thandle,            // output: cleared to NULL, then set to T
+    GrB_Type ttype,                 // NOTE(review): presumably the type of T -- confirm
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT pstart_slice,    // NOTE(review): presumably task slicing of A
+    int nth,                        // NOTE(review): meaning not visible here; see template
+    int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *Thandle is not modified on this path
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // stays GrB_SUCCESS unless the template changes it
+    GrB_Matrix T = NULL ;           // result matrix; only the template can assign it
+    (*Thandle) = NULL ;             // cleared before the template runs
+    #define GB_FREE_ALL ;           // empty statement; presumably used by the template -- confirm
+    #include "GB_reduce_each_index.c"   // template body (no other code here touches the parameters)
+    (*Thandle) = T ;                // publish the result to the caller
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_red_build__any_int8     // assemble tuples; body is GB_reduce_build_template.c
+(
+    int8_t *GB_RESTRICT Tx,         // NOTE(review): roles are inferred from names and
+    int64_t  *GB_RESTRICT Ti,       // constness only; confirm in the template.
+    const int8_t *GB_RESTRICT S,    // Tx, Ti: presumably outputs; S: presumably input values
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // no argument is touched on this path
+    #else
+    #include "GB_reduce_build_template.c"   // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_uint16.c b/Source/Generated/GB_red__any_uint16.c
new file mode 100644
index 0000000000..1d40a49607
--- /dev/null
+++ b/Source/Generated/GB_red__any_uint16.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_uint16
+// Reduce to scalar:   GB_red_scalar__any_uint16
+// Reduce each vector: GB_red_eachvec__any_uint16
+// Reduce each index:  GB_red_eachindex__any_uint16
+
+// A type:   uint16_t
+// C type:   uint16_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+// declare scalar: GB_SCALAR (s) expands to a declaration of s as uint16_t
+
+    #define GB_SCALAR(s)                            \
+        uint16_t s
+
+// Array to array (in this file every "+=" variant expands to a plain assignment)
+
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast; for ANY the result is just S [i]
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar ("+=" again expands to a plain assignment)
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast; for ANY the result is just Ax [p]
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast; for ANY the result is just s
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached; here GB_BREAK_IF_TERMINAL ignores t and always breaks
+
+    #define GB_HAS_TERMINAL                         \
+        1
+
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators; "(no panel)" is placeholder text, not a C expression
+
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid: set to 1 in this file
+
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold (tested by #if in each function below)
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_UINT16 || GxB_NO_ANY_UINT16)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_scalar__any_uint16  // reduce the entries of A to one scalar
+(
+    uint16_t *result,               // in: starting value of s; out: final value of s
+    const GrB_Matrix A,             // matrix being reduced
+    GB_void *GB_RESTRICT W_space,   // NOTE(review): presumably workspace for the template
+    int ntasks,                     // NOTE(review): presumably # of parallel tasks -- confirm
+    int nthreads                    // NOTE(review): presumably # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *result is not modified on this path
+    #else
+    uint16_t s = (*result) ;        // s starts from the caller-supplied value
+    #include "GB_reduce_panel.c"    // template body (no other code here touches A or W_space)
+    (*result) = s ;                 // hand s back to the caller
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachvec__any_uint16 // Tx (k) = reduction of vector A(:,k)
+(
+    uint16_t *GB_RESTRICT Tx,       // output array of reduced scalars
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): the *_slice arrays
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describe how A is split
+    const int64_t *GB_RESTRICT pstart_slice,    // across tasks -- confirm in GB_ek_slice.h
+    GB_void *Wfirst_space,          // NOTE(review): presumably workspace for the template
+    GB_void *Wlast_space,           // NOTE(review): presumably workspace for the template
+    int ntasks,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // Tx is not modified on this path
+    #else
+    #include "GB_reduce_each_vector.c"  // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachindex__any_uint16   // T (i) = reduction of row A(i,:)
+(
+    GrB_Matrix *Thandle,            // output: cleared to NULL, then set to T
+    GrB_Type ttype,                 // NOTE(review): presumably the type of T -- confirm
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT pstart_slice,    // NOTE(review): presumably task slicing of A
+    int nth,                        // NOTE(review): meaning not visible here; see template
+    int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *Thandle is not modified on this path
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // stays GrB_SUCCESS unless the template changes it
+    GrB_Matrix T = NULL ;           // result matrix; only the template can assign it
+    (*Thandle) = NULL ;             // cleared before the template runs
+    #define GB_FREE_ALL ;           // empty statement; presumably used by the template -- confirm
+    #include "GB_reduce_each_index.c"   // template body (no other code here touches the parameters)
+    (*Thandle) = T ;                // publish the result to the caller
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_red_build__any_uint16   // assemble tuples; body is GB_reduce_build_template.c
+(
+    uint16_t *GB_RESTRICT Tx,       // NOTE(review): roles are inferred from names and
+    int64_t  *GB_RESTRICT Ti,       // constness only; confirm in the template.
+    const uint16_t *GB_RESTRICT S,  // Tx, Ti: presumably outputs; S: presumably input values
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // no argument is touched on this path
+    #else
+    #include "GB_reduce_build_template.c"   // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_uint32.c b/Source/Generated/GB_red__any_uint32.c
new file mode 100644
index 0000000000..fd4b071892
--- /dev/null
+++ b/Source/Generated/GB_red__any_uint32.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_uint32
+// Reduce to scalar:   GB_red_scalar__any_uint32
+// Reduce each vector: GB_red_eachvec__any_uint32
+// Reduce each index:  GB_red_eachindex__any_uint32
+
+// A type:   uint32_t
+// C type:   uint32_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+// declare scalar: GB_SCALAR (s) expands to a declaration of s as uint32_t
+
+    #define GB_SCALAR(s)                            \
+        uint32_t s
+
+// Array to array (in this file every "+=" variant expands to a plain assignment)
+
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast; for ANY the result is just S [i]
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar ("+=" again expands to a plain assignment)
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast; for ANY the result is just Ax [p]
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast; for ANY the result is just s
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached; here GB_BREAK_IF_TERMINAL ignores t and always breaks
+
+    #define GB_HAS_TERMINAL                         \
+        1
+
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators; "(no panel)" is placeholder text, not a C expression
+
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid: set to 1 in this file
+
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold (tested by #if in each function below)
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_UINT32 || GxB_NO_ANY_UINT32)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_scalar__any_uint32  // reduce the entries of A to one scalar
+(
+    uint32_t *result,               // in: starting value of s; out: final value of s
+    const GrB_Matrix A,             // matrix being reduced
+    GB_void *GB_RESTRICT W_space,   // NOTE(review): presumably workspace for the template
+    int ntasks,                     // NOTE(review): presumably # of parallel tasks -- confirm
+    int nthreads                    // NOTE(review): presumably # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *result is not modified on this path
+    #else
+    uint32_t s = (*result) ;        // s starts from the caller-supplied value
+    #include "GB_reduce_panel.c"    // template body (no other code here touches A or W_space)
+    (*result) = s ;                 // hand s back to the caller
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachvec__any_uint32 // Tx (k) = reduction of vector A(:,k)
+(
+    uint32_t *GB_RESTRICT Tx,       // output array of reduced scalars
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): the *_slice arrays
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describe how A is split
+    const int64_t *GB_RESTRICT pstart_slice,    // across tasks -- confirm in GB_ek_slice.h
+    GB_void *Wfirst_space,          // NOTE(review): presumably workspace for the template
+    GB_void *Wlast_space,           // NOTE(review): presumably workspace for the template
+    int ntasks,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // Tx is not modified on this path
+    #else
+    #include "GB_reduce_each_vector.c"  // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachindex__any_uint32   // T (i) = reduction of row A(i,:)
+(
+    GrB_Matrix *Thandle,            // output: cleared to NULL, then set to T
+    GrB_Type ttype,                 // NOTE(review): presumably the type of T -- confirm
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT pstart_slice,    // NOTE(review): presumably task slicing of A
+    int nth,                        // NOTE(review): meaning not visible here; see template
+    int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *Thandle is not modified on this path
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // stays GrB_SUCCESS unless the template changes it
+    GrB_Matrix T = NULL ;           // result matrix; only the template can assign it
+    (*Thandle) = NULL ;             // cleared before the template runs
+    #define GB_FREE_ALL ;           // empty statement; presumably used by the template -- confirm
+    #include "GB_reduce_each_index.c"   // template body (no other code here touches the parameters)
+    (*Thandle) = T ;                // publish the result to the caller
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_red_build__any_uint32   // assemble tuples; body is GB_reduce_build_template.c
+(
+    uint32_t *GB_RESTRICT Tx,       // NOTE(review): roles are inferred from names and
+    int64_t  *GB_RESTRICT Ti,       // constness only; confirm in the template.
+    const uint32_t *GB_RESTRICT S,  // Tx, Ti: presumably outputs; S: presumably input values
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // no argument is touched on this path
+    #else
+    #include "GB_reduce_build_template.c"   // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_uint64.c b/Source/Generated/GB_red__any_uint64.c
new file mode 100644
index 0000000000..7fa05273e1
--- /dev/null
+++ b/Source/Generated/GB_red__any_uint64.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_uint64
+// Reduce to scalar:   GB_red_scalar__any_uint64
+// Reduce each vector: GB_red_eachvec__any_uint64
+// Reduce each index:  GB_red_eachindex__any_uint64
+
+// A type:   uint64_t
+// C type:   uint64_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+// declare scalar: GB_SCALAR (s) expands to a declaration of s as uint64_t
+
+    #define GB_SCALAR(s)                            \
+        uint64_t s
+
+// Array to array (in this file every "+=" variant expands to a plain assignment)
+
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast; for ANY the result is just S [i]
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar ("+=" again expands to a plain assignment)
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast; for ANY the result is just Ax [p]
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast; for ANY the result is just s
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached; here GB_BREAK_IF_TERMINAL ignores t and always breaks
+
+    #define GB_HAS_TERMINAL                         \
+        1
+
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators; "(no panel)" is placeholder text, not a C expression
+
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid: set to 1 in this file
+
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold (tested by #if in each function below)
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_UINT64 || GxB_NO_ANY_UINT64)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_scalar__any_uint64  // reduce the entries of A to one scalar
+(
+    uint64_t *result,               // in: starting value of s; out: final value of s
+    const GrB_Matrix A,             // matrix being reduced
+    GB_void *GB_RESTRICT W_space,   // NOTE(review): presumably workspace for the template
+    int ntasks,                     // NOTE(review): presumably # of parallel tasks -- confirm
+    int nthreads                    // NOTE(review): presumably # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *result is not modified on this path
+    #else
+    uint64_t s = (*result) ;        // s starts from the caller-supplied value
+    #include "GB_reduce_panel.c"    // template body (no other code here touches A or W_space)
+    (*result) = s ;                 // hand s back to the caller
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachvec__any_uint64 // Tx (k) = reduction of vector A(:,k)
+(
+    uint64_t *GB_RESTRICT Tx,       // output array of reduced scalars
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): the *_slice arrays
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describe how A is split
+    const int64_t *GB_RESTRICT pstart_slice,    // across tasks -- confirm in GB_ek_slice.h
+    GB_void *Wfirst_space,          // NOTE(review): presumably workspace for the template
+    GB_void *Wlast_space,           // NOTE(review): presumably workspace for the template
+    int ntasks,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // Tx is not modified on this path
+    #else
+    #include "GB_reduce_each_vector.c"  // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachindex__any_uint64   // T (i) = reduction of row A(i,:)
+(
+    GrB_Matrix *Thandle,            // output: cleared to NULL, then set to T
+    GrB_Type ttype,                 // NOTE(review): presumably the type of T -- confirm
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT pstart_slice,    // NOTE(review): presumably task slicing of A
+    int nth,                        // NOTE(review): meaning not visible here; see template
+    int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *Thandle is not modified on this path
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // stays GrB_SUCCESS unless the template changes it
+    GrB_Matrix T = NULL ;           // result matrix; only the template can assign it
+    (*Thandle) = NULL ;             // cleared before the template runs
+    #define GB_FREE_ALL ;           // empty statement; presumably used by the template -- confirm
+    #include "GB_reduce_each_index.c"   // template body (no other code here touches the parameters)
+    (*Thandle) = T ;                // publish the result to the caller
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_red_build__any_uint64   // assemble tuples; body is GB_reduce_build_template.c
+(
+    uint64_t *GB_RESTRICT Tx,       // NOTE(review): roles are inferred from names and
+    int64_t  *GB_RESTRICT Ti,       // constness only; confirm in the template.
+    const uint64_t *GB_RESTRICT S,  // Tx, Ti: presumably outputs; S: presumably input values
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // no argument is touched on this path
+    #else
+    #include "GB_reduce_build_template.c"   // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__any_uint8.c b/Source/Generated/GB_red__any_uint8.c
new file mode 100644
index 0000000000..a0ddbd4083
--- /dev/null
+++ b/Source/Generated/GB_red__any_uint8.c
@@ -0,0 +1,230 @@
+//------------------------------------------------------------------------------
+// GB_red:  hard-coded functions for reductions
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_atomics.h"
+#include "GB_ek_slice.h"
+#include "GB_control.h" 
+#include "GB_red__include.h"
+
+// The reduction is defined by the following types and operators:
+
+// Assemble tuples:    GB_red_build__any_uint8
+// Reduce to scalar:   GB_red_scalar__any_uint8
+// Reduce each vector: GB_red_eachvec__any_uint8
+// Reduce each index:  GB_red_eachindex__any_uint8
+
+// A type:   uint8_t
+// C type:   uint8_t
+
+// Reduce:   s = aij
+// Identity: 0
+// Terminal: break ;
+
+#define GB_ATYPE \
+    uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+// declare scalar: GB_SCALAR (s) expands to a declaration of s as uint8_t
+
+    #define GB_SCALAR(s)                            \
+        uint8_t s
+
+// Array to array (in this file every "+=" variant expands to a plain assignment)
+
+    // W [k] = (ztype) S [i], with typecast
+    #define GB_CAST_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += (ztype) S [i], with typecast; for ANY the result is just S [i]
+    #define GB_ADD_CAST_ARRAY_TO_ARRAY(W,k,S,i)     \
+        W [k] = S [i]
+
+    // W [k] = S [i], no typecast
+    #define GB_COPY_ARRAY_TO_ARRAY(W,k,S,i)         \
+        W [k] = S [i]
+
+    // W [k] += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_ARRAY(W,k,S,i)          \
+        W [k] = S [i]
+
+// Array to scalar ("+=" again expands to a plain assignment)
+
+    // s = (ztype) Ax [p], with typecast
+    #define GB_CAST_ARRAY_TO_SCALAR(s,Ax,p)         \
+        s = Ax [p]
+
+    // s = W [k], no typecast
+    #define GB_COPY_ARRAY_TO_SCALAR(s,W,k)          \
+        s = W [k]
+
+    // s += (ztype) Ax [p], with typecast; for ANY the result is just Ax [p]
+    #define GB_ADD_CAST_ARRAY_TO_SCALAR(s,Ax,p)     \
+        s = Ax [p]
+
+    // s += S [i], no typecast; for ANY the result is just S [i]
+    #define GB_ADD_ARRAY_TO_SCALAR(s,S,i)           \
+        s = S [i]
+
+// Scalar to array
+
+    // W [k] = s, no typecast
+    #define GB_COPY_SCALAR_TO_ARRAY(W,k,s)          \
+        W [k] = s
+
+    // W [k] += s, no typecast; for ANY the result is just s
+    #define GB_ADD_SCALAR_TO_ARRAY(W,k,s)           \
+        W [k] = s
+
+// break the loop if terminal condition reached; here GB_BREAK_IF_TERMINAL ignores t and always breaks
+
+    #define GB_HAS_TERMINAL                         \
+        1
+
+    #define GB_TERMINAL_VALUE                       \
+        (any value)
+
+    #define GB_BREAK_IF_TERMINAL(t)                 \
+        break ;
+
+// panel size for built-in operators; "(no panel)" is placeholder text, not a C expression
+
+    #define GB_PANEL                                \
+        (no panel)
+
+// special case for the ANY monoid: set to 1 in this file
+
+    #define GB_IS_ANY_MONOID                        \
+        1
+
+// disable this operator and use the generic case if these conditions hold (tested by #if in each function below)
+#define GB_DISABLE \
+    (GxB_NO_ANY || GxB_NO_UINT8 || GxB_NO_ANY_UINT8)
+
+//------------------------------------------------------------------------------
+// reduce to a scalar, for monoids only
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_scalar__any_uint8   // reduce the entries of A to one scalar
+(
+    uint8_t *result,                // in: starting value of s; out: final value of s
+    const GrB_Matrix A,             // matrix being reduced
+    GB_void *GB_RESTRICT W_space,   // NOTE(review): presumably workspace for the template
+    int ntasks,                     // NOTE(review): presumably # of parallel tasks -- confirm
+    int nthreads                    // NOTE(review): presumably # of threads to use -- confirm
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *result is not modified on this path
+    #else
+    uint8_t s = (*result) ;         // s starts from the caller-supplied value
+    #include "GB_reduce_panel.c"    // template body (no other code here touches A or W_space)
+    (*result) = s ;                 // hand s back to the caller
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachvec__any_uint8  // Tx (k) = reduction of vector A(:,k)
+(
+    uint8_t *GB_RESTRICT Tx,        // output array of reduced scalars
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT kfirst_slice,    // NOTE(review): the *_slice arrays
+    const int64_t *GB_RESTRICT klast_slice,     // presumably describe how A is split
+    const int64_t *GB_RESTRICT pstart_slice,    // across tasks -- confirm in GB_ek_slice.h
+    GB_void *Wfirst_space,          // NOTE(review): presumably workspace for the template
+    GB_void *Wlast_space,           // NOTE(review): presumably workspace for the template
+    int ntasks,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // Tx is not modified on this path
+    #else
+    #include "GB_reduce_each_vector.c"  // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// reduce to each index: each A(i,:) reduces to a scalar T (i)
+//------------------------------------------------------------------------------
+
+
+
+GrB_Info GB_red_eachindex__any_uint8    // T (i) = reduction of row A(i,:)
+(
+    GrB_Matrix *Thandle,            // output: cleared to NULL, then set to T
+    GrB_Type ttype,                 // NOTE(review): presumably the type of T -- confirm
+    GrB_Matrix A,                   // matrix being reduced
+    const int64_t *GB_RESTRICT pstart_slice,    // NOTE(review): presumably task slicing of A
+    int nth,                        // NOTE(review): meaning not visible here; see template
+    int nthreads,
+    GB_Context Context
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // *Thandle is not modified on this path
+    #else
+    GrB_Info info = GrB_SUCCESS ;   // stays GrB_SUCCESS unless the template changes it
+    GrB_Matrix T = NULL ;           // result matrix; only the template can assign it
+    (*Thandle) = NULL ;             // cleared before the template runs
+    #define GB_FREE_ALL ;           // empty statement; presumably used by the template -- confirm
+    #include "GB_reduce_each_index.c"   // template body (no other code here touches the parameters)
+    (*Thandle) = T ;                // publish the result to the caller
+    return (info) ;
+    #endif
+}
+
+
+
+//------------------------------------------------------------------------------
+// build matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_red_build__any_uint8    // assemble tuples; body is GB_reduce_build_template.c
+(
+    uint8_t *GB_RESTRICT Tx,        // NOTE(review): roles are inferred from names and
+    int64_t  *GB_RESTRICT Ti,       // constness only; confirm in the template.
+    const uint8_t *GB_RESTRICT S,   // Tx, Ti: presumably outputs; S: presumably input values
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+)
+{ 
+    #if GB_DISABLE                  // nonzero: this hard-coded kernel is compiled out
+    return (GrB_NO_VALUE) ;         // no argument is touched on this path
+    #else
+    #include "GB_reduce_build_template.c"   // template body (no other code here touches the parameters)
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_red__eq_bool.c b/Source/Generated/GB_red__eq_bool.c
index eb59bc4e0d..01a9b1b10c 100644
--- a/Source/Generated/GB_red__eq_bool.c
+++ b/Source/Generated/GB_red__eq_bool.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         8
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_EQ || GxB_NO_BOOL || GxB_NO_EQ_BOOL)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__eq_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__eq_bool
 (
     bool *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__eq_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__eq_bool
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_bool.c b/Source/Generated/GB_red__first_bool.c
index 09237ab5c2..6311f15ff7 100644
--- a/Source/Generated/GB_red__first_bool.c
+++ b/Source/Generated/GB_red__first_bool.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_BOOL || GxB_NO_FIRST_BOOL)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     bool *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_fp32.c b/Source/Generated/GB_red__first_fp32.c
index 09628a9858..867fb5e364 100644
--- a/Source/Generated/GB_red__first_fp32.c
+++ b/Source/Generated/GB_red__first_fp32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_FP32 || GxB_NO_FIRST_FP32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     float *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_fp64.c b/Source/Generated/GB_red__first_fp64.c
index 6d7a1dfe16..0c47a1c166 100644
--- a/Source/Generated/GB_red__first_fp64.c
+++ b/Source/Generated/GB_red__first_fp64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_FP64 || GxB_NO_FIRST_FP64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     double *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_int16.c b/Source/Generated/GB_red__first_int16.c
index 61e54082a8..b69a79b57e 100644
--- a/Source/Generated/GB_red__first_int16.c
+++ b/Source/Generated/GB_red__first_int16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT16 || GxB_NO_FIRST_INT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_int32.c b/Source/Generated/GB_red__first_int32.c
index b3ebbc4f3a..aae7b2f20e 100644
--- a/Source/Generated/GB_red__first_int32.c
+++ b/Source/Generated/GB_red__first_int32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT32 || GxB_NO_FIRST_INT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_int64.c b/Source/Generated/GB_red__first_int64.c
index 718ab64f91..80451e9e47 100644
--- a/Source/Generated/GB_red__first_int64.c
+++ b/Source/Generated/GB_red__first_int64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT64 || GxB_NO_FIRST_INT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_int8.c b/Source/Generated/GB_red__first_int8.c
index 4d06cab178..b4e2617515 100644
--- a/Source/Generated/GB_red__first_int8.c
+++ b/Source/Generated/GB_red__first_int8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_INT8 || GxB_NO_FIRST_INT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_uint16.c b/Source/Generated/GB_red__first_uint16.c
index 9610b3faa0..5ad66d9fc6 100644
--- a/Source/Generated/GB_red__first_uint16.c
+++ b/Source/Generated/GB_red__first_uint16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT16 || GxB_NO_FIRST_UINT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_uint32.c b/Source/Generated/GB_red__first_uint32.c
index b2d435b626..e9dd76aa2f 100644
--- a/Source/Generated/GB_red__first_uint32.c
+++ b/Source/Generated/GB_red__first_uint32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT32 || GxB_NO_FIRST_UINT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_uint64.c b/Source/Generated/GB_red__first_uint64.c
index ee02149fe4..6cbcf95ff3 100644
--- a/Source/Generated/GB_red__first_uint64.c
+++ b/Source/Generated/GB_red__first_uint64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT64 || GxB_NO_FIRST_UINT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__first_uint8.c b/Source/Generated/GB_red__first_uint8.c
index f4f4bec3ea..1b4489806c 100644
--- a/Source/Generated/GB_red__first_uint8.c
+++ b/Source/Generated/GB_red__first_uint8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_FIRST || GxB_NO_UINT8 || GxB_NO_FIRST_UINT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__include.h b/Source/Generated/GB_red__include.h
index 1f1839aad1..ed280b691e 100644
--- a/Source/Generated/GB_red__include.h
+++ b/Source/Generated/GB_red__include.h
@@ -2,7 +2,7 @@
 // GB_red__include.h: definitions for GB_red__*.c
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // This file has been automatically generated from Generator/GB_red.h
@@ -1029,6 +1029,567 @@ GrB_Info GB_red_build__max_fp64
 
 
 
+GrB_Info GB_red_scalar__any_int8
+(
+    int8_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_int8
+(
+    int8_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_int8
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_int8
+(
+    int8_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const int8_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_int16
+(
+    int16_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_int16
+(
+    int16_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_int16
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_int16
+(
+    int16_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const int16_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_int32
+(
+    int32_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_int32
+(
+    int32_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_int32
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_int32
+(
+    int32_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const int32_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_int64
+(
+    int64_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_int64
+(
+    int64_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_int64
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_int64
+(
+    int64_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const int64_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_uint8
+(
+    uint8_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_uint8
+(
+    uint8_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_uint8
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_uint8
+(
+    uint8_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const uint8_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_uint16
+(
+    uint16_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_uint16
+(
+    uint16_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_uint16
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_uint16
+(
+    uint16_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const uint16_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_uint32
+(
+    uint32_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_uint32
+(
+    uint32_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_uint32
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_uint32
+(
+    uint32_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const uint32_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_uint64
+(
+    uint64_t *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_uint64
+(
+    uint64_t *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_uint64
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_uint64
+(
+    uint64_t *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const uint64_t *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_fp32
+(
+    float *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_fp32
+(
+    float *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_fp32
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_fp32
+(
+    float *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const float *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_fp64
+(
+    double *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_fp64
+(
+    double *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_fp64
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_fp64
+(
+    double *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const double *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
+GrB_Info GB_red_scalar__any_bool
+(
+    bool *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_bool
+(
+    bool *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_bool
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_bool
+(
+    bool *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const bool *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
+
+
 GrB_Info GB_red_scalar__plus_int8
 (
     int8_t *result,
@@ -2251,6 +2812,57 @@ GrB_Info GB_red_build__eq_bool
     int nthreads
 ) ;
 
+
+
+GrB_Info GB_red_scalar__any_bool
+(
+    bool *result,
+    const GrB_Matrix A,
+    GB_void *GB_RESTRICT W_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachvec__any_bool
+(
+    bool *GB_RESTRICT Tx,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    GB_void *Wfirst_space,
+    GB_void *Wlast_space,
+    int ntasks,
+    int nthreads
+) ;
+
+GrB_Info GB_red_eachindex__any_bool
+(
+    GrB_Matrix *Thandle,
+    GrB_Type ttype,
+    GrB_Matrix A,
+    const int64_t *GB_RESTRICT pstart_slice,
+    int nth,
+    int nthreads,
+    GB_Context Context
+) ;
+
+
+
+GrB_Info GB_red_build__any_bool
+(
+    bool *GB_RESTRICT Tx,
+    int64_t  *GB_RESTRICT Ti,
+    const bool *GB_RESTRICT S,
+    int64_t nvals,
+    int64_t ndupl,
+    const int64_t *GB_RESTRICT I_work,
+    const int64_t *GB_RESTRICT K_work,
+    const int64_t *GB_RESTRICT tstart_slice,
+    const int64_t *GB_RESTRICT tnz_slice,
+    int nthreads
+) ;
+
 #if 0
 
 GrB_Info GB_red_scalar__(none)
diff --git a/Source/Generated/GB_red__land_bool.c b/Source/Generated/GB_red__land_bool.c
index 9028ae7401..34ccf89340 100644
--- a/Source/Generated/GB_red__land_bool.c
+++ b/Source/Generated/GB_red__land_bool.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         8
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LAND || GxB_NO_BOOL || GxB_NO_LAND_BOOL)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__land_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__land_bool
 (
     bool *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__land_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__land_bool
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__lor_bool.c b/Source/Generated/GB_red__lor_bool.c
index 4c72c511ae..eec039cbd9 100644
--- a/Source/Generated/GB_red__lor_bool.c
+++ b/Source/Generated/GB_red__lor_bool.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         8
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LOR || GxB_NO_BOOL || GxB_NO_LOR_BOOL)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__lor_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__lor_bool
 (
     bool *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__lor_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__lor_bool
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__lxor_bool.c b/Source/Generated/GB_red__lxor_bool.c
index 8dc057f226..8a29661fb5 100644
--- a/Source/Generated/GB_red__lxor_bool.c
+++ b/Source/Generated/GB_red__lxor_bool.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         8
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_LXOR || GxB_NO_BOOL || GxB_NO_LXOR_BOOL)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__lxor_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__lxor_bool
 (
     bool *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__lxor_bool
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__lxor_bool
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_fp32.c b/Source/Generated/GB_red__max_fp32.c
index 9022b292d9..45834dfe78 100644
--- a/Source/Generated/GB_red__max_fp32.c
+++ b/Source/Generated/GB_red__max_fp32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FP32 || GxB_NO_MAX_FP32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_fp32
 (
     float *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_fp32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_fp64.c b/Source/Generated/GB_red__max_fp64.c
index 85272b9234..ec6cb27592 100644
--- a/Source/Generated/GB_red__max_fp64.c
+++ b/Source/Generated/GB_red__max_fp64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_FP64 || GxB_NO_MAX_FP64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_fp64
 (
     double *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_fp64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_int16.c b/Source/Generated/GB_red__max_int16.c
index 5e33827888..bc41b3b442 100644
--- a/Source/Generated/GB_red__max_int16.c
+++ b/Source/Generated/GB_red__max_int16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT16 || GxB_NO_MAX_INT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_int16
 (
     int16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_int16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_int32.c b/Source/Generated/GB_red__max_int32.c
index fbbc99b04b..164e5f9655 100644
--- a/Source/Generated/GB_red__max_int32.c
+++ b/Source/Generated/GB_red__max_int32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT32 || GxB_NO_MAX_INT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_int32
 (
     int32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_int32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_int64.c b/Source/Generated/GB_red__max_int64.c
index 9e15982dbd..38ea97a217 100644
--- a/Source/Generated/GB_red__max_int64.c
+++ b/Source/Generated/GB_red__max_int64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT64 || GxB_NO_MAX_INT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_int64
 (
     int64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_int64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_int8.c b/Source/Generated/GB_red__max_int8.c
index 099b20b832..6e312aa4ea 100644
--- a/Source/Generated/GB_red__max_int8.c
+++ b/Source/Generated/GB_red__max_int8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_INT8 || GxB_NO_MAX_INT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_int8
 (
     int8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_int8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_uint16.c b/Source/Generated/GB_red__max_uint16.c
index 333159ff9e..ff1703ce20 100644
--- a/Source/Generated/GB_red__max_uint16.c
+++ b/Source/Generated/GB_red__max_uint16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT16 || GxB_NO_MAX_UINT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_uint16
 (
     uint16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_uint16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_uint32.c b/Source/Generated/GB_red__max_uint32.c
index 4e05d80ef2..7d9bdb2772 100644
--- a/Source/Generated/GB_red__max_uint32.c
+++ b/Source/Generated/GB_red__max_uint32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT32 || GxB_NO_MAX_UINT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_uint32
 (
     uint32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_uint32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_uint64.c b/Source/Generated/GB_red__max_uint64.c
index bd77e137f7..d979352412 100644
--- a/Source/Generated/GB_red__max_uint64.c
+++ b/Source/Generated/GB_red__max_uint64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT64 || GxB_NO_MAX_UINT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_uint64
 (
     uint64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_uint64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__max_uint8.c b/Source/Generated/GB_red__max_uint8.c
index 880dca68f5..65116003c8 100644
--- a/Source/Generated/GB_red__max_uint8.c
+++ b/Source/Generated/GB_red__max_uint8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MAX || GxB_NO_UINT8 || GxB_NO_MAX_UINT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__max_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__max_uint8
 (
     uint8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__max_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__max_uint8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_fp32.c b/Source/Generated/GB_red__min_fp32.c
index 65ea7a7eec..5dedacdb6b 100644
--- a/Source/Generated/GB_red__min_fp32.c
+++ b/Source/Generated/GB_red__min_fp32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FP32 || GxB_NO_MIN_FP32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_fp32
 (
     float *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_fp32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_fp64.c b/Source/Generated/GB_red__min_fp64.c
index c54d2f0a50..953c85bea1 100644
--- a/Source/Generated/GB_red__min_fp64.c
+++ b/Source/Generated/GB_red__min_fp64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_FP64 || GxB_NO_MIN_FP64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_fp64
 (
     double *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_fp64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_int16.c b/Source/Generated/GB_red__min_int16.c
index 87fd311a29..bbad390c3f 100644
--- a/Source/Generated/GB_red__min_int16.c
+++ b/Source/Generated/GB_red__min_int16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT16 || GxB_NO_MIN_INT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_int16
 (
     int16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_int16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_int32.c b/Source/Generated/GB_red__min_int32.c
index f78c52594b..149b7e6516 100644
--- a/Source/Generated/GB_red__min_int32.c
+++ b/Source/Generated/GB_red__min_int32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT32 || GxB_NO_MIN_INT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_int32
 (
     int32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_int32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_int64.c b/Source/Generated/GB_red__min_int64.c
index 5fe72839cc..5e189093f7 100644
--- a/Source/Generated/GB_red__min_int64.c
+++ b/Source/Generated/GB_red__min_int64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT64 || GxB_NO_MIN_INT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_int64
 (
     int64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_int64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_int8.c b/Source/Generated/GB_red__min_int8.c
index ee03a34485..75ff926ab9 100644
--- a/Source/Generated/GB_red__min_int8.c
+++ b/Source/Generated/GB_red__min_int8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_INT8 || GxB_NO_MIN_INT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_int8
 (
     int8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_int8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_uint16.c b/Source/Generated/GB_red__min_uint16.c
index 3219f726d4..096d7e4025 100644
--- a/Source/Generated/GB_red__min_uint16.c
+++ b/Source/Generated/GB_red__min_uint16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT16 || GxB_NO_MIN_UINT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_uint16
 (
     uint16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_uint16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_uint32.c b/Source/Generated/GB_red__min_uint32.c
index afd673bf03..0f1d32fc93 100644
--- a/Source/Generated/GB_red__min_uint32.c
+++ b/Source/Generated/GB_red__min_uint32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT32 || GxB_NO_MIN_UINT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_uint32
 (
     uint32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_uint32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_uint64.c b/Source/Generated/GB_red__min_uint64.c
index 28561093a2..0905aa034a 100644
--- a/Source/Generated/GB_red__min_uint64.c
+++ b/Source/Generated/GB_red__min_uint64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT64 || GxB_NO_MIN_UINT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_uint64
 (
     uint64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_uint64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__min_uint8.c b/Source/Generated/GB_red__min_uint8.c
index 450f65901e..d80196b5dd 100644
--- a/Source/Generated/GB_red__min_uint8.c
+++ b/Source/Generated/GB_red__min_uint8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_MIN || GxB_NO_UINT8 || GxB_NO_MIN_UINT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__min_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__min_uint8
 (
     uint8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__min_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__min_uint8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_fp32.c b/Source/Generated/GB_red__plus_fp32.c
index d9dfcdfda0..b58d5771f0 100644
--- a/Source/Generated/GB_red__plus_fp32.c
+++ b/Source/Generated/GB_red__plus_fp32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FP32 || GxB_NO_PLUS_FP32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_fp32
 (
     float *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_fp32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_fp64.c b/Source/Generated/GB_red__plus_fp64.c
index 59315c97fa..bc024695eb 100644
--- a/Source/Generated/GB_red__plus_fp64.c
+++ b/Source/Generated/GB_red__plus_fp64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         32
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_FP64 || GxB_NO_PLUS_FP64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_fp64
 (
     double *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_fp64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_int16.c b/Source/Generated/GB_red__plus_int16.c
index bb44c97809..2a30357ddd 100644
--- a/Source/Generated/GB_red__plus_int16.c
+++ b/Source/Generated/GB_red__plus_int16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT16 || GxB_NO_PLUS_INT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_int16
 (
     int16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_int16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_int32.c b/Source/Generated/GB_red__plus_int32.c
index 02086b16e7..feae8cb88e 100644
--- a/Source/Generated/GB_red__plus_int32.c
+++ b/Source/Generated/GB_red__plus_int32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT32 || GxB_NO_PLUS_INT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_int32
 (
     int32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_int32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_int64.c b/Source/Generated/GB_red__plus_int64.c
index 8f860f3bb3..038340e614 100644
--- a/Source/Generated/GB_red__plus_int64.c
+++ b/Source/Generated/GB_red__plus_int64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         32
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT64 || GxB_NO_PLUS_INT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_int64
 (
     int64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_int64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_int8.c b/Source/Generated/GB_red__plus_int8.c
index 86fbb2dd2f..09752ba89a 100644
--- a/Source/Generated/GB_red__plus_int8.c
+++ b/Source/Generated/GB_red__plus_int8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_INT8 || GxB_NO_PLUS_INT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_int8
 (
     int8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_int8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_uint16.c b/Source/Generated/GB_red__plus_uint16.c
index 7f59d0c598..0b3e26d1b7 100644
--- a/Source/Generated/GB_red__plus_uint16.c
+++ b/Source/Generated/GB_red__plus_uint16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT16 || GxB_NO_PLUS_UINT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_uint16
 (
     uint16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_uint16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_uint32.c b/Source/Generated/GB_red__plus_uint32.c
index 9556673efe..c6ef31ea53 100644
--- a/Source/Generated/GB_red__plus_uint32.c
+++ b/Source/Generated/GB_red__plus_uint32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT32 || GxB_NO_PLUS_UINT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_uint32
 (
     uint32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_uint32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_uint64.c b/Source/Generated/GB_red__plus_uint64.c
index d4ce71f2f2..8d7435632f 100644
--- a/Source/Generated/GB_red__plus_uint64.c
+++ b/Source/Generated/GB_red__plus_uint64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         32
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT64 || GxB_NO_PLUS_UINT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_uint64
 (
     uint64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_uint64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__plus_uint8.c b/Source/Generated/GB_red__plus_uint8.c
index 67ea8eab94..0808192be9 100644
--- a/Source/Generated/GB_red__plus_uint8.c
+++ b/Source/Generated/GB_red__plus_uint8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_PLUS || GxB_NO_UINT8 || GxB_NO_PLUS_UINT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__plus_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__plus_uint8
 (
     uint8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__plus_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__plus_uint8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_bool.c b/Source/Generated/GB_red__second_bool.c
index c4ee503634..2f17f9f83a 100644
--- a/Source/Generated/GB_red__second_bool.c
+++ b/Source/Generated/GB_red__second_bool.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_BOOL || GxB_NO_SECOND_BOOL)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     bool *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_fp32.c b/Source/Generated/GB_red__second_fp32.c
index 2b974de754..4699804cf0 100644
--- a/Source/Generated/GB_red__second_fp32.c
+++ b/Source/Generated/GB_red__second_fp32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_FP32 || GxB_NO_SECOND_FP32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     float *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_fp64.c b/Source/Generated/GB_red__second_fp64.c
index 58121fda8a..795f1f54ca 100644
--- a/Source/Generated/GB_red__second_fp64.c
+++ b/Source/Generated/GB_red__second_fp64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_FP64 || GxB_NO_SECOND_FP64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     double *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_int16.c b/Source/Generated/GB_red__second_int16.c
index 8282fc645e..e58c6e2b64 100644
--- a/Source/Generated/GB_red__second_int16.c
+++ b/Source/Generated/GB_red__second_int16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT16 || GxB_NO_SECOND_INT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_int32.c b/Source/Generated/GB_red__second_int32.c
index 7177642072..cb30633947 100644
--- a/Source/Generated/GB_red__second_int32.c
+++ b/Source/Generated/GB_red__second_int32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT32 || GxB_NO_SECOND_INT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_int64.c b/Source/Generated/GB_red__second_int64.c
index d5951e95b2..18704f6c69 100644
--- a/Source/Generated/GB_red__second_int64.c
+++ b/Source/Generated/GB_red__second_int64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT64 || GxB_NO_SECOND_INT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_int8.c b/Source/Generated/GB_red__second_int8.c
index e543d850e2..2f20917d5d 100644
--- a/Source/Generated/GB_red__second_int8.c
+++ b/Source/Generated/GB_red__second_int8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_INT8 || GxB_NO_SECOND_INT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     int8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_uint16.c b/Source/Generated/GB_red__second_uint16.c
index 2a7b5e30fd..8ad3d30e51 100644
--- a/Source/Generated/GB_red__second_uint16.c
+++ b/Source/Generated/GB_red__second_uint16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT16 || GxB_NO_SECOND_UINT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_uint32.c b/Source/Generated/GB_red__second_uint32.c
index 0743de99a5..b4d5427a68 100644
--- a/Source/Generated/GB_red__second_uint32.c
+++ b/Source/Generated/GB_red__second_uint32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT32 || GxB_NO_SECOND_UINT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_uint64.c b/Source/Generated/GB_red__second_uint64.c
index 53984d1a48..a0dec713c8 100644
--- a/Source/Generated/GB_red__second_uint64.c
+++ b/Source/Generated/GB_red__second_uint64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT64 || GxB_NO_SECOND_UINT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__second_uint8.c b/Source/Generated/GB_red__second_uint8.c
index 7258758173..1af0975e6d 100644
--- a/Source/Generated/GB_red__second_uint8.c
+++ b/Source/Generated/GB_red__second_uint8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         1
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_SECOND || GxB_NO_UINT8 || GxB_NO_SECOND_UINT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachvec__(none)
 (
     uint8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__(none)
     #endif
 }
 
+#endif
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+#if 0
+
 GrB_Info GB_red_eachindex__(none)
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_fp32.c b/Source/Generated/GB_red__times_fp32.c
index 3e9b63d14e..40f2065225 100644
--- a/Source/Generated/GB_red__times_fp32.c
+++ b/Source/Generated/GB_red__times_fp32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FP32 || GxB_NO_TIMES_FP32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_fp32
 (
     float *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_fp32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_fp32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_fp64.c b/Source/Generated/GB_red__times_fp64.c
index e03f95c56f..facacab7a8 100644
--- a/Source/Generated/GB_red__times_fp64.c
+++ b/Source/Generated/GB_red__times_fp64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         32
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_FP64 || GxB_NO_TIMES_FP64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_fp64
 (
     double *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_fp64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_fp64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_int16.c b/Source/Generated/GB_red__times_int16.c
index 6b5e6198a7..67102d9898 100644
--- a/Source/Generated/GB_red__times_int16.c
+++ b/Source/Generated/GB_red__times_int16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT16 || GxB_NO_TIMES_INT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_int16
 (
     int16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_int16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_int16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_int32.c b/Source/Generated/GB_red__times_int32.c
index 3b0ca87b4f..fae7b47013 100644
--- a/Source/Generated/GB_red__times_int32.c
+++ b/Source/Generated/GB_red__times_int32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT32 || GxB_NO_TIMES_INT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_int32
 (
     int32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_int32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_int32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_int64.c b/Source/Generated/GB_red__times_int64.c
index 0f3d224579..e83a8dcc42 100644
--- a/Source/Generated/GB_red__times_int64.c
+++ b/Source/Generated/GB_red__times_int64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT64 || GxB_NO_TIMES_INT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_int64
 (
     int64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_int64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_int64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_int8.c b/Source/Generated/GB_red__times_int8.c
index e9f41ad2d4..a081cb72f5 100644
--- a/Source/Generated/GB_red__times_int8.c
+++ b/Source/Generated/GB_red__times_int8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_INT8 || GxB_NO_TIMES_INT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_int8
 (
     int8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_int8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_int8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_uint16.c b/Source/Generated/GB_red__times_uint16.c
index 5dcb157c3d..bed69b444d 100644
--- a/Source/Generated/GB_red__times_uint16.c
+++ b/Source/Generated/GB_red__times_uint16.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT16 || GxB_NO_TIMES_UINT16)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_uint16
 (
     uint16_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_uint16
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_uint16
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_uint32.c b/Source/Generated/GB_red__times_uint32.c
index 3d18d8229d..d1cda770ea 100644
--- a/Source/Generated/GB_red__times_uint32.c
+++ b/Source/Generated/GB_red__times_uint32.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT32 || GxB_NO_TIMES_UINT32)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_uint32
 (
     uint32_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_uint32
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_uint32
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_uint64.c b/Source/Generated/GB_red__times_uint64.c
index 7fe1510c40..b58d41fba0 100644
--- a/Source/Generated/GB_red__times_uint64.c
+++ b/Source/Generated/GB_red__times_uint64.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         16
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT64 || GxB_NO_TIMES_UINT64)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_uint64
 (
     uint64_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_uint64
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_uint64
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_red__times_uint8.c b/Source/Generated/GB_red__times_uint8.c
index 80c896d181..d7819478b0 100644
--- a/Source/Generated/GB_red__times_uint8.c
+++ b/Source/Generated/GB_red__times_uint8.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         64
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        0
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     (GxB_NO_TIMES || GxB_NO_UINT8 || GxB_NO_TIMES_UINT8)
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar__times_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachvec__times_uint8
 (
     uint8_t *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec__times_uint8
     #endif
 }
 
+
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+
+
 GrB_Info GB_red_eachindex__times_uint8
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generated/GB_sel__diag_any.c b/Source/Generated/GB_sel__diag_any.c
index 1def5cd49e..aa46f141d9 100644
--- a/Source/Generated/GB_sel__diag_any.c
+++ b/Source/Generated/GB_sel__diag_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_any.c b/Source/Generated/GB_sel__eq_thunk_any.c
index 051eeaa81f..663e269f31 100644
--- a/Source/Generated/GB_sel__eq_thunk_any.c
+++ b/Source/Generated/GB_sel__eq_thunk_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_fp32.c b/Source/Generated/GB_sel__eq_thunk_fp32.c
index 19a2255983..7e171fbdcf 100644
--- a/Source/Generated/GB_sel__eq_thunk_fp32.c
+++ b/Source/Generated/GB_sel__eq_thunk_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_fp64.c b/Source/Generated/GB_sel__eq_thunk_fp64.c
index 53e8681418..0d71d58000 100644
--- a/Source/Generated/GB_sel__eq_thunk_fp64.c
+++ b/Source/Generated/GB_sel__eq_thunk_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_int16.c b/Source/Generated/GB_sel__eq_thunk_int16.c
index 419cdbace3..c1d18ffaa8 100644
--- a/Source/Generated/GB_sel__eq_thunk_int16.c
+++ b/Source/Generated/GB_sel__eq_thunk_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_int32.c b/Source/Generated/GB_sel__eq_thunk_int32.c
index 775c637c45..b3fc16516d 100644
--- a/Source/Generated/GB_sel__eq_thunk_int32.c
+++ b/Source/Generated/GB_sel__eq_thunk_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_int64.c b/Source/Generated/GB_sel__eq_thunk_int64.c
index f2120438bb..104776c625 100644
--- a/Source/Generated/GB_sel__eq_thunk_int64.c
+++ b/Source/Generated/GB_sel__eq_thunk_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_int8.c b/Source/Generated/GB_sel__eq_thunk_int8.c
index 653f6f412f..1bf14acd3c 100644
--- a/Source/Generated/GB_sel__eq_thunk_int8.c
+++ b/Source/Generated/GB_sel__eq_thunk_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_uint16.c b/Source/Generated/GB_sel__eq_thunk_uint16.c
index 428f1ae730..b79b509356 100644
--- a/Source/Generated/GB_sel__eq_thunk_uint16.c
+++ b/Source/Generated/GB_sel__eq_thunk_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_uint32.c b/Source/Generated/GB_sel__eq_thunk_uint32.c
index 58cb369cc1..2f94bd504f 100644
--- a/Source/Generated/GB_sel__eq_thunk_uint32.c
+++ b/Source/Generated/GB_sel__eq_thunk_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_uint64.c b/Source/Generated/GB_sel__eq_thunk_uint64.c
index 1792085f93..bd31fc06f9 100644
--- a/Source/Generated/GB_sel__eq_thunk_uint64.c
+++ b/Source/Generated/GB_sel__eq_thunk_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_thunk_uint8.c b/Source/Generated/GB_sel__eq_thunk_uint8.c
index b6ce03a504..54216eaf5d 100644
--- a/Source/Generated/GB_sel__eq_thunk_uint8.c
+++ b/Source/Generated/GB_sel__eq_thunk_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_any.c b/Source/Generated/GB_sel__eq_zero_any.c
index 85ad8c1150..c52e383a7c 100644
--- a/Source/Generated/GB_sel__eq_zero_any.c
+++ b/Source/Generated/GB_sel__eq_zero_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_bool.c b/Source/Generated/GB_sel__eq_zero_bool.c
index 3e9deda817..7f242b861b 100644
--- a/Source/Generated/GB_sel__eq_zero_bool.c
+++ b/Source/Generated/GB_sel__eq_zero_bool.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_fp32.c b/Source/Generated/GB_sel__eq_zero_fp32.c
index a43fe12ad6..2ef2f9618c 100644
--- a/Source/Generated/GB_sel__eq_zero_fp32.c
+++ b/Source/Generated/GB_sel__eq_zero_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_fp64.c b/Source/Generated/GB_sel__eq_zero_fp64.c
index 858ff38922..9b55f147b8 100644
--- a/Source/Generated/GB_sel__eq_zero_fp64.c
+++ b/Source/Generated/GB_sel__eq_zero_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_int16.c b/Source/Generated/GB_sel__eq_zero_int16.c
index b6ac2f7348..f0f3c282bd 100644
--- a/Source/Generated/GB_sel__eq_zero_int16.c
+++ b/Source/Generated/GB_sel__eq_zero_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_int32.c b/Source/Generated/GB_sel__eq_zero_int32.c
index e2ce3fe116..e891ed6c9e 100644
--- a/Source/Generated/GB_sel__eq_zero_int32.c
+++ b/Source/Generated/GB_sel__eq_zero_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_int64.c b/Source/Generated/GB_sel__eq_zero_int64.c
index 5d5884cee9..7b0355bacc 100644
--- a/Source/Generated/GB_sel__eq_zero_int64.c
+++ b/Source/Generated/GB_sel__eq_zero_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_int8.c b/Source/Generated/GB_sel__eq_zero_int8.c
index a8293072eb..81c0b656b2 100644
--- a/Source/Generated/GB_sel__eq_zero_int8.c
+++ b/Source/Generated/GB_sel__eq_zero_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_uint16.c b/Source/Generated/GB_sel__eq_zero_uint16.c
index 4954bc51ca..815e339f42 100644
--- a/Source/Generated/GB_sel__eq_zero_uint16.c
+++ b/Source/Generated/GB_sel__eq_zero_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_uint32.c b/Source/Generated/GB_sel__eq_zero_uint32.c
index 619a6bf3f4..74c4764ca3 100644
--- a/Source/Generated/GB_sel__eq_zero_uint32.c
+++ b/Source/Generated/GB_sel__eq_zero_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_uint64.c b/Source/Generated/GB_sel__eq_zero_uint64.c
index e714b1923f..44a39dbf5e 100644
--- a/Source/Generated/GB_sel__eq_zero_uint64.c
+++ b/Source/Generated/GB_sel__eq_zero_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__eq_zero_uint8.c b/Source/Generated/GB_sel__eq_zero_uint8.c
index 07e6dac4bc..180ddf4786 100644
--- a/Source/Generated/GB_sel__eq_zero_uint8.c
+++ b/Source/Generated/GB_sel__eq_zero_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_fp32.c b/Source/Generated/GB_sel__ge_thunk_fp32.c
index 86883c7f38..bdee2630cc 100644
--- a/Source/Generated/GB_sel__ge_thunk_fp32.c
+++ b/Source/Generated/GB_sel__ge_thunk_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_fp64.c b/Source/Generated/GB_sel__ge_thunk_fp64.c
index 9c145261e5..bea6726bdf 100644
--- a/Source/Generated/GB_sel__ge_thunk_fp64.c
+++ b/Source/Generated/GB_sel__ge_thunk_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_int16.c b/Source/Generated/GB_sel__ge_thunk_int16.c
index 2f6dff0874..c30f75cc3f 100644
--- a/Source/Generated/GB_sel__ge_thunk_int16.c
+++ b/Source/Generated/GB_sel__ge_thunk_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_int32.c b/Source/Generated/GB_sel__ge_thunk_int32.c
index 76d02a3891..5aadc05f48 100644
--- a/Source/Generated/GB_sel__ge_thunk_int32.c
+++ b/Source/Generated/GB_sel__ge_thunk_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_int64.c b/Source/Generated/GB_sel__ge_thunk_int64.c
index 7761c9eab8..0de145413b 100644
--- a/Source/Generated/GB_sel__ge_thunk_int64.c
+++ b/Source/Generated/GB_sel__ge_thunk_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_int8.c b/Source/Generated/GB_sel__ge_thunk_int8.c
index 6c73a92577..573e387632 100644
--- a/Source/Generated/GB_sel__ge_thunk_int8.c
+++ b/Source/Generated/GB_sel__ge_thunk_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_uint16.c b/Source/Generated/GB_sel__ge_thunk_uint16.c
index fa012d2f69..8d5bb3ce58 100644
--- a/Source/Generated/GB_sel__ge_thunk_uint16.c
+++ b/Source/Generated/GB_sel__ge_thunk_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_uint32.c b/Source/Generated/GB_sel__ge_thunk_uint32.c
index bcd0072d15..4ab2334645 100644
--- a/Source/Generated/GB_sel__ge_thunk_uint32.c
+++ b/Source/Generated/GB_sel__ge_thunk_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_uint64.c b/Source/Generated/GB_sel__ge_thunk_uint64.c
index e4e739899d..664fa04312 100644
--- a/Source/Generated/GB_sel__ge_thunk_uint64.c
+++ b/Source/Generated/GB_sel__ge_thunk_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_thunk_uint8.c b/Source/Generated/GB_sel__ge_thunk_uint8.c
index af5687771a..ea67fa449a 100644
--- a/Source/Generated/GB_sel__ge_thunk_uint8.c
+++ b/Source/Generated/GB_sel__ge_thunk_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_zero_fp32.c b/Source/Generated/GB_sel__ge_zero_fp32.c
index 77e6338638..20bef1a520 100644
--- a/Source/Generated/GB_sel__ge_zero_fp32.c
+++ b/Source/Generated/GB_sel__ge_zero_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_zero_fp64.c b/Source/Generated/GB_sel__ge_zero_fp64.c
index bed858efce..1dde459369 100644
--- a/Source/Generated/GB_sel__ge_zero_fp64.c
+++ b/Source/Generated/GB_sel__ge_zero_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_zero_int16.c b/Source/Generated/GB_sel__ge_zero_int16.c
index 536c29cf88..6480019a36 100644
--- a/Source/Generated/GB_sel__ge_zero_int16.c
+++ b/Source/Generated/GB_sel__ge_zero_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_zero_int32.c b/Source/Generated/GB_sel__ge_zero_int32.c
index dba67611e7..5311d6178d 100644
--- a/Source/Generated/GB_sel__ge_zero_int32.c
+++ b/Source/Generated/GB_sel__ge_zero_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_zero_int64.c b/Source/Generated/GB_sel__ge_zero_int64.c
index 43d9e38a81..6cbddc657b 100644
--- a/Source/Generated/GB_sel__ge_zero_int64.c
+++ b/Source/Generated/GB_sel__ge_zero_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ge_zero_int8.c b/Source/Generated/GB_sel__ge_zero_int8.c
index a22f15864f..dfeeef8b7d 100644
--- a/Source/Generated/GB_sel__ge_zero_int8.c
+++ b/Source/Generated/GB_sel__ge_zero_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_fp32.c b/Source/Generated/GB_sel__gt_thunk_fp32.c
index 83a4ece592..6586a5a8bf 100644
--- a/Source/Generated/GB_sel__gt_thunk_fp32.c
+++ b/Source/Generated/GB_sel__gt_thunk_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_fp64.c b/Source/Generated/GB_sel__gt_thunk_fp64.c
index 897b427ed7..580840e93a 100644
--- a/Source/Generated/GB_sel__gt_thunk_fp64.c
+++ b/Source/Generated/GB_sel__gt_thunk_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_int16.c b/Source/Generated/GB_sel__gt_thunk_int16.c
index e3e877b109..4e80bfad2e 100644
--- a/Source/Generated/GB_sel__gt_thunk_int16.c
+++ b/Source/Generated/GB_sel__gt_thunk_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_int32.c b/Source/Generated/GB_sel__gt_thunk_int32.c
index c89c2e12e1..45d1e743a2 100644
--- a/Source/Generated/GB_sel__gt_thunk_int32.c
+++ b/Source/Generated/GB_sel__gt_thunk_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_int64.c b/Source/Generated/GB_sel__gt_thunk_int64.c
index ccbd5d701b..ee5d452255 100644
--- a/Source/Generated/GB_sel__gt_thunk_int64.c
+++ b/Source/Generated/GB_sel__gt_thunk_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_int8.c b/Source/Generated/GB_sel__gt_thunk_int8.c
index 6ff630e835..e9366dfcb8 100644
--- a/Source/Generated/GB_sel__gt_thunk_int8.c
+++ b/Source/Generated/GB_sel__gt_thunk_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_uint16.c b/Source/Generated/GB_sel__gt_thunk_uint16.c
index 3cef0303f5..13711f35c2 100644
--- a/Source/Generated/GB_sel__gt_thunk_uint16.c
+++ b/Source/Generated/GB_sel__gt_thunk_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_uint32.c b/Source/Generated/GB_sel__gt_thunk_uint32.c
index 7f3c5e9454..c76155a6be 100644
--- a/Source/Generated/GB_sel__gt_thunk_uint32.c
+++ b/Source/Generated/GB_sel__gt_thunk_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_uint64.c b/Source/Generated/GB_sel__gt_thunk_uint64.c
index d5b840b6a3..1a29b8790f 100644
--- a/Source/Generated/GB_sel__gt_thunk_uint64.c
+++ b/Source/Generated/GB_sel__gt_thunk_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_thunk_uint8.c b/Source/Generated/GB_sel__gt_thunk_uint8.c
index 29a97056d3..f7535fa098 100644
--- a/Source/Generated/GB_sel__gt_thunk_uint8.c
+++ b/Source/Generated/GB_sel__gt_thunk_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_zero_fp32.c b/Source/Generated/GB_sel__gt_zero_fp32.c
index a89c89ca02..d9aedea6cc 100644
--- a/Source/Generated/GB_sel__gt_zero_fp32.c
+++ b/Source/Generated/GB_sel__gt_zero_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_zero_fp64.c b/Source/Generated/GB_sel__gt_zero_fp64.c
index bc4733abc3..a3c285cc11 100644
--- a/Source/Generated/GB_sel__gt_zero_fp64.c
+++ b/Source/Generated/GB_sel__gt_zero_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_zero_int16.c b/Source/Generated/GB_sel__gt_zero_int16.c
index 027bf25684..f6c7a42f59 100644
--- a/Source/Generated/GB_sel__gt_zero_int16.c
+++ b/Source/Generated/GB_sel__gt_zero_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_zero_int32.c b/Source/Generated/GB_sel__gt_zero_int32.c
index 7d6ac1a21d..3241021ae8 100644
--- a/Source/Generated/GB_sel__gt_zero_int32.c
+++ b/Source/Generated/GB_sel__gt_zero_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_zero_int64.c b/Source/Generated/GB_sel__gt_zero_int64.c
index b568eae8e6..1f606d873e 100644
--- a/Source/Generated/GB_sel__gt_zero_int64.c
+++ b/Source/Generated/GB_sel__gt_zero_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__gt_zero_int8.c b/Source/Generated/GB_sel__gt_zero_int8.c
index cac05cb7c2..c6836dfe9e 100644
--- a/Source/Generated/GB_sel__gt_zero_int8.c
+++ b/Source/Generated/GB_sel__gt_zero_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__include.h b/Source/Generated/GB_sel__include.h
index 3a19f5ada7..aa570541a9 100644
--- a/Source/Generated/GB_sel__include.h
+++ b/Source/Generated/GB_sel__include.h
@@ -2,7 +2,7 @@
 // GB_sel__include.h: definitions for GB_sel__*.c
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // This file has been automatically generated from Generator/GB_sel.h
diff --git a/Source/Generated/GB_sel__le_thunk_fp32.c b/Source/Generated/GB_sel__le_thunk_fp32.c
index 5b2a72909b..9e0a58f48d 100644
--- a/Source/Generated/GB_sel__le_thunk_fp32.c
+++ b/Source/Generated/GB_sel__le_thunk_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_fp64.c b/Source/Generated/GB_sel__le_thunk_fp64.c
index 073ba9765a..7b8bc0e392 100644
--- a/Source/Generated/GB_sel__le_thunk_fp64.c
+++ b/Source/Generated/GB_sel__le_thunk_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_int16.c b/Source/Generated/GB_sel__le_thunk_int16.c
index 6bad0bc55d..a62e13ac6a 100644
--- a/Source/Generated/GB_sel__le_thunk_int16.c
+++ b/Source/Generated/GB_sel__le_thunk_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_int32.c b/Source/Generated/GB_sel__le_thunk_int32.c
index 221662dcb1..7c13e5e4e0 100644
--- a/Source/Generated/GB_sel__le_thunk_int32.c
+++ b/Source/Generated/GB_sel__le_thunk_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_int64.c b/Source/Generated/GB_sel__le_thunk_int64.c
index 15c2fb52f7..908a5d0f35 100644
--- a/Source/Generated/GB_sel__le_thunk_int64.c
+++ b/Source/Generated/GB_sel__le_thunk_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_int8.c b/Source/Generated/GB_sel__le_thunk_int8.c
index fcea22ccad..df23b542e4 100644
--- a/Source/Generated/GB_sel__le_thunk_int8.c
+++ b/Source/Generated/GB_sel__le_thunk_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_uint16.c b/Source/Generated/GB_sel__le_thunk_uint16.c
index 3d6b38e84e..d8714e925b 100644
--- a/Source/Generated/GB_sel__le_thunk_uint16.c
+++ b/Source/Generated/GB_sel__le_thunk_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_uint32.c b/Source/Generated/GB_sel__le_thunk_uint32.c
index 6a18b7cc49..7321e84967 100644
--- a/Source/Generated/GB_sel__le_thunk_uint32.c
+++ b/Source/Generated/GB_sel__le_thunk_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_uint64.c b/Source/Generated/GB_sel__le_thunk_uint64.c
index df1fc2bc5a..5c7a618e3e 100644
--- a/Source/Generated/GB_sel__le_thunk_uint64.c
+++ b/Source/Generated/GB_sel__le_thunk_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_thunk_uint8.c b/Source/Generated/GB_sel__le_thunk_uint8.c
index 3d892023f9..929f5003e2 100644
--- a/Source/Generated/GB_sel__le_thunk_uint8.c
+++ b/Source/Generated/GB_sel__le_thunk_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_zero_fp32.c b/Source/Generated/GB_sel__le_zero_fp32.c
index 0f80190331..d9371735aa 100644
--- a/Source/Generated/GB_sel__le_zero_fp32.c
+++ b/Source/Generated/GB_sel__le_zero_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_zero_fp64.c b/Source/Generated/GB_sel__le_zero_fp64.c
index 6b1b0d7618..8c6e7e2776 100644
--- a/Source/Generated/GB_sel__le_zero_fp64.c
+++ b/Source/Generated/GB_sel__le_zero_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_zero_int16.c b/Source/Generated/GB_sel__le_zero_int16.c
index 9ef181d913..99c9665c29 100644
--- a/Source/Generated/GB_sel__le_zero_int16.c
+++ b/Source/Generated/GB_sel__le_zero_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_zero_int32.c b/Source/Generated/GB_sel__le_zero_int32.c
index f7f57af1c3..21a546f0f1 100644
--- a/Source/Generated/GB_sel__le_zero_int32.c
+++ b/Source/Generated/GB_sel__le_zero_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_zero_int64.c b/Source/Generated/GB_sel__le_zero_int64.c
index a7c93b9615..b19b0f9723 100644
--- a/Source/Generated/GB_sel__le_zero_int64.c
+++ b/Source/Generated/GB_sel__le_zero_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__le_zero_int8.c b/Source/Generated/GB_sel__le_zero_int8.c
index 2973d916d0..0a79ff77ea 100644
--- a/Source/Generated/GB_sel__le_zero_int8.c
+++ b/Source/Generated/GB_sel__le_zero_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_fp32.c b/Source/Generated/GB_sel__lt_thunk_fp32.c
index 2ebdb30e2c..3467587cbb 100644
--- a/Source/Generated/GB_sel__lt_thunk_fp32.c
+++ b/Source/Generated/GB_sel__lt_thunk_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_fp64.c b/Source/Generated/GB_sel__lt_thunk_fp64.c
index 7ac806036b..2754db0215 100644
--- a/Source/Generated/GB_sel__lt_thunk_fp64.c
+++ b/Source/Generated/GB_sel__lt_thunk_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_int16.c b/Source/Generated/GB_sel__lt_thunk_int16.c
index 29a037c892..a1bc58ca74 100644
--- a/Source/Generated/GB_sel__lt_thunk_int16.c
+++ b/Source/Generated/GB_sel__lt_thunk_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_int32.c b/Source/Generated/GB_sel__lt_thunk_int32.c
index 68fb73e6b2..53185b3cf3 100644
--- a/Source/Generated/GB_sel__lt_thunk_int32.c
+++ b/Source/Generated/GB_sel__lt_thunk_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_int64.c b/Source/Generated/GB_sel__lt_thunk_int64.c
index a2efcba53b..352e015d6a 100644
--- a/Source/Generated/GB_sel__lt_thunk_int64.c
+++ b/Source/Generated/GB_sel__lt_thunk_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_int8.c b/Source/Generated/GB_sel__lt_thunk_int8.c
index 26a70d194f..2b447ca0bb 100644
--- a/Source/Generated/GB_sel__lt_thunk_int8.c
+++ b/Source/Generated/GB_sel__lt_thunk_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_uint16.c b/Source/Generated/GB_sel__lt_thunk_uint16.c
index 6d417ff5a5..e84049f611 100644
--- a/Source/Generated/GB_sel__lt_thunk_uint16.c
+++ b/Source/Generated/GB_sel__lt_thunk_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_uint32.c b/Source/Generated/GB_sel__lt_thunk_uint32.c
index 45c7825fb7..44a2a298b1 100644
--- a/Source/Generated/GB_sel__lt_thunk_uint32.c
+++ b/Source/Generated/GB_sel__lt_thunk_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_uint64.c b/Source/Generated/GB_sel__lt_thunk_uint64.c
index 34ebd66e23..db931a645e 100644
--- a/Source/Generated/GB_sel__lt_thunk_uint64.c
+++ b/Source/Generated/GB_sel__lt_thunk_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_thunk_uint8.c b/Source/Generated/GB_sel__lt_thunk_uint8.c
index b301bdd45b..4b84b20d36 100644
--- a/Source/Generated/GB_sel__lt_thunk_uint8.c
+++ b/Source/Generated/GB_sel__lt_thunk_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_zero_fp32.c b/Source/Generated/GB_sel__lt_zero_fp32.c
index bcb42a0f33..24121176e7 100644
--- a/Source/Generated/GB_sel__lt_zero_fp32.c
+++ b/Source/Generated/GB_sel__lt_zero_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_zero_fp64.c b/Source/Generated/GB_sel__lt_zero_fp64.c
index 97fba346ac..4fc8d98082 100644
--- a/Source/Generated/GB_sel__lt_zero_fp64.c
+++ b/Source/Generated/GB_sel__lt_zero_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_zero_int16.c b/Source/Generated/GB_sel__lt_zero_int16.c
index 58b3d48c66..bc9394acb6 100644
--- a/Source/Generated/GB_sel__lt_zero_int16.c
+++ b/Source/Generated/GB_sel__lt_zero_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_zero_int32.c b/Source/Generated/GB_sel__lt_zero_int32.c
index c66dcd1a7c..3b8d0fd1a2 100644
--- a/Source/Generated/GB_sel__lt_zero_int32.c
+++ b/Source/Generated/GB_sel__lt_zero_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_zero_int64.c b/Source/Generated/GB_sel__lt_zero_int64.c
index 583bdfed04..bac037db57 100644
--- a/Source/Generated/GB_sel__lt_zero_int64.c
+++ b/Source/Generated/GB_sel__lt_zero_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__lt_zero_int8.c b/Source/Generated/GB_sel__lt_zero_int8.c
index 2194b0ba65..ee3e1c0c0a 100644
--- a/Source/Generated/GB_sel__lt_zero_int8.c
+++ b/Source/Generated/GB_sel__lt_zero_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_any.c b/Source/Generated/GB_sel__ne_thunk_any.c
index 1ae654abb2..022b41483e 100644
--- a/Source/Generated/GB_sel__ne_thunk_any.c
+++ b/Source/Generated/GB_sel__ne_thunk_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_fp32.c b/Source/Generated/GB_sel__ne_thunk_fp32.c
index be4b42e9e4..6b662f3837 100644
--- a/Source/Generated/GB_sel__ne_thunk_fp32.c
+++ b/Source/Generated/GB_sel__ne_thunk_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_fp64.c b/Source/Generated/GB_sel__ne_thunk_fp64.c
index 9d68da2a8b..a33c9779c7 100644
--- a/Source/Generated/GB_sel__ne_thunk_fp64.c
+++ b/Source/Generated/GB_sel__ne_thunk_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_int16.c b/Source/Generated/GB_sel__ne_thunk_int16.c
index 4a1499dbbb..dab92f00bf 100644
--- a/Source/Generated/GB_sel__ne_thunk_int16.c
+++ b/Source/Generated/GB_sel__ne_thunk_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_int32.c b/Source/Generated/GB_sel__ne_thunk_int32.c
index 5e8ec1fd61..6c87f45612 100644
--- a/Source/Generated/GB_sel__ne_thunk_int32.c
+++ b/Source/Generated/GB_sel__ne_thunk_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_int64.c b/Source/Generated/GB_sel__ne_thunk_int64.c
index b9e02c90fd..fad9527206 100644
--- a/Source/Generated/GB_sel__ne_thunk_int64.c
+++ b/Source/Generated/GB_sel__ne_thunk_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_int8.c b/Source/Generated/GB_sel__ne_thunk_int8.c
index 51f5b3e63b..03e6ec7271 100644
--- a/Source/Generated/GB_sel__ne_thunk_int8.c
+++ b/Source/Generated/GB_sel__ne_thunk_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_uint16.c b/Source/Generated/GB_sel__ne_thunk_uint16.c
index c11c3df45a..305f79e02d 100644
--- a/Source/Generated/GB_sel__ne_thunk_uint16.c
+++ b/Source/Generated/GB_sel__ne_thunk_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_uint32.c b/Source/Generated/GB_sel__ne_thunk_uint32.c
index ac90a10d23..48bbd16f33 100644
--- a/Source/Generated/GB_sel__ne_thunk_uint32.c
+++ b/Source/Generated/GB_sel__ne_thunk_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_uint64.c b/Source/Generated/GB_sel__ne_thunk_uint64.c
index 3adbdd7ad7..9cb53199ba 100644
--- a/Source/Generated/GB_sel__ne_thunk_uint64.c
+++ b/Source/Generated/GB_sel__ne_thunk_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__ne_thunk_uint8.c b/Source/Generated/GB_sel__ne_thunk_uint8.c
index bde3b226e5..401717eddc 100644
--- a/Source/Generated/GB_sel__ne_thunk_uint8.c
+++ b/Source/Generated/GB_sel__ne_thunk_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_any.c b/Source/Generated/GB_sel__nonzero_any.c
index c76d909641..d4b46bea0b 100644
--- a/Source/Generated/GB_sel__nonzero_any.c
+++ b/Source/Generated/GB_sel__nonzero_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_bool.c b/Source/Generated/GB_sel__nonzero_bool.c
index 1623c55903..f2e51c7278 100644
--- a/Source/Generated/GB_sel__nonzero_bool.c
+++ b/Source/Generated/GB_sel__nonzero_bool.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_fp32.c b/Source/Generated/GB_sel__nonzero_fp32.c
index 12460f4a56..06d1bb66c1 100644
--- a/Source/Generated/GB_sel__nonzero_fp32.c
+++ b/Source/Generated/GB_sel__nonzero_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_fp64.c b/Source/Generated/GB_sel__nonzero_fp64.c
index 6f7f2953e9..28eeaaa7a2 100644
--- a/Source/Generated/GB_sel__nonzero_fp64.c
+++ b/Source/Generated/GB_sel__nonzero_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_int16.c b/Source/Generated/GB_sel__nonzero_int16.c
index cc032b359f..8af5b622bc 100644
--- a/Source/Generated/GB_sel__nonzero_int16.c
+++ b/Source/Generated/GB_sel__nonzero_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_int32.c b/Source/Generated/GB_sel__nonzero_int32.c
index 61b34d1985..04fbdb0758 100644
--- a/Source/Generated/GB_sel__nonzero_int32.c
+++ b/Source/Generated/GB_sel__nonzero_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_int64.c b/Source/Generated/GB_sel__nonzero_int64.c
index 35cd7f20a7..5124e640f0 100644
--- a/Source/Generated/GB_sel__nonzero_int64.c
+++ b/Source/Generated/GB_sel__nonzero_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_int8.c b/Source/Generated/GB_sel__nonzero_int8.c
index 2d3eae629b..c11d047f02 100644
--- a/Source/Generated/GB_sel__nonzero_int8.c
+++ b/Source/Generated/GB_sel__nonzero_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_uint16.c b/Source/Generated/GB_sel__nonzero_uint16.c
index e9ab85fbf9..d586358b28 100644
--- a/Source/Generated/GB_sel__nonzero_uint16.c
+++ b/Source/Generated/GB_sel__nonzero_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_uint32.c b/Source/Generated/GB_sel__nonzero_uint32.c
index 5a816dd356..3beec3001d 100644
--- a/Source/Generated/GB_sel__nonzero_uint32.c
+++ b/Source/Generated/GB_sel__nonzero_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_uint64.c b/Source/Generated/GB_sel__nonzero_uint64.c
index 2368b541ba..246b7c135e 100644
--- a/Source/Generated/GB_sel__nonzero_uint64.c
+++ b/Source/Generated/GB_sel__nonzero_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzero_uint8.c b/Source/Generated/GB_sel__nonzero_uint8.c
index 8176a9bb86..2b3dd1bcfc 100644
--- a/Source/Generated/GB_sel__nonzero_uint8.c
+++ b/Source/Generated/GB_sel__nonzero_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_any.c b/Source/Generated/GB_sel__nonzombie_any.c
index 44822d21fe..88b3065c36 100644
--- a/Source/Generated/GB_sel__nonzombie_any.c
+++ b/Source/Generated/GB_sel__nonzombie_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_bool.c b/Source/Generated/GB_sel__nonzombie_bool.c
index 2a6bbabce0..bd4a5cb4ce 100644
--- a/Source/Generated/GB_sel__nonzombie_bool.c
+++ b/Source/Generated/GB_sel__nonzombie_bool.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_fp32.c b/Source/Generated/GB_sel__nonzombie_fp32.c
index 6d8dc831bc..850c7392c6 100644
--- a/Source/Generated/GB_sel__nonzombie_fp32.c
+++ b/Source/Generated/GB_sel__nonzombie_fp32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_fp64.c b/Source/Generated/GB_sel__nonzombie_fp64.c
index b045ccbacf..1c6db3f9fb 100644
--- a/Source/Generated/GB_sel__nonzombie_fp64.c
+++ b/Source/Generated/GB_sel__nonzombie_fp64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_int16.c b/Source/Generated/GB_sel__nonzombie_int16.c
index e87ec9c11b..18e4102483 100644
--- a/Source/Generated/GB_sel__nonzombie_int16.c
+++ b/Source/Generated/GB_sel__nonzombie_int16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_int32.c b/Source/Generated/GB_sel__nonzombie_int32.c
index 5fa877e049..09ae8de3bc 100644
--- a/Source/Generated/GB_sel__nonzombie_int32.c
+++ b/Source/Generated/GB_sel__nonzombie_int32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_int64.c b/Source/Generated/GB_sel__nonzombie_int64.c
index 6fb85fe722..c5dc9a05cb 100644
--- a/Source/Generated/GB_sel__nonzombie_int64.c
+++ b/Source/Generated/GB_sel__nonzombie_int64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_int8.c b/Source/Generated/GB_sel__nonzombie_int8.c
index ad51e7bad8..c32ec1f7cf 100644
--- a/Source/Generated/GB_sel__nonzombie_int8.c
+++ b/Source/Generated/GB_sel__nonzombie_int8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_uint16.c b/Source/Generated/GB_sel__nonzombie_uint16.c
index d9ca1dd5bc..655d8ec5cb 100644
--- a/Source/Generated/GB_sel__nonzombie_uint16.c
+++ b/Source/Generated/GB_sel__nonzombie_uint16.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_uint32.c b/Source/Generated/GB_sel__nonzombie_uint32.c
index 7bea433e75..c662c9c064 100644
--- a/Source/Generated/GB_sel__nonzombie_uint32.c
+++ b/Source/Generated/GB_sel__nonzombie_uint32.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_uint64.c b/Source/Generated/GB_sel__nonzombie_uint64.c
index d057e5f687..f2ef739ccc 100644
--- a/Source/Generated/GB_sel__nonzombie_uint64.c
+++ b/Source/Generated/GB_sel__nonzombie_uint64.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__nonzombie_uint8.c b/Source/Generated/GB_sel__nonzombie_uint8.c
index 120ac0b3e4..3b993a8ead 100644
--- a/Source/Generated/GB_sel__nonzombie_uint8.c
+++ b/Source/Generated/GB_sel__nonzombie_uint8.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__offdiag_any.c b/Source/Generated/GB_sel__offdiag_any.c
index f1b06e3308..dbf7278402 100644
--- a/Source/Generated/GB_sel__offdiag_any.c
+++ b/Source/Generated/GB_sel__offdiag_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__resize_any.c b/Source/Generated/GB_sel__resize_any.c
index 5c7947eace..587f1c97b2 100644
--- a/Source/Generated/GB_sel__resize_any.c
+++ b/Source/Generated/GB_sel__resize_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__tril_any.c b/Source/Generated/GB_sel__tril_any.c
index 1efb4c5878..d7ebb4a3b3 100644
--- a/Source/Generated/GB_sel__tril_any.c
+++ b/Source/Generated/GB_sel__tril_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__triu_any.c b/Source/Generated/GB_sel__triu_any.c
index c53e75ba6e..808c894c05 100644
--- a/Source/Generated/GB_sel__triu_any.c
+++ b/Source/Generated/GB_sel__triu_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_sel__user_any.c b/Source/Generated/GB_sel__user_any.c
index ef6c282e92..2852802d81 100644
--- a/Source/Generated/GB_sel__user_any.c
+++ b/Source/Generated/GB_sel__user_any.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generated/GB_type__bool.c b/Source/Generated/GB_type__bool.c
new file mode 100644
index 0000000000..56ac40ca01
--- /dev/null
+++ b/Source/Generated/GB_type__bool.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// The operation is defined by the following types and operators:
+// C<M>=x (C is dense): GB_Cdense_05d__bool
+// C<A>=A (C is dense): GB_Cdense_06d__bool
+// C<M>=A (C is empty and A is dense): GB_Cdense_25__bool
+
+// C type:   bool
+
+#define GB_CTYPE \
+    bool
+
+#define GB_CX(p) Cx [p]
+
+// Cx [p] = x: store the scalar x in entry p of Cx
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// Cx [p] = Ax [pA]: copy entry pA of Ax into entry p of Cx
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// test the mask condition with Ax [pA]: true if nonzero (asize is not used)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_BOOL)
+
+//------------------------------------------------------------------------------
+// GB_Cdense_05d__bool: C<M>=x, when C is dense (x is read as a bool scalar)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__bool  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=x
+    const GrB_Matrix M,             // mask matrix of C<M>=x
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const GB_void *p_cwork,         // points to the scalar x; only read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    bool cwork = (*((const bool *) p_cwork)) ;  // keep p_cwork's const
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_06d__bool: C<A>=A, when C is dense (A is both mask and input)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__bool  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<A>=A
+    const GrB_Matrix A,             // input matrix, also the mask in C<A>=A
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_25__bool: C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__bool  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A
+    const GrB_Matrix M,             // mask matrix of C<M>=A
+    const GrB_Matrix A,             // input matrix of C<M>=A
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__fp32.c b/Source/Generated/GB_type__fp32.c
new file mode 100644
index 0000000000..30ac60790f
--- /dev/null
+++ b/Source/Generated/GB_type__fp32.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// The operation is defined by the following types and operators:
+// C<M>=x (C is dense): GB_Cdense_05d__fp32
+// C<A>=A (C is dense): GB_Cdense_06d__fp32
+// C<M>=A (C is empty and A is dense): GB_Cdense_25__fp32
+
+// C type:   float
+
+#define GB_CTYPE \
+    float
+
+#define GB_CX(p) Cx [p]
+
+// Cx [p] = x: store the scalar x in entry p of Cx
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// Cx [p] = Ax [pA]: copy entry pA of Ax into entry p of Cx
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// test the mask condition with Ax [pA]: true if nonzero (asize is not used)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_FP32)
+
+//------------------------------------------------------------------------------
+// GB_Cdense_05d__fp32: C<M>=x, when C is dense (x is read as a float scalar)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__fp32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=x
+    const GrB_Matrix M,             // mask matrix of C<M>=x
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const GB_void *p_cwork,         // points to the scalar x; only read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    float cwork = (*((const float *) p_cwork)) ;    // keep p_cwork's const
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_06d__fp32: C<A>=A, when C is dense (A is both mask and input)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__fp32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<A>=A
+    const GrB_Matrix A,             // input matrix, also the mask in C<A>=A
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_25__fp32: C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__fp32  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A
+    const GrB_Matrix M,             // mask matrix of C<M>=A
+    const GrB_Matrix A,             // input matrix of C<M>=A
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__fp64.c b/Source/Generated/GB_type__fp64.c
new file mode 100644
index 0000000000..e881e7e7fe
--- /dev/null
+++ b/Source/Generated/GB_type__fp64.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// The operation is defined by the following types and operators:
+// C<M>=x (C is dense): GB_Cdense_05d__fp64
+// C<A>=A (C is dense): GB_Cdense_06d__fp64
+// C<M>=A (C is empty and A is dense): GB_Cdense_25__fp64
+
+// C type:   double
+
+#define GB_CTYPE \
+    double
+
+#define GB_CX(p) Cx [p]
+
+// Cx [p] = x: store the scalar x in entry p of Cx
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// Cx [p] = Ax [pA]: copy entry pA of Ax into entry p of Cx
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// test the mask condition with Ax [pA]: true if nonzero (asize is not used)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_FP64)
+
+//------------------------------------------------------------------------------
+// GB_Cdense_05d__fp64: C<M>=x, when C is dense (x is read as a double scalar)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__fp64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=x
+    const GrB_Matrix M,             // mask matrix of C<M>=x
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const GB_void *p_cwork,         // points to the scalar x; only read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    double cwork = (*((const double *) p_cwork)) ;  // keep p_cwork's const
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_06d__fp64: C<A>=A, when C is dense (A is both mask and input)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__fp64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<A>=A
+    const GrB_Matrix A,             // input matrix, also the mask in C<A>=A
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_25__fp64: C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__fp64  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A
+    const GrB_Matrix M,             // mask matrix of C<M>=A
+    const GrB_Matrix A,             // input matrix of C<M>=A
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__include.h b/Source/Generated/GB_type__include.h
new file mode 100644
index 0000000000..72d78a5f60
--- /dev/null
+++ b/Source/Generated/GB_type__include.h
@@ -0,0 +1,427 @@
+//------------------------------------------------------------------------------
+// GB_type__include.h: definitions for GB_type__*.c
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+// This file has been automatically generated from Generator/GB_type.h
+
+
+GrB_Info GB_Cdense_05d__bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+
+GrB_Info GB_Cdense_05d__fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d__fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25__fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
diff --git a/Source/Generated/GB_type__int16.c b/Source/Generated/GB_type__int16.c
new file mode 100644
index 0000000000..87c30871f3
--- /dev/null
+++ b/Source/Generated/GB_type__int16.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// The operation is defined by the following types and operators:
+// C<M>=x (C is dense): GB_Cdense_05d__int16
+// C<A>=A (C is dense): GB_Cdense_06d__int16
+// C<M>=A (C is empty and A is dense): GB_Cdense_25__int16
+
+// C type:   int16_t
+
+#define GB_CTYPE \
+    int16_t
+
+#define GB_CX(p) Cx [p]
+
+// Cx [p] = x: store the scalar x in entry p of Cx
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// Cx [p] = Ax [pA]: copy entry pA of Ax into entry p of Cx
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// test the mask condition with Ax [pA]: true if nonzero (asize is not used)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    (GxB_NO_INT16)
+
+//------------------------------------------------------------------------------
+// GB_Cdense_05d__int16: C<M>=x, when C is dense (x is read as int16_t)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__int16  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=x
+    const GrB_Matrix M,             // mask matrix of C<M>=x
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const GB_void *p_cwork,         // points to the scalar x; only read here
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    int16_t cwork = (*((const int16_t *) p_cwork)) ;    // keep p_cwork's const
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_06d__int16: C<A>=A, when C is dense (A is both mask and input)
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__int16  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<A>=A
+    const GrB_Matrix A,             // input matrix, also the mask in C<A>=A
+    const bool Mask_struct,         // mask option for the template (confirm use)
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// GB_Cdense_25__int16: C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__int16  // GrB_SUCCESS, or GrB_NO_VALUE if disabled
+(
+    GrB_Matrix C,                   // output matrix of C<M>=A
+    const GrB_Matrix M,             // mask matrix of C<M>=A
+    const GrB_Matrix A,             // input matrix of C<M>=A
+    const int64_t *GB_RESTRICT kfirst_slice,    // for the included template
+    const int64_t *GB_RESTRICT klast_slice,     // for the included template
+    const int64_t *GB_RESTRICT pstart_slice,    // for the included template
+    const int ntasks,               // presumably # of tasks (confirm)
+    const int nthreads              // presumably # of threads (confirm)
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;         // kernel disabled: use the generic case
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__int32.c b/Source/Generated/GB_type__int32.c
new file mode 100644
index 0000000000..ff329c469a
--- /dev/null
+++ b/Source/Generated/GB_type__int32.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// Functions defined in this file, all hard-coded for the int32_t type:
+// C<M>=x (C is dense):         GB_Cdense_05d__int32
+// C<A>=A (C is dense):         GB_Cdense_06d__int32
+// C<M>=A (C empty, A dense):   GB_Cdense_25__int32
+
+// C type:   int32_t
+
+#define GB_CTYPE \
+    int32_t
+
+#define GB_CX(p) Cx [p]
+
+// GB_COPY_SCALAR_TO_C: store the scalar x into Cx [p]
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// GB_COPY_A_TO_C: copy the entry Ax [pA] into Cx [p]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// GB_AX_MASK: mask test on the value Ax [pA]; true if nonzero (asize unused)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// GB_PRAGMA_VECTORIZE: hard-coded loops can be vectorized (GB_PRAGMA_SIMD)
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// GB_DISABLE: if nonzero, skip the hard-coded kernels; use the generic case
+#define GB_DISABLE \
+    (GxB_NO_INT32)
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__int32   // C<M>=x (C dense), int32_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const GB_void *p_cwork,     // the scalar; read below as one int32_t
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    int32_t cwork = (*((int32_t *) p_cwork)) ;  // typed copy of the scalar
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__int32   // C<A>=A (C dense), int32_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__int32    // C<M>=A (C empty, A dense), int32_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__int64.c b/Source/Generated/GB_type__int64.c
new file mode 100644
index 0000000000..c9af7d5996
--- /dev/null
+++ b/Source/Generated/GB_type__int64.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// Functions defined in this file, all hard-coded for the int64_t type:
+// C<M>=x (C is dense):         GB_Cdense_05d__int64
+// C<A>=A (C is dense):         GB_Cdense_06d__int64
+// C<M>=A (C empty, A dense):   GB_Cdense_25__int64
+
+// C type:   int64_t
+
+#define GB_CTYPE \
+    int64_t
+
+#define GB_CX(p) Cx [p]
+
+// GB_COPY_SCALAR_TO_C: store the scalar x into Cx [p]
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// GB_COPY_A_TO_C: copy the entry Ax [pA] into Cx [p]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// GB_AX_MASK: mask test on the value Ax [pA]; true if nonzero (asize unused)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// GB_PRAGMA_VECTORIZE: hard-coded loops can be vectorized (GB_PRAGMA_SIMD)
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// GB_DISABLE: if nonzero, skip the hard-coded kernels; use the generic case
+#define GB_DISABLE \
+    (GxB_NO_INT64)
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__int64   // C<M>=x (C dense), int64_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const GB_void *p_cwork,     // the scalar; read below as one int64_t
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    int64_t cwork = (*((int64_t *) p_cwork)) ;  // typed copy of the scalar
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__int64   // C<A>=A (C dense), int64_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__int64    // C<M>=A (C empty, A dense), int64_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__int8.c b/Source/Generated/GB_type__int8.c
new file mode 100644
index 0000000000..1317971ed7
--- /dev/null
+++ b/Source/Generated/GB_type__int8.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// Functions defined in this file, all hard-coded for the int8_t type:
+// C<M>=x (C is dense):         GB_Cdense_05d__int8
+// C<A>=A (C is dense):         GB_Cdense_06d__int8
+// C<M>=A (C empty, A dense):   GB_Cdense_25__int8
+
+// C type:   int8_t
+
+#define GB_CTYPE \
+    int8_t
+
+#define GB_CX(p) Cx [p]
+
+// GB_COPY_SCALAR_TO_C: store the scalar x into Cx [p]
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// GB_COPY_A_TO_C: copy the entry Ax [pA] into Cx [p]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// GB_AX_MASK: mask test on the value Ax [pA]; true if nonzero (asize unused)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// GB_PRAGMA_VECTORIZE: hard-coded loops can be vectorized (GB_PRAGMA_SIMD)
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// GB_DISABLE: if nonzero, skip the hard-coded kernels; use the generic case
+#define GB_DISABLE \
+    (GxB_NO_INT8)
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__int8    // C<M>=x (C dense), int8_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const GB_void *p_cwork,     // the scalar; read below as one int8_t
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    int8_t cwork = (*((int8_t *) p_cwork)) ;    // typed copy of the scalar
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__int8    // C<A>=A (C dense), int8_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__int8     // C<M>=A (C empty, A dense), int8_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__uint16.c b/Source/Generated/GB_type__uint16.c
new file mode 100644
index 0000000000..d44dccb01d
--- /dev/null
+++ b/Source/Generated/GB_type__uint16.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// Functions defined in this file, all hard-coded for the uint16_t type:
+// C<M>=x (C is dense):         GB_Cdense_05d__uint16
+// C<A>=A (C is dense):         GB_Cdense_06d__uint16
+// C<M>=A (C empty, A dense):   GB_Cdense_25__uint16
+
+// C type:   uint16_t
+
+#define GB_CTYPE \
+    uint16_t
+
+#define GB_CX(p) Cx [p]
+
+// GB_COPY_SCALAR_TO_C: store the scalar x into Cx [p]
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// GB_COPY_A_TO_C: copy the entry Ax [pA] into Cx [p]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// GB_AX_MASK: mask test on the value Ax [pA]; true if nonzero (asize unused)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// GB_PRAGMA_VECTORIZE: hard-coded loops can be vectorized (GB_PRAGMA_SIMD)
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// GB_DISABLE: if nonzero, skip the hard-coded kernels; use the generic case
+#define GB_DISABLE \
+    (GxB_NO_UINT16)
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__uint16  // C<M>=x (C dense), uint16_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const GB_void *p_cwork,     // the scalar; read below as one uint16_t
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    uint16_t cwork = (*((uint16_t *) p_cwork)) ;    // typed copy of the scalar
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__uint16  // C<A>=A (C dense), uint16_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__uint16   // C<M>=A (C empty, A dense), uint16_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__uint32.c b/Source/Generated/GB_type__uint32.c
new file mode 100644
index 0000000000..320ee651f4
--- /dev/null
+++ b/Source/Generated/GB_type__uint32.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// Functions defined in this file, all hard-coded for the uint32_t type:
+// C<M>=x (C is dense):         GB_Cdense_05d__uint32
+// C<A>=A (C is dense):         GB_Cdense_06d__uint32
+// C<M>=A (C empty, A dense):   GB_Cdense_25__uint32
+
+// C type:   uint32_t
+
+#define GB_CTYPE \
+    uint32_t
+
+#define GB_CX(p) Cx [p]
+
+// GB_COPY_SCALAR_TO_C: store the scalar x into Cx [p]
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// GB_COPY_A_TO_C: copy the entry Ax [pA] into Cx [p]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// GB_AX_MASK: mask test on the value Ax [pA]; true if nonzero (asize unused)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// GB_PRAGMA_VECTORIZE: hard-coded loops can be vectorized (GB_PRAGMA_SIMD)
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// GB_DISABLE: if nonzero, skip the hard-coded kernels; use the generic case
+#define GB_DISABLE \
+    (GxB_NO_UINT32)
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__uint32  // C<M>=x (C dense), uint32_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const GB_void *p_cwork,     // the scalar; read below as one uint32_t
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    uint32_t cwork = (*((uint32_t *) p_cwork)) ;    // typed copy of the scalar
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__uint32  // C<A>=A (C dense), uint32_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__uint32   // C<M>=A (C empty, A dense), uint32_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__uint64.c b/Source/Generated/GB_type__uint64.c
new file mode 100644
index 0000000000..5b92193a25
--- /dev/null
+++ b/Source/Generated/GB_type__uint64.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// Functions defined in this file, all hard-coded for the uint64_t type:
+// C<M>=x (C is dense):         GB_Cdense_05d__uint64
+// C<A>=A (C is dense):         GB_Cdense_06d__uint64
+// C<M>=A (C empty, A dense):   GB_Cdense_25__uint64
+
+// C type:   uint64_t
+
+#define GB_CTYPE \
+    uint64_t
+
+#define GB_CX(p) Cx [p]
+
+// GB_COPY_SCALAR_TO_C: store the scalar x into Cx [p]
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// GB_COPY_A_TO_C: copy the entry Ax [pA] into Cx [p]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// GB_AX_MASK: mask test on the value Ax [pA]; true if nonzero (asize unused)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// GB_PRAGMA_VECTORIZE: hard-coded loops can be vectorized (GB_PRAGMA_SIMD)
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// GB_DISABLE: if nonzero, skip the hard-coded kernels; use the generic case
+#define GB_DISABLE \
+    (GxB_NO_UINT64)
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__uint64  // C<M>=x (C dense), uint64_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const GB_void *p_cwork,     // the scalar; read below as one uint64_t
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    uint64_t cwork = (*((uint64_t *) p_cwork)) ;    // typed copy of the scalar
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__uint64  // C<A>=A (C dense), uint64_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__uint64   // C<M>=A (C empty, A dense), uint64_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_type__uint8.c b/Source/Generated/GB_type__uint8.c
new file mode 100644
index 0000000000..585bf31498
--- /dev/null
+++ b/Source/Generated/GB_type__uint8.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// Functions defined in this file, all hard-coded for the uint8_t type:
+// C<M>=x (C is dense):         GB_Cdense_05d__uint8
+// C<A>=A (C is dense):         GB_Cdense_06d__uint8
+// C<M>=A (C empty, A dense):   GB_Cdense_25__uint8
+
+// C type:   uint8_t
+
+#define GB_CTYPE \
+    uint8_t
+
+#define GB_CX(p) Cx [p]
+
+// GB_COPY_SCALAR_TO_C: store the scalar x into Cx [p]
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// GB_COPY_A_TO_C: copy the entry Ax [pA] into Cx [p]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// GB_AX_MASK: mask test on the value Ax [pA]; true if nonzero (asize unused)
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// GB_PRAGMA_VECTORIZE: hard-coded loops can be vectorized (GB_PRAGMA_SIMD)
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// GB_DISABLE: if nonzero, skip the hard-coded kernels; use the generic case
+#define GB_DISABLE \
+    (GxB_NO_UINT8)
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d__uint8   // C<M>=x (C dense), uint8_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const GB_void *p_cwork,     // the scalar; read below as one uint8_t
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    uint8_t cwork = (*((uint8_t *) p_cwork)) ;  // typed copy of the scalar
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d__uint8   // C<A>=A (C dense), uint8_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const bool Mask_struct,     // presumably: structural-mask flag (confirm)
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25__uint8    // C<M>=A (C empty, A dense), uint8_t
+(
+    GrB_Matrix C,               // not referenced here; see template
+    const GrB_Matrix M,         // not referenced here; see template
+    const GrB_Matrix A,         // not referenced here; see template
+    const int64_t *GB_RESTRICT kfirst_slice,    // not referenced here
+    const int64_t *GB_RESTRICT klast_slice,     // not referenced here
+    const int64_t *GB_RESTRICT pstart_slice,    // not referenced here
+    const int ntasks,           // not referenced here; see template
+    const int nthreads          // not referenced here; see template
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;     // hard-coded kernel compiled out
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;      // hard-coded kernel compiled in
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generated/GB_unaryop__abs_bool_bool.c b/Source/Generated/GB_unaryop__abs_bool_bool.c
index a2e9affba5..989eb0877c 100644
--- a/Source/Generated/GB_unaryop__abs_bool_bool.c
+++ b/Source/Generated/GB_unaryop__abs_bool_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_fp32.c b/Source/Generated/GB_unaryop__abs_bool_fp32.c
index 3e39a86e33..10228ac768 100644
--- a/Source/Generated/GB_unaryop__abs_bool_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_bool_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_fp64.c b/Source/Generated/GB_unaryop__abs_bool_fp64.c
index e2e6be7833..922596fc44 100644
--- a/Source/Generated/GB_unaryop__abs_bool_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_bool_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_int16.c b/Source/Generated/GB_unaryop__abs_bool_int16.c
index abde5a4df3..64267d400c 100644
--- a/Source/Generated/GB_unaryop__abs_bool_int16.c
+++ b/Source/Generated/GB_unaryop__abs_bool_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_int32.c b/Source/Generated/GB_unaryop__abs_bool_int32.c
index a042d98223..740c6fb9ff 100644
--- a/Source/Generated/GB_unaryop__abs_bool_int32.c
+++ b/Source/Generated/GB_unaryop__abs_bool_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_int64.c b/Source/Generated/GB_unaryop__abs_bool_int64.c
index 2075cfbea9..29c04eafb5 100644
--- a/Source/Generated/GB_unaryop__abs_bool_int64.c
+++ b/Source/Generated/GB_unaryop__abs_bool_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_int8.c b/Source/Generated/GB_unaryop__abs_bool_int8.c
index 2b5d330efe..fd01d5e2b6 100644
--- a/Source/Generated/GB_unaryop__abs_bool_int8.c
+++ b/Source/Generated/GB_unaryop__abs_bool_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_uint16.c b/Source/Generated/GB_unaryop__abs_bool_uint16.c
index d701955aae..f988b16900 100644
--- a/Source/Generated/GB_unaryop__abs_bool_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_bool_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_uint32.c b/Source/Generated/GB_unaryop__abs_bool_uint32.c
index 76afc54f8e..1824162536 100644
--- a/Source/Generated/GB_unaryop__abs_bool_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_bool_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_uint64.c b/Source/Generated/GB_unaryop__abs_bool_uint64.c
index 7a5590c49e..08384939d4 100644
--- a/Source/Generated/GB_unaryop__abs_bool_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_bool_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_bool_uint8.c b/Source/Generated/GB_unaryop__abs_bool_uint8.c
index 7f2dbea7e0..d56f488a84 100644
--- a/Source/Generated/GB_unaryop__abs_bool_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_bool_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_bool.c b/Source/Generated/GB_unaryop__abs_fp32_bool.c
index 7faf3f4c3e..3599e384e4 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_bool.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_fp32.c b/Source/Generated/GB_unaryop__abs_fp32_fp32.c
index 989c7112e8..b7184b4a18 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_fp64.c b/Source/Generated/GB_unaryop__abs_fp32_fp64.c
index 3872c0482d..d7c9009bda 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_int16.c b/Source/Generated/GB_unaryop__abs_fp32_int16.c
index 9a7fd9d346..fbeaf886ad 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_int16.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_int32.c b/Source/Generated/GB_unaryop__abs_fp32_int32.c
index ca72383e48..f3a387f294 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_int32.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_int64.c b/Source/Generated/GB_unaryop__abs_fp32_int64.c
index 8bc9a7172c..b8bc4444dc 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_int64.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_int8.c b/Source/Generated/GB_unaryop__abs_fp32_int8.c
index f013171a0a..7aca761202 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_int8.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_uint16.c b/Source/Generated/GB_unaryop__abs_fp32_uint16.c
index b944a80753..9c21b00bb7 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_uint32.c b/Source/Generated/GB_unaryop__abs_fp32_uint32.c
index f70a1fc123..39cf5a3552 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_uint64.c b/Source/Generated/GB_unaryop__abs_fp32_uint64.c
index 91d4f0a4c3..5e822b2bba 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp32_uint8.c b/Source/Generated/GB_unaryop__abs_fp32_uint8.c
index a365b49c0e..63b612e2f1 100644
--- a/Source/Generated/GB_unaryop__abs_fp32_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_fp32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabsf (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_bool.c b/Source/Generated/GB_unaryop__abs_fp64_bool.c
index a87943f878..8f27f78bb4 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_bool.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_fp32.c b/Source/Generated/GB_unaryop__abs_fp64_fp32.c
index a47290f4a6..9c1aafe63c 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_fp64.c b/Source/Generated/GB_unaryop__abs_fp64_fp64.c
index b373a60595..ac90084196 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_int16.c b/Source/Generated/GB_unaryop__abs_fp64_int16.c
index 91ec32345c..0111d3ef59 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_int16.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_int32.c b/Source/Generated/GB_unaryop__abs_fp64_int32.c
index 7184177d32..75bd241be8 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_int32.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_int64.c b/Source/Generated/GB_unaryop__abs_fp64_int64.c
index 492466d048..aca23ddda3 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_int64.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_int8.c b/Source/Generated/GB_unaryop__abs_fp64_int8.c
index 7d89002553..f1b6769bae 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_int8.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_uint16.c b/Source/Generated/GB_unaryop__abs_fp64_uint16.c
index d4cf0aacf4..8b68318876 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_uint32.c b/Source/Generated/GB_unaryop__abs_fp64_uint32.c
index 744befbab5..a83923669f 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_uint64.c b/Source/Generated/GB_unaryop__abs_fp64_uint64.c
index 9e6708ba89..222eb23ab1 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_fp64_uint8.c b/Source/Generated/GB_unaryop__abs_fp64_uint8.c
index 2a3cb95e90..627c77dc7d 100644
--- a/Source/Generated/GB_unaryop__abs_fp64_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_fp64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = fabs (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_bool.c b/Source/Generated/GB_unaryop__abs_int16_bool.c
index df1c876aff..f97d7055a5 100644
--- a/Source/Generated/GB_unaryop__abs_int16_bool.c
+++ b/Source/Generated/GB_unaryop__abs_int16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_fp32.c b/Source/Generated/GB_unaryop__abs_int16_fp32.c
index ac0e93c99c..ed1078143f 100644
--- a/Source/Generated/GB_unaryop__abs_int16_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_int16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_fp64.c b/Source/Generated/GB_unaryop__abs_int16_fp64.c
index c572094a56..ef446e05e7 100644
--- a/Source/Generated/GB_unaryop__abs_int16_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_int16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_int16.c b/Source/Generated/GB_unaryop__abs_int16_int16.c
index dd9066d985..b97a5c18d3 100644
--- a/Source/Generated/GB_unaryop__abs_int16_int16.c
+++ b/Source/Generated/GB_unaryop__abs_int16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_int32.c b/Source/Generated/GB_unaryop__abs_int16_int32.c
index 65b0ee4ade..c81bb9ce29 100644
--- a/Source/Generated/GB_unaryop__abs_int16_int32.c
+++ b/Source/Generated/GB_unaryop__abs_int16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_int64.c b/Source/Generated/GB_unaryop__abs_int16_int64.c
index 61d230911f..abc0949d1c 100644
--- a/Source/Generated/GB_unaryop__abs_int16_int64.c
+++ b/Source/Generated/GB_unaryop__abs_int16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_int8.c b/Source/Generated/GB_unaryop__abs_int16_int8.c
index 4693e3fec4..949d521594 100644
--- a/Source/Generated/GB_unaryop__abs_int16_int8.c
+++ b/Source/Generated/GB_unaryop__abs_int16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_uint16.c b/Source/Generated/GB_unaryop__abs_int16_uint16.c
index bcec1f58e3..8da0fb076b 100644
--- a/Source/Generated/GB_unaryop__abs_int16_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_int16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_uint32.c b/Source/Generated/GB_unaryop__abs_int16_uint32.c
index c7cf11bc91..c11df7caff 100644
--- a/Source/Generated/GB_unaryop__abs_int16_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_int16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_uint64.c b/Source/Generated/GB_unaryop__abs_int16_uint64.c
index 8d811916cf..40488cb68f 100644
--- a/Source/Generated/GB_unaryop__abs_int16_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_int16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int16_uint8.c b/Source/Generated/GB_unaryop__abs_int16_uint8.c
index e2f56f0ca9..75edd3fece 100644
--- a/Source/Generated/GB_unaryop__abs_int16_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_int16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_bool.c b/Source/Generated/GB_unaryop__abs_int32_bool.c
index b3b7b47c23..328e7a0201 100644
--- a/Source/Generated/GB_unaryop__abs_int32_bool.c
+++ b/Source/Generated/GB_unaryop__abs_int32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_fp32.c b/Source/Generated/GB_unaryop__abs_int32_fp32.c
index b3ee34746d..7abf60b6ab 100644
--- a/Source/Generated/GB_unaryop__abs_int32_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_int32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_fp64.c b/Source/Generated/GB_unaryop__abs_int32_fp64.c
index 48a8c97af4..72fd556836 100644
--- a/Source/Generated/GB_unaryop__abs_int32_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_int32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_int16.c b/Source/Generated/GB_unaryop__abs_int32_int16.c
index c11e8484d7..8adf59bba8 100644
--- a/Source/Generated/GB_unaryop__abs_int32_int16.c
+++ b/Source/Generated/GB_unaryop__abs_int32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_int32.c b/Source/Generated/GB_unaryop__abs_int32_int32.c
index dd402056a6..572c859201 100644
--- a/Source/Generated/GB_unaryop__abs_int32_int32.c
+++ b/Source/Generated/GB_unaryop__abs_int32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_int64.c b/Source/Generated/GB_unaryop__abs_int32_int64.c
index 7fa55e8e5f..c3efadc902 100644
--- a/Source/Generated/GB_unaryop__abs_int32_int64.c
+++ b/Source/Generated/GB_unaryop__abs_int32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_int8.c b/Source/Generated/GB_unaryop__abs_int32_int8.c
index d10a4f40a3..67e21354f1 100644
--- a/Source/Generated/GB_unaryop__abs_int32_int8.c
+++ b/Source/Generated/GB_unaryop__abs_int32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_uint16.c b/Source/Generated/GB_unaryop__abs_int32_uint16.c
index e07e420af3..bb9392034d 100644
--- a/Source/Generated/GB_unaryop__abs_int32_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_int32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_uint32.c b/Source/Generated/GB_unaryop__abs_int32_uint32.c
index e1ec8e2c7d..756f69747b 100644
--- a/Source/Generated/GB_unaryop__abs_int32_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_int32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_uint64.c b/Source/Generated/GB_unaryop__abs_int32_uint64.c
index b700a692d0..476ee2d804 100644
--- a/Source/Generated/GB_unaryop__abs_int32_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_int32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int32_uint8.c b/Source/Generated/GB_unaryop__abs_int32_uint8.c
index 330421afb9..29242442a0 100644
--- a/Source/Generated/GB_unaryop__abs_int32_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_int32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_bool.c b/Source/Generated/GB_unaryop__abs_int64_bool.c
index 8a59caa657..5caf38a3d5 100644
--- a/Source/Generated/GB_unaryop__abs_int64_bool.c
+++ b/Source/Generated/GB_unaryop__abs_int64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_fp32.c b/Source/Generated/GB_unaryop__abs_int64_fp32.c
index bc8cb57168..da4c345df4 100644
--- a/Source/Generated/GB_unaryop__abs_int64_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_int64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_fp64.c b/Source/Generated/GB_unaryop__abs_int64_fp64.c
index ab9ba6eb22..6c6b819bb6 100644
--- a/Source/Generated/GB_unaryop__abs_int64_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_int64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_int16.c b/Source/Generated/GB_unaryop__abs_int64_int16.c
index 9d948a99b0..2a15d6915e 100644
--- a/Source/Generated/GB_unaryop__abs_int64_int16.c
+++ b/Source/Generated/GB_unaryop__abs_int64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_int32.c b/Source/Generated/GB_unaryop__abs_int64_int32.c
index c99725534e..b68d1c8b24 100644
--- a/Source/Generated/GB_unaryop__abs_int64_int32.c
+++ b/Source/Generated/GB_unaryop__abs_int64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_int64.c b/Source/Generated/GB_unaryop__abs_int64_int64.c
index 5d7eabfe99..1288ed7330 100644
--- a/Source/Generated/GB_unaryop__abs_int64_int64.c
+++ b/Source/Generated/GB_unaryop__abs_int64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_int8.c b/Source/Generated/GB_unaryop__abs_int64_int8.c
index 4b4e198b19..44f9a8e55a 100644
--- a/Source/Generated/GB_unaryop__abs_int64_int8.c
+++ b/Source/Generated/GB_unaryop__abs_int64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_uint16.c b/Source/Generated/GB_unaryop__abs_int64_uint16.c
index 9dd5be9f80..1499343e47 100644
--- a/Source/Generated/GB_unaryop__abs_int64_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_int64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_uint32.c b/Source/Generated/GB_unaryop__abs_int64_uint32.c
index 24794fe4bf..b59a1629ef 100644
--- a/Source/Generated/GB_unaryop__abs_int64_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_int64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_uint64.c b/Source/Generated/GB_unaryop__abs_int64_uint64.c
index 0a56f2a83b..9b56e7db75 100644
--- a/Source/Generated/GB_unaryop__abs_int64_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_int64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int64_uint8.c b/Source/Generated/GB_unaryop__abs_int64_uint8.c
index e3e5252d1c..6577066e9b 100644
--- a/Source/Generated/GB_unaryop__abs_int64_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_int64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_bool.c b/Source/Generated/GB_unaryop__abs_int8_bool.c
index f306eb662f..43cc59a445 100644
--- a/Source/Generated/GB_unaryop__abs_int8_bool.c
+++ b/Source/Generated/GB_unaryop__abs_int8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_fp32.c b/Source/Generated/GB_unaryop__abs_int8_fp32.c
index 0720c692a9..1844cb5ed2 100644
--- a/Source/Generated/GB_unaryop__abs_int8_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_int8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_fp64.c b/Source/Generated/GB_unaryop__abs_int8_fp64.c
index dc5b0b540f..3f8c244892 100644
--- a/Source/Generated/GB_unaryop__abs_int8_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_int8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_int16.c b/Source/Generated/GB_unaryop__abs_int8_int16.c
index 024f9b0f44..dd056f843f 100644
--- a/Source/Generated/GB_unaryop__abs_int8_int16.c
+++ b/Source/Generated/GB_unaryop__abs_int8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_int32.c b/Source/Generated/GB_unaryop__abs_int8_int32.c
index 3df42dc149..294921d0d2 100644
--- a/Source/Generated/GB_unaryop__abs_int8_int32.c
+++ b/Source/Generated/GB_unaryop__abs_int8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_int64.c b/Source/Generated/GB_unaryop__abs_int8_int64.c
index 00025afa2e..23325c3a58 100644
--- a/Source/Generated/GB_unaryop__abs_int8_int64.c
+++ b/Source/Generated/GB_unaryop__abs_int8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_int8.c b/Source/Generated/GB_unaryop__abs_int8_int8.c
index d37d84dd67..63aa90ce2a 100644
--- a/Source/Generated/GB_unaryop__abs_int8_int8.c
+++ b/Source/Generated/GB_unaryop__abs_int8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_uint16.c b/Source/Generated/GB_unaryop__abs_int8_uint16.c
index 096eca9da3..6c2c6482f3 100644
--- a/Source/Generated/GB_unaryop__abs_int8_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_int8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_uint32.c b/Source/Generated/GB_unaryop__abs_int8_uint32.c
index ded56d3b05..6592c84278 100644
--- a/Source/Generated/GB_unaryop__abs_int8_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_int8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_uint64.c b/Source/Generated/GB_unaryop__abs_int8_uint64.c
index a5998b8192..9257a03270 100644
--- a/Source/Generated/GB_unaryop__abs_int8_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_int8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_int8_uint8.c b/Source/Generated/GB_unaryop__abs_int8_uint8.c
index 6e35b96e7a..c3ce32fa21 100644
--- a/Source/Generated/GB_unaryop__abs_int8_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_int8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IABS (x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_bool.c b/Source/Generated/GB_unaryop__abs_uint16_bool.c
index dc08afbe58..324e2a6021 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_bool.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_fp32.c b/Source/Generated/GB_unaryop__abs_uint16_fp32.c
index 89c8f3a20a..7e9bed4d59 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_fp64.c b/Source/Generated/GB_unaryop__abs_uint16_fp64.c
index 76679fd685..9c68db7838 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_int16.c b/Source/Generated/GB_unaryop__abs_uint16_int16.c
index 1f2bbc2ed1..0428bfe9d4 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_int16.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_int32.c b/Source/Generated/GB_unaryop__abs_uint16_int32.c
index 148783cce2..5cf43c437a 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_int32.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_int64.c b/Source/Generated/GB_unaryop__abs_uint16_int64.c
index 931ef231c9..c5ea6021a3 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_int64.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_int8.c b/Source/Generated/GB_unaryop__abs_uint16_int8.c
index f1b31635f3..6c35210b41 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_int8.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_uint16.c b/Source/Generated/GB_unaryop__abs_uint16_uint16.c
index b260d55436..de3fc82541 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_uint32.c b/Source/Generated/GB_unaryop__abs_uint16_uint32.c
index 6477f11485..f8d4e6ce33 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_uint64.c b/Source/Generated/GB_unaryop__abs_uint16_uint64.c
index 2a3ac6404b..b5ac933140 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint16_uint8.c b/Source/Generated/GB_unaryop__abs_uint16_uint8.c
index b0d353e435..b95c80d4f9 100644
--- a/Source/Generated/GB_unaryop__abs_uint16_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_uint16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_bool.c b/Source/Generated/GB_unaryop__abs_uint32_bool.c
index d534f42bca..a4eedd3f1f 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_bool.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_fp32.c b/Source/Generated/GB_unaryop__abs_uint32_fp32.c
index 55c21114ce..2fbdce073a 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_fp64.c b/Source/Generated/GB_unaryop__abs_uint32_fp64.c
index b071cf6245..c00574226e 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_int16.c b/Source/Generated/GB_unaryop__abs_uint32_int16.c
index 922bc124c9..5f91e39574 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_int16.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_int32.c b/Source/Generated/GB_unaryop__abs_uint32_int32.c
index 602efb1df8..e6a3ff425a 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_int32.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_int64.c b/Source/Generated/GB_unaryop__abs_uint32_int64.c
index bb72986b80..d9ad119beb 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_int64.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_int8.c b/Source/Generated/GB_unaryop__abs_uint32_int8.c
index 6a90796e3e..bfed42990f 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_int8.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_uint16.c b/Source/Generated/GB_unaryop__abs_uint32_uint16.c
index ca06daf7b5..e89036c0be 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_uint32.c b/Source/Generated/GB_unaryop__abs_uint32_uint32.c
index ee67e8ed6d..ab849941f2 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_uint64.c b/Source/Generated/GB_unaryop__abs_uint32_uint64.c
index ad4436d515..b993cedbd0 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint32_uint8.c b/Source/Generated/GB_unaryop__abs_uint32_uint8.c
index 916722ffd9..0393b347ee 100644
--- a/Source/Generated/GB_unaryop__abs_uint32_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_uint32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_bool.c b/Source/Generated/GB_unaryop__abs_uint64_bool.c
index f0fbdd0ca2..73869893c1 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_bool.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_fp32.c b/Source/Generated/GB_unaryop__abs_uint64_fp32.c
index ebf0f94da1..7bb8f48b9d 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_fp64.c b/Source/Generated/GB_unaryop__abs_uint64_fp64.c
index 7a28df1a08..92e82080e7 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_int16.c b/Source/Generated/GB_unaryop__abs_uint64_int16.c
index afb27d2232..c2daf60d7b 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_int16.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_int32.c b/Source/Generated/GB_unaryop__abs_uint64_int32.c
index 8ad8533b1b..bb5b77dd7b 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_int32.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_int64.c b/Source/Generated/GB_unaryop__abs_uint64_int64.c
index f773096433..498dc16b75 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_int64.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_int8.c b/Source/Generated/GB_unaryop__abs_uint64_int8.c
index 677b723972..5a3c5dabd3 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_int8.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_uint16.c b/Source/Generated/GB_unaryop__abs_uint64_uint16.c
index f7f1ff60e5..2b165becd2 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_uint32.c b/Source/Generated/GB_unaryop__abs_uint64_uint32.c
index 5e7496ed6b..a542375717 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_uint64.c b/Source/Generated/GB_unaryop__abs_uint64_uint64.c
index fd2151d542..a23358d622 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint64_uint8.c b/Source/Generated/GB_unaryop__abs_uint64_uint8.c
index 1a599df678..e2a46ce597 100644
--- a/Source/Generated/GB_unaryop__abs_uint64_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_uint64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_bool.c b/Source/Generated/GB_unaryop__abs_uint8_bool.c
index bc350ef550..50430cb93b 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_bool.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_fp32.c b/Source/Generated/GB_unaryop__abs_uint8_fp32.c
index aea802e1c4..bc99234adb 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_fp32.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_fp64.c b/Source/Generated/GB_unaryop__abs_uint8_fp64.c
index ac39e55de9..2cf34eed48 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_fp64.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_int16.c b/Source/Generated/GB_unaryop__abs_uint8_int16.c
index ca024f24b9..866cf575ff 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_int16.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_int32.c b/Source/Generated/GB_unaryop__abs_uint8_int32.c
index 0286190771..93838faaf4 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_int32.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_int64.c b/Source/Generated/GB_unaryop__abs_uint8_int64.c
index 3d726ee993..2a0d1e199f 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_int64.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_int8.c b/Source/Generated/GB_unaryop__abs_uint8_int8.c
index 17c082c516..e4f9bd0c89 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_int8.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_uint16.c b/Source/Generated/GB_unaryop__abs_uint8_uint16.c
index d899d8d15f..caa6c49c0d 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_uint16.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_uint32.c b/Source/Generated/GB_unaryop__abs_uint8_uint32.c
index 89571c68ff..bdb0f70cce 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_uint32.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_uint64.c b/Source/Generated/GB_unaryop__abs_uint8_uint64.c
index f27b75b088..0b5b4accbd 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_uint64.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__abs_uint8_uint8.c b/Source/Generated/GB_unaryop__abs_uint8_uint8.c
index 649f10b28b..b8a7b5fe3f 100644
--- a/Source/Generated/GB_unaryop__abs_uint8_uint8.c
+++ b/Source/Generated/GB_unaryop__abs_uint8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__abs_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_bool.c b/Source/Generated/GB_unaryop__ainv_bool_bool.c
index e00f9a4b75..e1cb9be2de 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_fp32.c b/Source/Generated/GB_unaryop__ainv_bool_fp32.c
index 6e0e74e66e..9b7ba587bb 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_fp64.c b/Source/Generated/GB_unaryop__ainv_bool_fp64.c
index 9d0999ba08..91151d9273 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_int16.c b/Source/Generated/GB_unaryop__ainv_bool_int16.c
index b6c77e06f3..518225e199 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_int32.c b/Source/Generated/GB_unaryop__ainv_bool_int32.c
index ab3f5b28bd..f801e68946 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_int64.c b/Source/Generated/GB_unaryop__ainv_bool_int64.c
index cee5effe3c..56455758d3 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_int8.c b/Source/Generated/GB_unaryop__ainv_bool_int8.c
index da0c5953bc..e46fd48993 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_uint16.c b/Source/Generated/GB_unaryop__ainv_bool_uint16.c
index d8c59272ae..7cd2e34830 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_uint32.c b/Source/Generated/GB_unaryop__ainv_bool_uint32.c
index 16247c1fb3..1006afdfb3 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_uint64.c b/Source/Generated/GB_unaryop__ainv_bool_uint64.c
index 9858f03615..39038330b6 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_bool_uint8.c b/Source/Generated/GB_unaryop__ainv_bool_uint8.c
index 28387ce727..0ef3fb03e5 100644
--- a/Source/Generated/GB_unaryop__ainv_bool_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_bool_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_bool.c b/Source/Generated/GB_unaryop__ainv_fp32_bool.c
index a2dfc2f7b0..13a116e23e 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_fp32.c b/Source/Generated/GB_unaryop__ainv_fp32_fp32.c
index e56fc692f9..259bb5c508 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_fp64.c b/Source/Generated/GB_unaryop__ainv_fp32_fp64.c
index 58983c05e4..99bd19003c 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_int16.c b/Source/Generated/GB_unaryop__ainv_fp32_int16.c
index ee029832fd..1a2a136b67 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_int32.c b/Source/Generated/GB_unaryop__ainv_fp32_int32.c
index 7200845441..2101bdde98 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_int64.c b/Source/Generated/GB_unaryop__ainv_fp32_int64.c
index 54bc4760e4..6c5fd0815a 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_int8.c b/Source/Generated/GB_unaryop__ainv_fp32_int8.c
index bb53101f5b..e82df52041 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_uint16.c b/Source/Generated/GB_unaryop__ainv_fp32_uint16.c
index 14e4616eb9..ed87784883 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_uint32.c b/Source/Generated/GB_unaryop__ainv_fp32_uint32.c
index 7a676efa56..9f684f3575 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_uint64.c b/Source/Generated/GB_unaryop__ainv_fp32_uint64.c
index f0f833d6f5..1a438eae8b 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp32_uint8.c b/Source/Generated/GB_unaryop__ainv_fp32_uint8.c
index ad1394d70f..785c2c17c0 100644
--- a/Source/Generated/GB_unaryop__ainv_fp32_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_fp32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_bool.c b/Source/Generated/GB_unaryop__ainv_fp64_bool.c
index 2c3161524a..685dbfe6a9 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_fp32.c b/Source/Generated/GB_unaryop__ainv_fp64_fp32.c
index 2e69765bf1..f4ad554c85 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_fp64.c b/Source/Generated/GB_unaryop__ainv_fp64_fp64.c
index f2942c048f..bdac0e4f94 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_int16.c b/Source/Generated/GB_unaryop__ainv_fp64_int16.c
index 199f772518..9c5f9f9450 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_int32.c b/Source/Generated/GB_unaryop__ainv_fp64_int32.c
index 117ec144ee..6a17df5f2a 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_int64.c b/Source/Generated/GB_unaryop__ainv_fp64_int64.c
index 8f79ac7fb1..8499487740 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_int8.c b/Source/Generated/GB_unaryop__ainv_fp64_int8.c
index ddca03ccb2..edd70020bd 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_uint16.c b/Source/Generated/GB_unaryop__ainv_fp64_uint16.c
index a3d81d39c7..18e8a6548b 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_uint32.c b/Source/Generated/GB_unaryop__ainv_fp64_uint32.c
index 505fa34851..0b7837c552 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_uint64.c b/Source/Generated/GB_unaryop__ainv_fp64_uint64.c
index 5a0668b18d..312964b13d 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_fp64_uint8.c b/Source/Generated/GB_unaryop__ainv_fp64_uint8.c
index 19d72278f4..3e48ab4088 100644
--- a/Source/Generated/GB_unaryop__ainv_fp64_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_fp64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_bool.c b/Source/Generated/GB_unaryop__ainv_int16_bool.c
index 03cba45749..66e3e8b4f4 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_fp32.c b/Source/Generated/GB_unaryop__ainv_int16_fp32.c
index 2f97fee044..f5f22bf0bc 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_fp64.c b/Source/Generated/GB_unaryop__ainv_int16_fp64.c
index af14a7f18f..d1c76b5444 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_int16.c b/Source/Generated/GB_unaryop__ainv_int16_int16.c
index 2dc23794e5..da8003e4bf 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_int32.c b/Source/Generated/GB_unaryop__ainv_int16_int32.c
index 58258beb45..2622ef4538 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_int64.c b/Source/Generated/GB_unaryop__ainv_int16_int64.c
index 79ff0280dd..8768539afd 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_int8.c b/Source/Generated/GB_unaryop__ainv_int16_int8.c
index 85fc5de3ea..d2aa0c57a0 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_uint16.c b/Source/Generated/GB_unaryop__ainv_int16_uint16.c
index 5ebe2493ee..2f42dc93b4 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_uint32.c b/Source/Generated/GB_unaryop__ainv_int16_uint32.c
index 224981383c..fc705b61ae 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_uint64.c b/Source/Generated/GB_unaryop__ainv_int16_uint64.c
index baf6aa64cd..836be653e8 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int16_uint8.c b/Source/Generated/GB_unaryop__ainv_int16_uint8.c
index 83f020ea0b..a306337745 100644
--- a/Source/Generated/GB_unaryop__ainv_int16_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_int16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_bool.c b/Source/Generated/GB_unaryop__ainv_int32_bool.c
index 91cd3f9b96..a2ff71319a 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_fp32.c b/Source/Generated/GB_unaryop__ainv_int32_fp32.c
index eb1d2e06c4..0928ae1426 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_fp64.c b/Source/Generated/GB_unaryop__ainv_int32_fp64.c
index 614eebd9ef..ded335f855 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_int16.c b/Source/Generated/GB_unaryop__ainv_int32_int16.c
index 4a22f2a37e..48558fd016 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_int32.c b/Source/Generated/GB_unaryop__ainv_int32_int32.c
index a0eee80359..7fe3ab0e33 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_int64.c b/Source/Generated/GB_unaryop__ainv_int32_int64.c
index 07a5806256..831c7cc170 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_int8.c b/Source/Generated/GB_unaryop__ainv_int32_int8.c
index f60fb56a89..aec85f6c22 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_uint16.c b/Source/Generated/GB_unaryop__ainv_int32_uint16.c
index 782a636d2f..44fab9c28a 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_uint32.c b/Source/Generated/GB_unaryop__ainv_int32_uint32.c
index 0147651db8..facf07d486 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_uint64.c b/Source/Generated/GB_unaryop__ainv_int32_uint64.c
index 7d5a0196f8..d8a8344f94 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int32_uint8.c b/Source/Generated/GB_unaryop__ainv_int32_uint8.c
index 7fff70bf3a..081042c566 100644
--- a/Source/Generated/GB_unaryop__ainv_int32_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_int32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_bool.c b/Source/Generated/GB_unaryop__ainv_int64_bool.c
index ebf96d03c4..9f1c450462 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_fp32.c b/Source/Generated/GB_unaryop__ainv_int64_fp32.c
index e0c99f0b1c..c5e49e0d4e 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_fp64.c b/Source/Generated/GB_unaryop__ainv_int64_fp64.c
index 1bd781bb57..0d8ce64118 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_int16.c b/Source/Generated/GB_unaryop__ainv_int64_int16.c
index 1e76325b8e..ebc08d07f3 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_int32.c b/Source/Generated/GB_unaryop__ainv_int64_int32.c
index 7170331e70..59fd2697bc 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_int64.c b/Source/Generated/GB_unaryop__ainv_int64_int64.c
index 393a3cc216..4b0fc6fdfd 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_int8.c b/Source/Generated/GB_unaryop__ainv_int64_int8.c
index 0a695ccd84..35a476b5af 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_uint16.c b/Source/Generated/GB_unaryop__ainv_int64_uint16.c
index eebb841455..d9933cb040 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_uint32.c b/Source/Generated/GB_unaryop__ainv_int64_uint32.c
index 542be8aea0..c4cf9e816c 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_uint64.c b/Source/Generated/GB_unaryop__ainv_int64_uint64.c
index e4fdc63142..49a26ed21d 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int64_uint8.c b/Source/Generated/GB_unaryop__ainv_int64_uint8.c
index 62fb6e9f5b..5adc3dfe35 100644
--- a/Source/Generated/GB_unaryop__ainv_int64_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_int64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_bool.c b/Source/Generated/GB_unaryop__ainv_int8_bool.c
index 2ab5bc7765..61dc682958 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_fp32.c b/Source/Generated/GB_unaryop__ainv_int8_fp32.c
index 1983e286c5..402c3f4636 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_fp64.c b/Source/Generated/GB_unaryop__ainv_int8_fp64.c
index c16828e149..bc6242a8c9 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_int16.c b/Source/Generated/GB_unaryop__ainv_int8_int16.c
index 1199f282ef..30c5df7535 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_int32.c b/Source/Generated/GB_unaryop__ainv_int8_int32.c
index 9d6ad3f3df..a40502886c 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_int64.c b/Source/Generated/GB_unaryop__ainv_int8_int64.c
index 12ca5510f1..f5e05b0abd 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_int8.c b/Source/Generated/GB_unaryop__ainv_int8_int8.c
index 5df2490f26..25dd264c21 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_uint16.c b/Source/Generated/GB_unaryop__ainv_int8_uint16.c
index c13c398317..6ff5d13fcc 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_uint32.c b/Source/Generated/GB_unaryop__ainv_int8_uint32.c
index d46f19ecd4..72e7673399 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_uint64.c b/Source/Generated/GB_unaryop__ainv_int8_uint64.c
index c67f5f00c3..88b958e1d2 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_int8_uint8.c b/Source/Generated/GB_unaryop__ainv_int8_uint8.c
index 0f2952ffb5..634c2cc86a 100644
--- a/Source/Generated/GB_unaryop__ainv_int8_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_int8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_bool.c b/Source/Generated/GB_unaryop__ainv_uint16_bool.c
index 2edc914f5a..6533a54a74 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_fp32.c b/Source/Generated/GB_unaryop__ainv_uint16_fp32.c
index f401c79329..b641abdde4 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_fp64.c b/Source/Generated/GB_unaryop__ainv_uint16_fp64.c
index b86a3c5124..12f3ea727e 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_int16.c b/Source/Generated/GB_unaryop__ainv_uint16_int16.c
index 2cb90994ab..522cbaed36 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_int32.c b/Source/Generated/GB_unaryop__ainv_uint16_int32.c
index d8dacfa8c5..add010bfcb 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_int64.c b/Source/Generated/GB_unaryop__ainv_uint16_int64.c
index 1e95741511..7c871e9919 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_int8.c b/Source/Generated/GB_unaryop__ainv_uint16_int8.c
index 412d1bbe9f..ffc61d52d7 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_uint16.c b/Source/Generated/GB_unaryop__ainv_uint16_uint16.c
index fd5b87c173..eff09c76cd 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_uint32.c b/Source/Generated/GB_unaryop__ainv_uint16_uint32.c
index b948a8e041..047759808a 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_uint64.c b/Source/Generated/GB_unaryop__ainv_uint16_uint64.c
index 3f50eb6c1d..14a21a55f0 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint16_uint8.c b/Source/Generated/GB_unaryop__ainv_uint16_uint8.c
index 41318d698f..83ca4bd6b8 100644
--- a/Source/Generated/GB_unaryop__ainv_uint16_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_bool.c b/Source/Generated/GB_unaryop__ainv_uint32_bool.c
index 852108e6c4..19248ca8f9 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_fp32.c b/Source/Generated/GB_unaryop__ainv_uint32_fp32.c
index fb34b16ec6..5b065e22f1 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_fp64.c b/Source/Generated/GB_unaryop__ainv_uint32_fp64.c
index 12938fc271..ebf58acc0d 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_int16.c b/Source/Generated/GB_unaryop__ainv_uint32_int16.c
index 5cd12bb397..e35ed75f63 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_int32.c b/Source/Generated/GB_unaryop__ainv_uint32_int32.c
index fc3c9eea3b..3f2b163b4a 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_int64.c b/Source/Generated/GB_unaryop__ainv_uint32_int64.c
index e85ab6882f..5c13e702be 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_int8.c b/Source/Generated/GB_unaryop__ainv_uint32_int8.c
index 5bf5be5862..bb24d777b4 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_uint16.c b/Source/Generated/GB_unaryop__ainv_uint32_uint16.c
index 40c465a43e..8b2bafe919 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_uint32.c b/Source/Generated/GB_unaryop__ainv_uint32_uint32.c
index af1364b3a5..6504810a7d 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_uint64.c b/Source/Generated/GB_unaryop__ainv_uint32_uint64.c
index dcd9399d61..45f970ec36 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint32_uint8.c b/Source/Generated/GB_unaryop__ainv_uint32_uint8.c
index f14214c105..0ca48163ff 100644
--- a/Source/Generated/GB_unaryop__ainv_uint32_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_bool.c b/Source/Generated/GB_unaryop__ainv_uint64_bool.c
index ee74ed99a6..822410ebf2 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_fp32.c b/Source/Generated/GB_unaryop__ainv_uint64_fp32.c
index 4288d095bf..94cee534a7 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_fp64.c b/Source/Generated/GB_unaryop__ainv_uint64_fp64.c
index 1f006da27e..c517640567 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_int16.c b/Source/Generated/GB_unaryop__ainv_uint64_int16.c
index 65152eaaa7..aa5c0551db 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_int32.c b/Source/Generated/GB_unaryop__ainv_uint64_int32.c
index ca591affad..0f8b64c344 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_int64.c b/Source/Generated/GB_unaryop__ainv_uint64_int64.c
index 0445fc249c..cd7117e6a3 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_int8.c b/Source/Generated/GB_unaryop__ainv_uint64_int8.c
index 5d946a6d0d..438d055fca 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_uint16.c b/Source/Generated/GB_unaryop__ainv_uint64_uint16.c
index ac28b2184b..f21b981fed 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_uint32.c b/Source/Generated/GB_unaryop__ainv_uint64_uint32.c
index b40495a5ba..4c49b746ad 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_uint64.c b/Source/Generated/GB_unaryop__ainv_uint64_uint64.c
index 7dee56f19d..c892561a7b 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint64_uint8.c b/Source/Generated/GB_unaryop__ainv_uint64_uint8.c
index f77e305d89..3b8eb0a0d2 100644
--- a/Source/Generated/GB_unaryop__ainv_uint64_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_bool.c b/Source/Generated/GB_unaryop__ainv_uint8_bool.c
index a34be46f05..99e4dd9250 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_bool.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_fp32.c b/Source/Generated/GB_unaryop__ainv_uint8_fp32.c
index 7e29d71eb0..2948110451 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_fp32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_fp64.c b/Source/Generated/GB_unaryop__ainv_uint8_fp64.c
index 54fa84fede..1f0f7cae4b 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_fp64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_int16.c b/Source/Generated/GB_unaryop__ainv_uint8_int16.c
index 8827f60ecc..2cd973a741 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_int16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_int32.c b/Source/Generated/GB_unaryop__ainv_uint8_int32.c
index 348af812ce..a0a5b652a5 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_int32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_int64.c b/Source/Generated/GB_unaryop__ainv_uint8_int64.c
index e56311a3b2..1e084c62f3 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_int64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_int8.c b/Source/Generated/GB_unaryop__ainv_uint8_int8.c
index 5b673cc34d..9445a4d9d5 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_int8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_uint16.c b/Source/Generated/GB_unaryop__ainv_uint8_uint16.c
index 4481fd9dd7..c647115da7 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_uint16.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_uint32.c b/Source/Generated/GB_unaryop__ainv_uint8_uint32.c
index 69453b39fc..00e6f7086b 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_uint32.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_uint64.c b/Source/Generated/GB_unaryop__ainv_uint8_uint64.c
index 8c87758aea..c3fa673328 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_uint64.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__ainv_uint8_uint8.c b/Source/Generated/GB_unaryop__ainv_uint8_uint8.c
index 5e90e3c41e..4b0933d9e4 100644
--- a/Source/Generated/GB_unaryop__ainv_uint8_uint8.c
+++ b/Source/Generated/GB_unaryop__ainv_uint8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = -x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__ainv_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_bool.c b/Source/Generated/GB_unaryop__identity_bool_bool.c
index c6e86346e2..d76221de05 100644
--- a/Source/Generated/GB_unaryop__identity_bool_bool.c
+++ b/Source/Generated/GB_unaryop__identity_bool_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_fp32.c b/Source/Generated/GB_unaryop__identity_bool_fp32.c
index ded8e8dc9a..2e5000ed2f 100644
--- a/Source/Generated/GB_unaryop__identity_bool_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_bool_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_fp64.c b/Source/Generated/GB_unaryop__identity_bool_fp64.c
index fbac36479b..c2508d282b 100644
--- a/Source/Generated/GB_unaryop__identity_bool_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_bool_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_int16.c b/Source/Generated/GB_unaryop__identity_bool_int16.c
index 0ce5aa0362..2e33bbf801 100644
--- a/Source/Generated/GB_unaryop__identity_bool_int16.c
+++ b/Source/Generated/GB_unaryop__identity_bool_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_int32.c b/Source/Generated/GB_unaryop__identity_bool_int32.c
index 74a5f5b87c..4e3a5d12c6 100644
--- a/Source/Generated/GB_unaryop__identity_bool_int32.c
+++ b/Source/Generated/GB_unaryop__identity_bool_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_int64.c b/Source/Generated/GB_unaryop__identity_bool_int64.c
index 1fbc424fa0..8af4af36b1 100644
--- a/Source/Generated/GB_unaryop__identity_bool_int64.c
+++ b/Source/Generated/GB_unaryop__identity_bool_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_int8.c b/Source/Generated/GB_unaryop__identity_bool_int8.c
index 39db8e9298..1a5a7b4395 100644
--- a/Source/Generated/GB_unaryop__identity_bool_int8.c
+++ b/Source/Generated/GB_unaryop__identity_bool_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_uint16.c b/Source/Generated/GB_unaryop__identity_bool_uint16.c
index bed47b3cda..b66a75131c 100644
--- a/Source/Generated/GB_unaryop__identity_bool_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_bool_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_uint32.c b/Source/Generated/GB_unaryop__identity_bool_uint32.c
index db28bac85d..4f17dc9bec 100644
--- a/Source/Generated/GB_unaryop__identity_bool_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_bool_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_uint64.c b/Source/Generated/GB_unaryop__identity_bool_uint64.c
index 71639444de..f2daf58b63 100644
--- a/Source/Generated/GB_unaryop__identity_bool_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_bool_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_bool_uint8.c b/Source/Generated/GB_unaryop__identity_bool_uint8.c
index b084b79790..d323b1591c 100644
--- a/Source/Generated/GB_unaryop__identity_bool_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_bool_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_bool.c b/Source/Generated/GB_unaryop__identity_fp32_bool.c
index 971f22bdb3..9c0845d772 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_bool.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_fp32.c b/Source/Generated/GB_unaryop__identity_fp32_fp32.c
index 2f71345d3e..65df0c57f4 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_fp64.c b/Source/Generated/GB_unaryop__identity_fp32_fp64.c
index a3435ffe1c..f021cb5907 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_int16.c b/Source/Generated/GB_unaryop__identity_fp32_int16.c
index ce6bcf7c59..849ca23061 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_int16.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_int32.c b/Source/Generated/GB_unaryop__identity_fp32_int32.c
index e4ed189625..488e41acc5 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_int32.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_int64.c b/Source/Generated/GB_unaryop__identity_fp32_int64.c
index 0b24da2986..a7aaf9a068 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_int64.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_int8.c b/Source/Generated/GB_unaryop__identity_fp32_int8.c
index e47d28f51c..31c81c5a99 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_int8.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_uint16.c b/Source/Generated/GB_unaryop__identity_fp32_uint16.c
index 6a510dbcc7..8a58d44467 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_uint32.c b/Source/Generated/GB_unaryop__identity_fp32_uint32.c
index 672031eeec..ed6b379e7b 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_uint64.c b/Source/Generated/GB_unaryop__identity_fp32_uint64.c
index f590c42919..4061aa56f9 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp32_uint8.c b/Source/Generated/GB_unaryop__identity_fp32_uint8.c
index d8971eec60..1bfd9005b1 100644
--- a/Source/Generated/GB_unaryop__identity_fp32_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_fp32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_bool.c b/Source/Generated/GB_unaryop__identity_fp64_bool.c
index b4f033157b..7aae1e8bd8 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_bool.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_fp32.c b/Source/Generated/GB_unaryop__identity_fp64_fp32.c
index 5bd82bed0c..7e6a017cfa 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_fp64.c b/Source/Generated/GB_unaryop__identity_fp64_fp64.c
index 16d2178ec5..6ef85fcdce 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_int16.c b/Source/Generated/GB_unaryop__identity_fp64_int16.c
index 7852f4647e..eb96df2824 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_int16.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_int32.c b/Source/Generated/GB_unaryop__identity_fp64_int32.c
index f9300ad17e..ba15a5a265 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_int32.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_int64.c b/Source/Generated/GB_unaryop__identity_fp64_int64.c
index c0889b7ffd..59cfa7e3f6 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_int64.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_int8.c b/Source/Generated/GB_unaryop__identity_fp64_int8.c
index a76ee64135..c30c7da8f1 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_int8.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_uint16.c b/Source/Generated/GB_unaryop__identity_fp64_uint16.c
index 5dd263091e..e013ff0105 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_uint32.c b/Source/Generated/GB_unaryop__identity_fp64_uint32.c
index ea4fc15f46..137fd5e695 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_uint64.c b/Source/Generated/GB_unaryop__identity_fp64_uint64.c
index 79f4e7cca0..08ead37869 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_fp64_uint8.c b/Source/Generated/GB_unaryop__identity_fp64_uint8.c
index 9db47c5352..15fbd0872f 100644
--- a/Source/Generated/GB_unaryop__identity_fp64_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_fp64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_bool.c b/Source/Generated/GB_unaryop__identity_int16_bool.c
index 11e551bc7b..1d40ea847a 100644
--- a/Source/Generated/GB_unaryop__identity_int16_bool.c
+++ b/Source/Generated/GB_unaryop__identity_int16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_fp32.c b/Source/Generated/GB_unaryop__identity_int16_fp32.c
index 4a3251c33c..795cf05a14 100644
--- a/Source/Generated/GB_unaryop__identity_int16_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_int16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_fp64.c b/Source/Generated/GB_unaryop__identity_int16_fp64.c
index afc5d20d45..574bbf4242 100644
--- a/Source/Generated/GB_unaryop__identity_int16_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_int16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_int16.c b/Source/Generated/GB_unaryop__identity_int16_int16.c
index 9b11f69e82..266445d0e6 100644
--- a/Source/Generated/GB_unaryop__identity_int16_int16.c
+++ b/Source/Generated/GB_unaryop__identity_int16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_int32.c b/Source/Generated/GB_unaryop__identity_int16_int32.c
index 7bd0f55eae..0e114cd1dd 100644
--- a/Source/Generated/GB_unaryop__identity_int16_int32.c
+++ b/Source/Generated/GB_unaryop__identity_int16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_int64.c b/Source/Generated/GB_unaryop__identity_int16_int64.c
index cd4d49cd8f..1807072416 100644
--- a/Source/Generated/GB_unaryop__identity_int16_int64.c
+++ b/Source/Generated/GB_unaryop__identity_int16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_int8.c b/Source/Generated/GB_unaryop__identity_int16_int8.c
index c4c47fec77..769ccfc7a3 100644
--- a/Source/Generated/GB_unaryop__identity_int16_int8.c
+++ b/Source/Generated/GB_unaryop__identity_int16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_uint16.c b/Source/Generated/GB_unaryop__identity_int16_uint16.c
index 800d255f95..10d5b24ee3 100644
--- a/Source/Generated/GB_unaryop__identity_int16_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_int16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_uint32.c b/Source/Generated/GB_unaryop__identity_int16_uint32.c
index b97b039a93..c7ef7bd74a 100644
--- a/Source/Generated/GB_unaryop__identity_int16_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_int16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_uint64.c b/Source/Generated/GB_unaryop__identity_int16_uint64.c
index b2665990f1..010d9aa773 100644
--- a/Source/Generated/GB_unaryop__identity_int16_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_int16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int16_uint8.c b/Source/Generated/GB_unaryop__identity_int16_uint8.c
index 41e3aa0b64..a748997169 100644
--- a/Source/Generated/GB_unaryop__identity_int16_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_int16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_bool.c b/Source/Generated/GB_unaryop__identity_int32_bool.c
index 1c6da141e6..7b507eaf73 100644
--- a/Source/Generated/GB_unaryop__identity_int32_bool.c
+++ b/Source/Generated/GB_unaryop__identity_int32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_fp32.c b/Source/Generated/GB_unaryop__identity_int32_fp32.c
index c106a6d72f..c0b86acf88 100644
--- a/Source/Generated/GB_unaryop__identity_int32_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_int32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_fp64.c b/Source/Generated/GB_unaryop__identity_int32_fp64.c
index a3bb9e63ce..2d7bcffff2 100644
--- a/Source/Generated/GB_unaryop__identity_int32_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_int32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_int16.c b/Source/Generated/GB_unaryop__identity_int32_int16.c
index aeeaad6a8c..70784bb715 100644
--- a/Source/Generated/GB_unaryop__identity_int32_int16.c
+++ b/Source/Generated/GB_unaryop__identity_int32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_int32.c b/Source/Generated/GB_unaryop__identity_int32_int32.c
index fcb65cbc15..79c87cf854 100644
--- a/Source/Generated/GB_unaryop__identity_int32_int32.c
+++ b/Source/Generated/GB_unaryop__identity_int32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_int64.c b/Source/Generated/GB_unaryop__identity_int32_int64.c
index 53ed0c1d61..dc88235de0 100644
--- a/Source/Generated/GB_unaryop__identity_int32_int64.c
+++ b/Source/Generated/GB_unaryop__identity_int32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_int8.c b/Source/Generated/GB_unaryop__identity_int32_int8.c
index c47cbd8dbf..97e7e609fe 100644
--- a/Source/Generated/GB_unaryop__identity_int32_int8.c
+++ b/Source/Generated/GB_unaryop__identity_int32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_uint16.c b/Source/Generated/GB_unaryop__identity_int32_uint16.c
index c4b715e26b..ff2438f28c 100644
--- a/Source/Generated/GB_unaryop__identity_int32_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_int32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_uint32.c b/Source/Generated/GB_unaryop__identity_int32_uint32.c
index 3224f3fbd3..e263416b7a 100644
--- a/Source/Generated/GB_unaryop__identity_int32_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_int32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_uint64.c b/Source/Generated/GB_unaryop__identity_int32_uint64.c
index b81d0f9a7a..5c8d2e140c 100644
--- a/Source/Generated/GB_unaryop__identity_int32_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_int32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int32_uint8.c b/Source/Generated/GB_unaryop__identity_int32_uint8.c
index 34a7584c6b..119fc5fa18 100644
--- a/Source/Generated/GB_unaryop__identity_int32_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_int32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_bool.c b/Source/Generated/GB_unaryop__identity_int64_bool.c
index 0d57fcee98..8d7cff1dae 100644
--- a/Source/Generated/GB_unaryop__identity_int64_bool.c
+++ b/Source/Generated/GB_unaryop__identity_int64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_fp32.c b/Source/Generated/GB_unaryop__identity_int64_fp32.c
index 5638ec7318..a5b98cf1a1 100644
--- a/Source/Generated/GB_unaryop__identity_int64_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_int64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_fp64.c b/Source/Generated/GB_unaryop__identity_int64_fp64.c
index 0133ad74d3..6dea6e73f0 100644
--- a/Source/Generated/GB_unaryop__identity_int64_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_int64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_int16.c b/Source/Generated/GB_unaryop__identity_int64_int16.c
index 6b73d64a87..1b83d9b381 100644
--- a/Source/Generated/GB_unaryop__identity_int64_int16.c
+++ b/Source/Generated/GB_unaryop__identity_int64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_int32.c b/Source/Generated/GB_unaryop__identity_int64_int32.c
index 2cc5775b20..576abf1af9 100644
--- a/Source/Generated/GB_unaryop__identity_int64_int32.c
+++ b/Source/Generated/GB_unaryop__identity_int64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_int64.c b/Source/Generated/GB_unaryop__identity_int64_int64.c
index dc5c2310d4..720ba18ff3 100644
--- a/Source/Generated/GB_unaryop__identity_int64_int64.c
+++ b/Source/Generated/GB_unaryop__identity_int64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_int8.c b/Source/Generated/GB_unaryop__identity_int64_int8.c
index 8a6b61bf2d..403a450793 100644
--- a/Source/Generated/GB_unaryop__identity_int64_int8.c
+++ b/Source/Generated/GB_unaryop__identity_int64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_uint16.c b/Source/Generated/GB_unaryop__identity_int64_uint16.c
index 79cac92ab2..e211fdb341 100644
--- a/Source/Generated/GB_unaryop__identity_int64_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_int64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_uint32.c b/Source/Generated/GB_unaryop__identity_int64_uint32.c
index cf8b527e47..e393a27dff 100644
--- a/Source/Generated/GB_unaryop__identity_int64_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_int64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_uint64.c b/Source/Generated/GB_unaryop__identity_int64_uint64.c
index 6b7f05f31c..a5413c3614 100644
--- a/Source/Generated/GB_unaryop__identity_int64_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_int64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int64_uint8.c b/Source/Generated/GB_unaryop__identity_int64_uint8.c
index e9d5957a0b..f45b9226e0 100644
--- a/Source/Generated/GB_unaryop__identity_int64_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_int64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_bool.c b/Source/Generated/GB_unaryop__identity_int8_bool.c
index b892e0eb8f..45c3343917 100644
--- a/Source/Generated/GB_unaryop__identity_int8_bool.c
+++ b/Source/Generated/GB_unaryop__identity_int8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_fp32.c b/Source/Generated/GB_unaryop__identity_int8_fp32.c
index d088cac90d..d99f29d9a0 100644
--- a/Source/Generated/GB_unaryop__identity_int8_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_int8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_fp64.c b/Source/Generated/GB_unaryop__identity_int8_fp64.c
index 7d763b2b03..d56ae0f60d 100644
--- a/Source/Generated/GB_unaryop__identity_int8_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_int8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_int16.c b/Source/Generated/GB_unaryop__identity_int8_int16.c
index 995ba4dc8c..3fc0a34657 100644
--- a/Source/Generated/GB_unaryop__identity_int8_int16.c
+++ b/Source/Generated/GB_unaryop__identity_int8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_int32.c b/Source/Generated/GB_unaryop__identity_int8_int32.c
index 5f03ca3589..53b1076c90 100644
--- a/Source/Generated/GB_unaryop__identity_int8_int32.c
+++ b/Source/Generated/GB_unaryop__identity_int8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_int64.c b/Source/Generated/GB_unaryop__identity_int8_int64.c
index b8f3ad9856..ba4bcf2b29 100644
--- a/Source/Generated/GB_unaryop__identity_int8_int64.c
+++ b/Source/Generated/GB_unaryop__identity_int8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_int8.c b/Source/Generated/GB_unaryop__identity_int8_int8.c
index 9499057215..b212199c52 100644
--- a/Source/Generated/GB_unaryop__identity_int8_int8.c
+++ b/Source/Generated/GB_unaryop__identity_int8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_uint16.c b/Source/Generated/GB_unaryop__identity_int8_uint16.c
index dcd1c77239..6892bd3b94 100644
--- a/Source/Generated/GB_unaryop__identity_int8_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_int8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_uint32.c b/Source/Generated/GB_unaryop__identity_int8_uint32.c
index 1857ccf43f..2fe588d740 100644
--- a/Source/Generated/GB_unaryop__identity_int8_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_int8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_uint64.c b/Source/Generated/GB_unaryop__identity_int8_uint64.c
index b699b0dbb7..b10ef48811 100644
--- a/Source/Generated/GB_unaryop__identity_int8_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_int8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_int8_uint8.c b/Source/Generated/GB_unaryop__identity_int8_uint8.c
index 28282b57ec..2876b88cfe 100644
--- a/Source/Generated/GB_unaryop__identity_int8_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_int8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_bool.c b/Source/Generated/GB_unaryop__identity_uint16_bool.c
index 910d1045ef..272c196f1d 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_bool.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_fp32.c b/Source/Generated/GB_unaryop__identity_uint16_fp32.c
index bfdcfaf260..bd9991154d 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_fp64.c b/Source/Generated/GB_unaryop__identity_uint16_fp64.c
index 1d7614552e..fca57d6328 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_int16.c b/Source/Generated/GB_unaryop__identity_uint16_int16.c
index 5f154915bf..1847c42e43 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_int16.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_int32.c b/Source/Generated/GB_unaryop__identity_uint16_int32.c
index 13240b2bb3..086ff567bc 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_int32.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_int64.c b/Source/Generated/GB_unaryop__identity_uint16_int64.c
index 43eef65dbb..ab5a290da9 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_int64.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_int8.c b/Source/Generated/GB_unaryop__identity_uint16_int8.c
index 4f0c976a63..4790da11a6 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_int8.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_uint16.c b/Source/Generated/GB_unaryop__identity_uint16_uint16.c
index ae65b9d5d2..7fde9be294 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_uint32.c b/Source/Generated/GB_unaryop__identity_uint16_uint32.c
index d5ad87ffc5..4ce7b3acd8 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_uint64.c b/Source/Generated/GB_unaryop__identity_uint16_uint64.c
index d370dad16a..e5596f22b4 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint16_uint8.c b/Source/Generated/GB_unaryop__identity_uint16_uint8.c
index 8b69ae3d55..dcb6c750e7 100644
--- a/Source/Generated/GB_unaryop__identity_uint16_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_uint16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_bool.c b/Source/Generated/GB_unaryop__identity_uint32_bool.c
index 18bbb608ac..883a083409 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_bool.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_fp32.c b/Source/Generated/GB_unaryop__identity_uint32_fp32.c
index 77afe32093..1c7dc2efdf 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_fp64.c b/Source/Generated/GB_unaryop__identity_uint32_fp64.c
index fb2fa696e3..72b9518d35 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_int16.c b/Source/Generated/GB_unaryop__identity_uint32_int16.c
index ddfd435a40..9a7556cd5f 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_int16.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_int32.c b/Source/Generated/GB_unaryop__identity_uint32_int32.c
index 7c6934034d..7c9168512c 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_int32.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_int64.c b/Source/Generated/GB_unaryop__identity_uint32_int64.c
index efbb3ded5b..d2e489cb64 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_int64.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_int8.c b/Source/Generated/GB_unaryop__identity_uint32_int8.c
index e5bd29ceeb..f2656d01e6 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_int8.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_uint16.c b/Source/Generated/GB_unaryop__identity_uint32_uint16.c
index c54e3a3889..53c9ae5438 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_uint32.c b/Source/Generated/GB_unaryop__identity_uint32_uint32.c
index f89b5aee6b..bf55c1b080 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_uint64.c b/Source/Generated/GB_unaryop__identity_uint32_uint64.c
index 880678e03e..26e8bbadff 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint32_uint8.c b/Source/Generated/GB_unaryop__identity_uint32_uint8.c
index 0a21f8addd..182c10ca85 100644
--- a/Source/Generated/GB_unaryop__identity_uint32_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_uint32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_bool.c b/Source/Generated/GB_unaryop__identity_uint64_bool.c
index 4c14ffd08b..9e501024a7 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_bool.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_fp32.c b/Source/Generated/GB_unaryop__identity_uint64_fp32.c
index e5ed0a901e..a6190bbf72 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_fp64.c b/Source/Generated/GB_unaryop__identity_uint64_fp64.c
index 34b4d21fc8..4c4d049fe1 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_int16.c b/Source/Generated/GB_unaryop__identity_uint64_int16.c
index 42b67ea1d6..9441267c45 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_int16.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_int32.c b/Source/Generated/GB_unaryop__identity_uint64_int32.c
index 2c3b989132..8545a2ed09 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_int32.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_int64.c b/Source/Generated/GB_unaryop__identity_uint64_int64.c
index 8a9e36aaba..7d9fa6a01c 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_int64.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_int8.c b/Source/Generated/GB_unaryop__identity_uint64_int8.c
index e1892d4885..bfcf09f8d8 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_int8.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_uint16.c b/Source/Generated/GB_unaryop__identity_uint64_uint16.c
index d1c3eb0ce6..6abadd4ee1 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_uint32.c b/Source/Generated/GB_unaryop__identity_uint64_uint32.c
index 9fe3cb7259..f31d118a33 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_uint64.c b/Source/Generated/GB_unaryop__identity_uint64_uint64.c
index 8f2f0fee8f..8e2de75cc2 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint64_uint8.c b/Source/Generated/GB_unaryop__identity_uint64_uint8.c
index 9477644bb0..1e9b8c7320 100644
--- a/Source/Generated/GB_unaryop__identity_uint64_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_uint64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_bool.c b/Source/Generated/GB_unaryop__identity_uint8_bool.c
index e7b11ed8e5..f7ff1aeaa9 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_bool.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_fp32.c b/Source/Generated/GB_unaryop__identity_uint8_fp32.c
index 9d683f0202..b3ec695252 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_fp32.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_fp64.c b/Source/Generated/GB_unaryop__identity_uint8_fp64.c
index 5998d7f298..16fe183e07 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_fp64.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_int16.c b/Source/Generated/GB_unaryop__identity_uint8_int16.c
index d3c2e6ea4a..d43aedfbce 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_int16.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_int32.c b/Source/Generated/GB_unaryop__identity_uint8_int32.c
index 085a0b8a1c..d2cd8e2b9b 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_int32.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_int64.c b/Source/Generated/GB_unaryop__identity_uint8_int64.c
index f8649fbd91..9f61f36575 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_int64.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_int8.c b/Source/Generated/GB_unaryop__identity_uint8_int8.c
index c51adf7665..23d7f16f71 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_int8.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_uint16.c b/Source/Generated/GB_unaryop__identity_uint8_uint16.c
index 98143ebb7a..c7c1b5948f 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_uint16.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_uint32.c b/Source/Generated/GB_unaryop__identity_uint8_uint32.c
index fffd5ad981..c2c6bd472a 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_uint32.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_uint64.c b/Source/Generated/GB_unaryop__identity_uint8_uint64.c
index ea0c176d2d..5da22bbee3 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_uint64.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__identity_uint8_uint8.c b/Source/Generated/GB_unaryop__identity_uint8_uint8.c
index b348a4c239..275a587267 100644
--- a/Source/Generated/GB_unaryop__identity_uint8_uint8.c
+++ b/Source/Generated/GB_unaryop__identity_uint8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__identity_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__include.h b/Source/Generated/GB_unaryop__include.h
index bc5f1fadf1..601eddabc4 100644
--- a/Source/Generated/GB_unaryop__include.h
+++ b/Source/Generated/GB_unaryop__include.h
@@ -2,15 +2,15 @@
 // GB_unaryop__include.h: definitions for GB_unaryop__*.c
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txargt for license.
 
 // This file has been automatically generated from Generator/GB_unaryop.h
 
 GrB_Info GB_unop__one_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -27,8 +27,8 @@ GrB_Info GB_tran__one_bool_bool
 
 GrB_Info GB_unop__one_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -45,8 +45,8 @@ GrB_Info GB_tran__one_int8_int8
 
 GrB_Info GB_unop__one_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -63,8 +63,8 @@ GrB_Info GB_tran__one_int16_int16
 
 GrB_Info GB_unop__one_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -81,8 +81,8 @@ GrB_Info GB_tran__one_int32_int32
 
 GrB_Info GB_unop__one_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -99,8 +99,8 @@ GrB_Info GB_tran__one_int64_int64
 
 GrB_Info GB_unop__one_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -117,8 +117,8 @@ GrB_Info GB_tran__one_uint8_uint8
 
 GrB_Info GB_unop__one_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -135,8 +135,8 @@ GrB_Info GB_tran__one_uint16_uint16
 
 GrB_Info GB_unop__one_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -153,8 +153,8 @@ GrB_Info GB_tran__one_uint32_uint32
 
 GrB_Info GB_unop__one_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -171,8 +171,8 @@ GrB_Info GB_tran__one_uint64_uint64
 
 GrB_Info GB_unop__one_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -189,8 +189,8 @@ GrB_Info GB_tran__one_fp32_fp32
 
 GrB_Info GB_unop__one_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -207,8 +207,8 @@ GrB_Info GB_tran__one_fp64_fp64
 
 GrB_Info GB_unop__identity_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -225,8 +225,8 @@ GrB_Info GB_tran__identity_bool_bool
 
 GrB_Info GB_unop__identity_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -243,8 +243,8 @@ GrB_Info GB_tran__identity_bool_int8
 
 GrB_Info GB_unop__identity_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -261,8 +261,8 @@ GrB_Info GB_tran__identity_bool_int16
 
 GrB_Info GB_unop__identity_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -279,8 +279,8 @@ GrB_Info GB_tran__identity_bool_int32
 
 GrB_Info GB_unop__identity_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -297,8 +297,8 @@ GrB_Info GB_tran__identity_bool_int64
 
 GrB_Info GB_unop__identity_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -315,8 +315,8 @@ GrB_Info GB_tran__identity_bool_uint8
 
 GrB_Info GB_unop__identity_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -333,8 +333,8 @@ GrB_Info GB_tran__identity_bool_uint16
 
 GrB_Info GB_unop__identity_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -351,8 +351,8 @@ GrB_Info GB_tran__identity_bool_uint32
 
 GrB_Info GB_unop__identity_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -369,8 +369,8 @@ GrB_Info GB_tran__identity_bool_uint64
 
 GrB_Info GB_unop__identity_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -387,8 +387,8 @@ GrB_Info GB_tran__identity_bool_fp32
 
 GrB_Info GB_unop__identity_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -405,8 +405,8 @@ GrB_Info GB_tran__identity_bool_fp64
 
 GrB_Info GB_unop__identity_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -423,8 +423,8 @@ GrB_Info GB_tran__identity_int8_bool
 
 GrB_Info GB_unop__identity_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -441,8 +441,8 @@ GrB_Info GB_tran__identity_int8_int8
 
 GrB_Info GB_unop__identity_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -459,8 +459,8 @@ GrB_Info GB_tran__identity_int8_int16
 
 GrB_Info GB_unop__identity_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -477,8 +477,8 @@ GrB_Info GB_tran__identity_int8_int32
 
 GrB_Info GB_unop__identity_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -495,8 +495,8 @@ GrB_Info GB_tran__identity_int8_int64
 
 GrB_Info GB_unop__identity_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -513,8 +513,8 @@ GrB_Info GB_tran__identity_int8_uint8
 
 GrB_Info GB_unop__identity_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -531,8 +531,8 @@ GrB_Info GB_tran__identity_int8_uint16
 
 GrB_Info GB_unop__identity_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -549,8 +549,8 @@ GrB_Info GB_tran__identity_int8_uint32
 
 GrB_Info GB_unop__identity_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -567,8 +567,8 @@ GrB_Info GB_tran__identity_int8_uint64
 
 GrB_Info GB_unop__identity_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -585,8 +585,8 @@ GrB_Info GB_tran__identity_int8_fp32
 
 GrB_Info GB_unop__identity_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -603,8 +603,8 @@ GrB_Info GB_tran__identity_int8_fp64
 
 GrB_Info GB_unop__identity_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -621,8 +621,8 @@ GrB_Info GB_tran__identity_int16_bool
 
 GrB_Info GB_unop__identity_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -639,8 +639,8 @@ GrB_Info GB_tran__identity_int16_int8
 
 GrB_Info GB_unop__identity_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -657,8 +657,8 @@ GrB_Info GB_tran__identity_int16_int16
 
 GrB_Info GB_unop__identity_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -675,8 +675,8 @@ GrB_Info GB_tran__identity_int16_int32
 
 GrB_Info GB_unop__identity_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -693,8 +693,8 @@ GrB_Info GB_tran__identity_int16_int64
 
 GrB_Info GB_unop__identity_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -711,8 +711,8 @@ GrB_Info GB_tran__identity_int16_uint8
 
 GrB_Info GB_unop__identity_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -729,8 +729,8 @@ GrB_Info GB_tran__identity_int16_uint16
 
 GrB_Info GB_unop__identity_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -747,8 +747,8 @@ GrB_Info GB_tran__identity_int16_uint32
 
 GrB_Info GB_unop__identity_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -765,8 +765,8 @@ GrB_Info GB_tran__identity_int16_uint64
 
 GrB_Info GB_unop__identity_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -783,8 +783,8 @@ GrB_Info GB_tran__identity_int16_fp32
 
 GrB_Info GB_unop__identity_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -801,8 +801,8 @@ GrB_Info GB_tran__identity_int16_fp64
 
 GrB_Info GB_unop__identity_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -819,8 +819,8 @@ GrB_Info GB_tran__identity_int32_bool
 
 GrB_Info GB_unop__identity_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -837,8 +837,8 @@ GrB_Info GB_tran__identity_int32_int8
 
 GrB_Info GB_unop__identity_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -855,8 +855,8 @@ GrB_Info GB_tran__identity_int32_int16
 
 GrB_Info GB_unop__identity_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -873,8 +873,8 @@ GrB_Info GB_tran__identity_int32_int32
 
 GrB_Info GB_unop__identity_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -891,8 +891,8 @@ GrB_Info GB_tran__identity_int32_int64
 
 GrB_Info GB_unop__identity_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -909,8 +909,8 @@ GrB_Info GB_tran__identity_int32_uint8
 
 GrB_Info GB_unop__identity_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -927,8 +927,8 @@ GrB_Info GB_tran__identity_int32_uint16
 
 GrB_Info GB_unop__identity_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -945,8 +945,8 @@ GrB_Info GB_tran__identity_int32_uint32
 
 GrB_Info GB_unop__identity_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -963,8 +963,8 @@ GrB_Info GB_tran__identity_int32_uint64
 
 GrB_Info GB_unop__identity_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -981,8 +981,8 @@ GrB_Info GB_tran__identity_int32_fp32
 
 GrB_Info GB_unop__identity_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -999,8 +999,8 @@ GrB_Info GB_tran__identity_int32_fp64
 
 GrB_Info GB_unop__identity_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1017,8 +1017,8 @@ GrB_Info GB_tran__identity_int64_bool
 
 GrB_Info GB_unop__identity_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1035,8 +1035,8 @@ GrB_Info GB_tran__identity_int64_int8
 
 GrB_Info GB_unop__identity_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1053,8 +1053,8 @@ GrB_Info GB_tran__identity_int64_int16
 
 GrB_Info GB_unop__identity_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1071,8 +1071,8 @@ GrB_Info GB_tran__identity_int64_int32
 
 GrB_Info GB_unop__identity_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1089,8 +1089,8 @@ GrB_Info GB_tran__identity_int64_int64
 
 GrB_Info GB_unop__identity_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1107,8 +1107,8 @@ GrB_Info GB_tran__identity_int64_uint8
 
 GrB_Info GB_unop__identity_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1125,8 +1125,8 @@ GrB_Info GB_tran__identity_int64_uint16
 
 GrB_Info GB_unop__identity_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1143,8 +1143,8 @@ GrB_Info GB_tran__identity_int64_uint32
 
 GrB_Info GB_unop__identity_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1161,8 +1161,8 @@ GrB_Info GB_tran__identity_int64_uint64
 
 GrB_Info GB_unop__identity_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1179,8 +1179,8 @@ GrB_Info GB_tran__identity_int64_fp32
 
 GrB_Info GB_unop__identity_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1197,8 +1197,8 @@ GrB_Info GB_tran__identity_int64_fp64
 
 GrB_Info GB_unop__identity_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1215,8 +1215,8 @@ GrB_Info GB_tran__identity_uint8_bool
 
 GrB_Info GB_unop__identity_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1233,8 +1233,8 @@ GrB_Info GB_tran__identity_uint8_int8
 
 GrB_Info GB_unop__identity_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1251,8 +1251,8 @@ GrB_Info GB_tran__identity_uint8_int16
 
 GrB_Info GB_unop__identity_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1269,8 +1269,8 @@ GrB_Info GB_tran__identity_uint8_int32
 
 GrB_Info GB_unop__identity_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1287,8 +1287,8 @@ GrB_Info GB_tran__identity_uint8_int64
 
 GrB_Info GB_unop__identity_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1305,8 +1305,8 @@ GrB_Info GB_tran__identity_uint8_uint8
 
 GrB_Info GB_unop__identity_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1323,8 +1323,8 @@ GrB_Info GB_tran__identity_uint8_uint16
 
 GrB_Info GB_unop__identity_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1341,8 +1341,8 @@ GrB_Info GB_tran__identity_uint8_uint32
 
 GrB_Info GB_unop__identity_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1359,8 +1359,8 @@ GrB_Info GB_tran__identity_uint8_uint64
 
 GrB_Info GB_unop__identity_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1377,8 +1377,8 @@ GrB_Info GB_tran__identity_uint8_fp32
 
 GrB_Info GB_unop__identity_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1395,8 +1395,8 @@ GrB_Info GB_tran__identity_uint8_fp64
 
 GrB_Info GB_unop__identity_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1413,8 +1413,8 @@ GrB_Info GB_tran__identity_uint16_bool
 
 GrB_Info GB_unop__identity_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1431,8 +1431,8 @@ GrB_Info GB_tran__identity_uint16_int8
 
 GrB_Info GB_unop__identity_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1449,8 +1449,8 @@ GrB_Info GB_tran__identity_uint16_int16
 
 GrB_Info GB_unop__identity_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1467,8 +1467,8 @@ GrB_Info GB_tran__identity_uint16_int32
 
 GrB_Info GB_unop__identity_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1485,8 +1485,8 @@ GrB_Info GB_tran__identity_uint16_int64
 
 GrB_Info GB_unop__identity_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1503,8 +1503,8 @@ GrB_Info GB_tran__identity_uint16_uint8
 
 GrB_Info GB_unop__identity_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1521,8 +1521,8 @@ GrB_Info GB_tran__identity_uint16_uint16
 
 GrB_Info GB_unop__identity_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1539,8 +1539,8 @@ GrB_Info GB_tran__identity_uint16_uint32
 
 GrB_Info GB_unop__identity_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1557,8 +1557,8 @@ GrB_Info GB_tran__identity_uint16_uint64
 
 GrB_Info GB_unop__identity_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1575,8 +1575,8 @@ GrB_Info GB_tran__identity_uint16_fp32
 
 GrB_Info GB_unop__identity_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1593,8 +1593,8 @@ GrB_Info GB_tran__identity_uint16_fp64
 
 GrB_Info GB_unop__identity_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1611,8 +1611,8 @@ GrB_Info GB_tran__identity_uint32_bool
 
 GrB_Info GB_unop__identity_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1629,8 +1629,8 @@ GrB_Info GB_tran__identity_uint32_int8
 
 GrB_Info GB_unop__identity_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1647,8 +1647,8 @@ GrB_Info GB_tran__identity_uint32_int16
 
 GrB_Info GB_unop__identity_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1665,8 +1665,8 @@ GrB_Info GB_tran__identity_uint32_int32
 
 GrB_Info GB_unop__identity_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1683,8 +1683,8 @@ GrB_Info GB_tran__identity_uint32_int64
 
 GrB_Info GB_unop__identity_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1701,8 +1701,8 @@ GrB_Info GB_tran__identity_uint32_uint8
 
 GrB_Info GB_unop__identity_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1719,8 +1719,8 @@ GrB_Info GB_tran__identity_uint32_uint16
 
 GrB_Info GB_unop__identity_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1737,8 +1737,8 @@ GrB_Info GB_tran__identity_uint32_uint32
 
 GrB_Info GB_unop__identity_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1755,8 +1755,8 @@ GrB_Info GB_tran__identity_uint32_uint64
 
 GrB_Info GB_unop__identity_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1773,8 +1773,8 @@ GrB_Info GB_tran__identity_uint32_fp32
 
 GrB_Info GB_unop__identity_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1791,8 +1791,8 @@ GrB_Info GB_tran__identity_uint32_fp64
 
 GrB_Info GB_unop__identity_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1809,8 +1809,8 @@ GrB_Info GB_tran__identity_uint64_bool
 
 GrB_Info GB_unop__identity_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1827,8 +1827,8 @@ GrB_Info GB_tran__identity_uint64_int8
 
 GrB_Info GB_unop__identity_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1845,8 +1845,8 @@ GrB_Info GB_tran__identity_uint64_int16
 
 GrB_Info GB_unop__identity_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1863,8 +1863,8 @@ GrB_Info GB_tran__identity_uint64_int32
 
 GrB_Info GB_unop__identity_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1881,8 +1881,8 @@ GrB_Info GB_tran__identity_uint64_int64
 
 GrB_Info GB_unop__identity_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1899,8 +1899,8 @@ GrB_Info GB_tran__identity_uint64_uint8
 
 GrB_Info GB_unop__identity_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1917,8 +1917,8 @@ GrB_Info GB_tran__identity_uint64_uint16
 
 GrB_Info GB_unop__identity_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1935,8 +1935,8 @@ GrB_Info GB_tran__identity_uint64_uint32
 
 GrB_Info GB_unop__identity_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1953,8 +1953,8 @@ GrB_Info GB_tran__identity_uint64_uint64
 
 GrB_Info GB_unop__identity_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1971,8 +1971,8 @@ GrB_Info GB_tran__identity_uint64_fp32
 
 GrB_Info GB_unop__identity_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -1989,8 +1989,8 @@ GrB_Info GB_tran__identity_uint64_fp64
 
 GrB_Info GB_unop__identity_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2007,8 +2007,8 @@ GrB_Info GB_tran__identity_fp32_bool
 
 GrB_Info GB_unop__identity_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2025,8 +2025,8 @@ GrB_Info GB_tran__identity_fp32_int8
 
 GrB_Info GB_unop__identity_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2043,8 +2043,8 @@ GrB_Info GB_tran__identity_fp32_int16
 
 GrB_Info GB_unop__identity_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2061,8 +2061,8 @@ GrB_Info GB_tran__identity_fp32_int32
 
 GrB_Info GB_unop__identity_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2079,8 +2079,8 @@ GrB_Info GB_tran__identity_fp32_int64
 
 GrB_Info GB_unop__identity_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2097,8 +2097,8 @@ GrB_Info GB_tran__identity_fp32_uint8
 
 GrB_Info GB_unop__identity_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2115,8 +2115,8 @@ GrB_Info GB_tran__identity_fp32_uint16
 
 GrB_Info GB_unop__identity_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2133,8 +2133,8 @@ GrB_Info GB_tran__identity_fp32_uint32
 
 GrB_Info GB_unop__identity_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2151,8 +2151,8 @@ GrB_Info GB_tran__identity_fp32_uint64
 
 GrB_Info GB_unop__identity_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2169,8 +2169,8 @@ GrB_Info GB_tran__identity_fp32_fp32
 
 GrB_Info GB_unop__identity_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2187,8 +2187,8 @@ GrB_Info GB_tran__identity_fp32_fp64
 
 GrB_Info GB_unop__identity_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2205,8 +2205,8 @@ GrB_Info GB_tran__identity_fp64_bool
 
 GrB_Info GB_unop__identity_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2223,8 +2223,8 @@ GrB_Info GB_tran__identity_fp64_int8
 
 GrB_Info GB_unop__identity_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2241,8 +2241,8 @@ GrB_Info GB_tran__identity_fp64_int16
 
 GrB_Info GB_unop__identity_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2259,8 +2259,8 @@ GrB_Info GB_tran__identity_fp64_int32
 
 GrB_Info GB_unop__identity_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2277,8 +2277,8 @@ GrB_Info GB_tran__identity_fp64_int64
 
 GrB_Info GB_unop__identity_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2295,8 +2295,8 @@ GrB_Info GB_tran__identity_fp64_uint8
 
 GrB_Info GB_unop__identity_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2313,8 +2313,8 @@ GrB_Info GB_tran__identity_fp64_uint16
 
 GrB_Info GB_unop__identity_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2331,8 +2331,8 @@ GrB_Info GB_tran__identity_fp64_uint32
 
 GrB_Info GB_unop__identity_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2349,8 +2349,8 @@ GrB_Info GB_tran__identity_fp64_uint64
 
 GrB_Info GB_unop__identity_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2367,8 +2367,8 @@ GrB_Info GB_tran__identity_fp64_fp32
 
 GrB_Info GB_unop__identity_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2385,8 +2385,8 @@ GrB_Info GB_tran__identity_fp64_fp64
 
 GrB_Info GB_unop__ainv_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2403,8 +2403,8 @@ GrB_Info GB_tran__ainv_bool_bool
 
 GrB_Info GB_unop__ainv_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2421,8 +2421,8 @@ GrB_Info GB_tran__ainv_bool_int8
 
 GrB_Info GB_unop__ainv_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2439,8 +2439,8 @@ GrB_Info GB_tran__ainv_bool_int16
 
 GrB_Info GB_unop__ainv_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2457,8 +2457,8 @@ GrB_Info GB_tran__ainv_bool_int32
 
 GrB_Info GB_unop__ainv_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2475,8 +2475,8 @@ GrB_Info GB_tran__ainv_bool_int64
 
 GrB_Info GB_unop__ainv_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2493,8 +2493,8 @@ GrB_Info GB_tran__ainv_bool_uint8
 
 GrB_Info GB_unop__ainv_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2511,8 +2511,8 @@ GrB_Info GB_tran__ainv_bool_uint16
 
 GrB_Info GB_unop__ainv_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2529,8 +2529,8 @@ GrB_Info GB_tran__ainv_bool_uint32
 
 GrB_Info GB_unop__ainv_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2547,8 +2547,8 @@ GrB_Info GB_tran__ainv_bool_uint64
 
 GrB_Info GB_unop__ainv_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2565,8 +2565,8 @@ GrB_Info GB_tran__ainv_bool_fp32
 
 GrB_Info GB_unop__ainv_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2583,8 +2583,8 @@ GrB_Info GB_tran__ainv_bool_fp64
 
 GrB_Info GB_unop__ainv_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2601,8 +2601,8 @@ GrB_Info GB_tran__ainv_int8_bool
 
 GrB_Info GB_unop__ainv_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2619,8 +2619,8 @@ GrB_Info GB_tran__ainv_int8_int8
 
 GrB_Info GB_unop__ainv_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2637,8 +2637,8 @@ GrB_Info GB_tran__ainv_int8_int16
 
 GrB_Info GB_unop__ainv_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2655,8 +2655,8 @@ GrB_Info GB_tran__ainv_int8_int32
 
 GrB_Info GB_unop__ainv_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2673,8 +2673,8 @@ GrB_Info GB_tran__ainv_int8_int64
 
 GrB_Info GB_unop__ainv_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2691,8 +2691,8 @@ GrB_Info GB_tran__ainv_int8_uint8
 
 GrB_Info GB_unop__ainv_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2709,8 +2709,8 @@ GrB_Info GB_tran__ainv_int8_uint16
 
 GrB_Info GB_unop__ainv_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2727,8 +2727,8 @@ GrB_Info GB_tran__ainv_int8_uint32
 
 GrB_Info GB_unop__ainv_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2745,8 +2745,8 @@ GrB_Info GB_tran__ainv_int8_uint64
 
 GrB_Info GB_unop__ainv_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2763,8 +2763,8 @@ GrB_Info GB_tran__ainv_int8_fp32
 
 GrB_Info GB_unop__ainv_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2781,8 +2781,8 @@ GrB_Info GB_tran__ainv_int8_fp64
 
 GrB_Info GB_unop__ainv_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2799,8 +2799,8 @@ GrB_Info GB_tran__ainv_int16_bool
 
 GrB_Info GB_unop__ainv_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2817,8 +2817,8 @@ GrB_Info GB_tran__ainv_int16_int8
 
 GrB_Info GB_unop__ainv_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2835,8 +2835,8 @@ GrB_Info GB_tran__ainv_int16_int16
 
 GrB_Info GB_unop__ainv_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2853,8 +2853,8 @@ GrB_Info GB_tran__ainv_int16_int32
 
 GrB_Info GB_unop__ainv_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2871,8 +2871,8 @@ GrB_Info GB_tran__ainv_int16_int64
 
 GrB_Info GB_unop__ainv_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2889,8 +2889,8 @@ GrB_Info GB_tran__ainv_int16_uint8
 
 GrB_Info GB_unop__ainv_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2907,8 +2907,8 @@ GrB_Info GB_tran__ainv_int16_uint16
 
 GrB_Info GB_unop__ainv_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2925,8 +2925,8 @@ GrB_Info GB_tran__ainv_int16_uint32
 
 GrB_Info GB_unop__ainv_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2943,8 +2943,8 @@ GrB_Info GB_tran__ainv_int16_uint64
 
 GrB_Info GB_unop__ainv_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2961,8 +2961,8 @@ GrB_Info GB_tran__ainv_int16_fp32
 
 GrB_Info GB_unop__ainv_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2979,8 +2979,8 @@ GrB_Info GB_tran__ainv_int16_fp64
 
 GrB_Info GB_unop__ainv_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -2997,8 +2997,8 @@ GrB_Info GB_tran__ainv_int32_bool
 
 GrB_Info GB_unop__ainv_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3015,8 +3015,8 @@ GrB_Info GB_tran__ainv_int32_int8
 
 GrB_Info GB_unop__ainv_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3033,8 +3033,8 @@ GrB_Info GB_tran__ainv_int32_int16
 
 GrB_Info GB_unop__ainv_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3051,8 +3051,8 @@ GrB_Info GB_tran__ainv_int32_int32
 
 GrB_Info GB_unop__ainv_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3069,8 +3069,8 @@ GrB_Info GB_tran__ainv_int32_int64
 
 GrB_Info GB_unop__ainv_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3087,8 +3087,8 @@ GrB_Info GB_tran__ainv_int32_uint8
 
 GrB_Info GB_unop__ainv_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3105,8 +3105,8 @@ GrB_Info GB_tran__ainv_int32_uint16
 
 GrB_Info GB_unop__ainv_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3123,8 +3123,8 @@ GrB_Info GB_tran__ainv_int32_uint32
 
 GrB_Info GB_unop__ainv_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3141,8 +3141,8 @@ GrB_Info GB_tran__ainv_int32_uint64
 
 GrB_Info GB_unop__ainv_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3159,8 +3159,8 @@ GrB_Info GB_tran__ainv_int32_fp32
 
 GrB_Info GB_unop__ainv_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3177,8 +3177,8 @@ GrB_Info GB_tran__ainv_int32_fp64
 
 GrB_Info GB_unop__ainv_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3195,8 +3195,8 @@ GrB_Info GB_tran__ainv_int64_bool
 
 GrB_Info GB_unop__ainv_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3213,8 +3213,8 @@ GrB_Info GB_tran__ainv_int64_int8
 
 GrB_Info GB_unop__ainv_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3231,8 +3231,8 @@ GrB_Info GB_tran__ainv_int64_int16
 
 GrB_Info GB_unop__ainv_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3249,8 +3249,8 @@ GrB_Info GB_tran__ainv_int64_int32
 
 GrB_Info GB_unop__ainv_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3267,8 +3267,8 @@ GrB_Info GB_tran__ainv_int64_int64
 
 GrB_Info GB_unop__ainv_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3285,8 +3285,8 @@ GrB_Info GB_tran__ainv_int64_uint8
 
 GrB_Info GB_unop__ainv_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3303,8 +3303,8 @@ GrB_Info GB_tran__ainv_int64_uint16
 
 GrB_Info GB_unop__ainv_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3321,8 +3321,8 @@ GrB_Info GB_tran__ainv_int64_uint32
 
 GrB_Info GB_unop__ainv_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3339,8 +3339,8 @@ GrB_Info GB_tran__ainv_int64_uint64
 
 GrB_Info GB_unop__ainv_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3357,8 +3357,8 @@ GrB_Info GB_tran__ainv_int64_fp32
 
 GrB_Info GB_unop__ainv_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3375,8 +3375,8 @@ GrB_Info GB_tran__ainv_int64_fp64
 
 GrB_Info GB_unop__ainv_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3393,8 +3393,8 @@ GrB_Info GB_tran__ainv_uint8_bool
 
 GrB_Info GB_unop__ainv_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3411,8 +3411,8 @@ GrB_Info GB_tran__ainv_uint8_int8
 
 GrB_Info GB_unop__ainv_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3429,8 +3429,8 @@ GrB_Info GB_tran__ainv_uint8_int16
 
 GrB_Info GB_unop__ainv_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3447,8 +3447,8 @@ GrB_Info GB_tran__ainv_uint8_int32
 
 GrB_Info GB_unop__ainv_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3465,8 +3465,8 @@ GrB_Info GB_tran__ainv_uint8_int64
 
 GrB_Info GB_unop__ainv_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3483,8 +3483,8 @@ GrB_Info GB_tran__ainv_uint8_uint8
 
 GrB_Info GB_unop__ainv_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3501,8 +3501,8 @@ GrB_Info GB_tran__ainv_uint8_uint16
 
 GrB_Info GB_unop__ainv_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3519,8 +3519,8 @@ GrB_Info GB_tran__ainv_uint8_uint32
 
 GrB_Info GB_unop__ainv_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3537,8 +3537,8 @@ GrB_Info GB_tran__ainv_uint8_uint64
 
 GrB_Info GB_unop__ainv_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3555,8 +3555,8 @@ GrB_Info GB_tran__ainv_uint8_fp32
 
 GrB_Info GB_unop__ainv_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3573,8 +3573,8 @@ GrB_Info GB_tran__ainv_uint8_fp64
 
 GrB_Info GB_unop__ainv_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3591,8 +3591,8 @@ GrB_Info GB_tran__ainv_uint16_bool
 
 GrB_Info GB_unop__ainv_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3609,8 +3609,8 @@ GrB_Info GB_tran__ainv_uint16_int8
 
 GrB_Info GB_unop__ainv_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3627,8 +3627,8 @@ GrB_Info GB_tran__ainv_uint16_int16
 
 GrB_Info GB_unop__ainv_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3645,8 +3645,8 @@ GrB_Info GB_tran__ainv_uint16_int32
 
 GrB_Info GB_unop__ainv_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3663,8 +3663,8 @@ GrB_Info GB_tran__ainv_uint16_int64
 
 GrB_Info GB_unop__ainv_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3681,8 +3681,8 @@ GrB_Info GB_tran__ainv_uint16_uint8
 
 GrB_Info GB_unop__ainv_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3699,8 +3699,8 @@ GrB_Info GB_tran__ainv_uint16_uint16
 
 GrB_Info GB_unop__ainv_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3717,8 +3717,8 @@ GrB_Info GB_tran__ainv_uint16_uint32
 
 GrB_Info GB_unop__ainv_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3735,8 +3735,8 @@ GrB_Info GB_tran__ainv_uint16_uint64
 
 GrB_Info GB_unop__ainv_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3753,8 +3753,8 @@ GrB_Info GB_tran__ainv_uint16_fp32
 
 GrB_Info GB_unop__ainv_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3771,8 +3771,8 @@ GrB_Info GB_tran__ainv_uint16_fp64
 
 GrB_Info GB_unop__ainv_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3789,8 +3789,8 @@ GrB_Info GB_tran__ainv_uint32_bool
 
 GrB_Info GB_unop__ainv_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3807,8 +3807,8 @@ GrB_Info GB_tran__ainv_uint32_int8
 
 GrB_Info GB_unop__ainv_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3825,8 +3825,8 @@ GrB_Info GB_tran__ainv_uint32_int16
 
 GrB_Info GB_unop__ainv_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3843,8 +3843,8 @@ GrB_Info GB_tran__ainv_uint32_int32
 
 GrB_Info GB_unop__ainv_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3861,8 +3861,8 @@ GrB_Info GB_tran__ainv_uint32_int64
 
 GrB_Info GB_unop__ainv_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3879,8 +3879,8 @@ GrB_Info GB_tran__ainv_uint32_uint8
 
 GrB_Info GB_unop__ainv_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3897,8 +3897,8 @@ GrB_Info GB_tran__ainv_uint32_uint16
 
 GrB_Info GB_unop__ainv_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3915,8 +3915,8 @@ GrB_Info GB_tran__ainv_uint32_uint32
 
 GrB_Info GB_unop__ainv_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3933,8 +3933,8 @@ GrB_Info GB_tran__ainv_uint32_uint64
 
 GrB_Info GB_unop__ainv_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3951,8 +3951,8 @@ GrB_Info GB_tran__ainv_uint32_fp32
 
 GrB_Info GB_unop__ainv_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3969,8 +3969,8 @@ GrB_Info GB_tran__ainv_uint32_fp64
 
 GrB_Info GB_unop__ainv_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -3987,8 +3987,8 @@ GrB_Info GB_tran__ainv_uint64_bool
 
 GrB_Info GB_unop__ainv_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4005,8 +4005,8 @@ GrB_Info GB_tran__ainv_uint64_int8
 
 GrB_Info GB_unop__ainv_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4023,8 +4023,8 @@ GrB_Info GB_tran__ainv_uint64_int16
 
 GrB_Info GB_unop__ainv_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4041,8 +4041,8 @@ GrB_Info GB_tran__ainv_uint64_int32
 
 GrB_Info GB_unop__ainv_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4059,8 +4059,8 @@ GrB_Info GB_tran__ainv_uint64_int64
 
 GrB_Info GB_unop__ainv_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4077,8 +4077,8 @@ GrB_Info GB_tran__ainv_uint64_uint8
 
 GrB_Info GB_unop__ainv_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4095,8 +4095,8 @@ GrB_Info GB_tran__ainv_uint64_uint16
 
 GrB_Info GB_unop__ainv_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4113,8 +4113,8 @@ GrB_Info GB_tran__ainv_uint64_uint32
 
 GrB_Info GB_unop__ainv_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4131,8 +4131,8 @@ GrB_Info GB_tran__ainv_uint64_uint64
 
 GrB_Info GB_unop__ainv_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4149,8 +4149,8 @@ GrB_Info GB_tran__ainv_uint64_fp32
 
 GrB_Info GB_unop__ainv_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4167,8 +4167,8 @@ GrB_Info GB_tran__ainv_uint64_fp64
 
 GrB_Info GB_unop__ainv_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4185,8 +4185,8 @@ GrB_Info GB_tran__ainv_fp32_bool
 
 GrB_Info GB_unop__ainv_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4203,8 +4203,8 @@ GrB_Info GB_tran__ainv_fp32_int8
 
 GrB_Info GB_unop__ainv_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4221,8 +4221,8 @@ GrB_Info GB_tran__ainv_fp32_int16
 
 GrB_Info GB_unop__ainv_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4239,8 +4239,8 @@ GrB_Info GB_tran__ainv_fp32_int32
 
 GrB_Info GB_unop__ainv_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4257,8 +4257,8 @@ GrB_Info GB_tran__ainv_fp32_int64
 
 GrB_Info GB_unop__ainv_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4275,8 +4275,8 @@ GrB_Info GB_tran__ainv_fp32_uint8
 
 GrB_Info GB_unop__ainv_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4293,8 +4293,8 @@ GrB_Info GB_tran__ainv_fp32_uint16
 
 GrB_Info GB_unop__ainv_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4311,8 +4311,8 @@ GrB_Info GB_tran__ainv_fp32_uint32
 
 GrB_Info GB_unop__ainv_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4329,8 +4329,8 @@ GrB_Info GB_tran__ainv_fp32_uint64
 
 GrB_Info GB_unop__ainv_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4347,8 +4347,8 @@ GrB_Info GB_tran__ainv_fp32_fp32
 
 GrB_Info GB_unop__ainv_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4365,8 +4365,8 @@ GrB_Info GB_tran__ainv_fp32_fp64
 
 GrB_Info GB_unop__ainv_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4383,8 +4383,8 @@ GrB_Info GB_tran__ainv_fp64_bool
 
 GrB_Info GB_unop__ainv_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4401,8 +4401,8 @@ GrB_Info GB_tran__ainv_fp64_int8
 
 GrB_Info GB_unop__ainv_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4419,8 +4419,8 @@ GrB_Info GB_tran__ainv_fp64_int16
 
 GrB_Info GB_unop__ainv_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4437,8 +4437,8 @@ GrB_Info GB_tran__ainv_fp64_int32
 
 GrB_Info GB_unop__ainv_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4455,8 +4455,8 @@ GrB_Info GB_tran__ainv_fp64_int64
 
 GrB_Info GB_unop__ainv_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4473,8 +4473,8 @@ GrB_Info GB_tran__ainv_fp64_uint8
 
 GrB_Info GB_unop__ainv_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4491,8 +4491,8 @@ GrB_Info GB_tran__ainv_fp64_uint16
 
 GrB_Info GB_unop__ainv_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4509,8 +4509,8 @@ GrB_Info GB_tran__ainv_fp64_uint32
 
 GrB_Info GB_unop__ainv_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4527,8 +4527,8 @@ GrB_Info GB_tran__ainv_fp64_uint64
 
 GrB_Info GB_unop__ainv_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4545,8 +4545,8 @@ GrB_Info GB_tran__ainv_fp64_fp32
 
 GrB_Info GB_unop__ainv_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4563,8 +4563,8 @@ GrB_Info GB_tran__ainv_fp64_fp64
 
 GrB_Info GB_unop__abs_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4581,8 +4581,8 @@ GrB_Info GB_tran__abs_bool_bool
 
 GrB_Info GB_unop__abs_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4599,8 +4599,8 @@ GrB_Info GB_tran__abs_bool_int8
 
 GrB_Info GB_unop__abs_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4617,8 +4617,8 @@ GrB_Info GB_tran__abs_bool_int16
 
 GrB_Info GB_unop__abs_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4635,8 +4635,8 @@ GrB_Info GB_tran__abs_bool_int32
 
 GrB_Info GB_unop__abs_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4653,8 +4653,8 @@ GrB_Info GB_tran__abs_bool_int64
 
 GrB_Info GB_unop__abs_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4671,8 +4671,8 @@ GrB_Info GB_tran__abs_bool_uint8
 
 GrB_Info GB_unop__abs_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4689,8 +4689,8 @@ GrB_Info GB_tran__abs_bool_uint16
 
 GrB_Info GB_unop__abs_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4707,8 +4707,8 @@ GrB_Info GB_tran__abs_bool_uint32
 
 GrB_Info GB_unop__abs_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4725,8 +4725,8 @@ GrB_Info GB_tran__abs_bool_uint64
 
 GrB_Info GB_unop__abs_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4743,8 +4743,8 @@ GrB_Info GB_tran__abs_bool_fp32
 
 GrB_Info GB_unop__abs_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4761,8 +4761,8 @@ GrB_Info GB_tran__abs_bool_fp64
 
 GrB_Info GB_unop__abs_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4779,8 +4779,8 @@ GrB_Info GB_tran__abs_int8_bool
 
 GrB_Info GB_unop__abs_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4797,8 +4797,8 @@ GrB_Info GB_tran__abs_int8_int8
 
 GrB_Info GB_unop__abs_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4815,8 +4815,8 @@ GrB_Info GB_tran__abs_int8_int16
 
 GrB_Info GB_unop__abs_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4833,8 +4833,8 @@ GrB_Info GB_tran__abs_int8_int32
 
 GrB_Info GB_unop__abs_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4851,8 +4851,8 @@ GrB_Info GB_tran__abs_int8_int64
 
 GrB_Info GB_unop__abs_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4869,8 +4869,8 @@ GrB_Info GB_tran__abs_int8_uint8
 
 GrB_Info GB_unop__abs_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4887,8 +4887,8 @@ GrB_Info GB_tran__abs_int8_uint16
 
 GrB_Info GB_unop__abs_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4905,8 +4905,8 @@ GrB_Info GB_tran__abs_int8_uint32
 
 GrB_Info GB_unop__abs_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4923,8 +4923,8 @@ GrB_Info GB_tran__abs_int8_uint64
 
 GrB_Info GB_unop__abs_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4941,8 +4941,8 @@ GrB_Info GB_tran__abs_int8_fp32
 
 GrB_Info GB_unop__abs_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4959,8 +4959,8 @@ GrB_Info GB_tran__abs_int8_fp64
 
 GrB_Info GB_unop__abs_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4977,8 +4977,8 @@ GrB_Info GB_tran__abs_int16_bool
 
 GrB_Info GB_unop__abs_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -4995,8 +4995,8 @@ GrB_Info GB_tran__abs_int16_int8
 
 GrB_Info GB_unop__abs_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5013,8 +5013,8 @@ GrB_Info GB_tran__abs_int16_int16
 
 GrB_Info GB_unop__abs_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5031,8 +5031,8 @@ GrB_Info GB_tran__abs_int16_int32
 
 GrB_Info GB_unop__abs_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5049,8 +5049,8 @@ GrB_Info GB_tran__abs_int16_int64
 
 GrB_Info GB_unop__abs_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5067,8 +5067,8 @@ GrB_Info GB_tran__abs_int16_uint8
 
 GrB_Info GB_unop__abs_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5085,8 +5085,8 @@ GrB_Info GB_tran__abs_int16_uint16
 
 GrB_Info GB_unop__abs_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5103,8 +5103,8 @@ GrB_Info GB_tran__abs_int16_uint32
 
 GrB_Info GB_unop__abs_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5121,8 +5121,8 @@ GrB_Info GB_tran__abs_int16_uint64
 
 GrB_Info GB_unop__abs_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5139,8 +5139,8 @@ GrB_Info GB_tran__abs_int16_fp32
 
 GrB_Info GB_unop__abs_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5157,8 +5157,8 @@ GrB_Info GB_tran__abs_int16_fp64
 
 GrB_Info GB_unop__abs_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5175,8 +5175,8 @@ GrB_Info GB_tran__abs_int32_bool
 
 GrB_Info GB_unop__abs_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5193,8 +5193,8 @@ GrB_Info GB_tran__abs_int32_int8
 
 GrB_Info GB_unop__abs_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5211,8 +5211,8 @@ GrB_Info GB_tran__abs_int32_int16
 
 GrB_Info GB_unop__abs_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5229,8 +5229,8 @@ GrB_Info GB_tran__abs_int32_int32
 
 GrB_Info GB_unop__abs_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5247,8 +5247,8 @@ GrB_Info GB_tran__abs_int32_int64
 
 GrB_Info GB_unop__abs_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5265,8 +5265,8 @@ GrB_Info GB_tran__abs_int32_uint8
 
 GrB_Info GB_unop__abs_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5283,8 +5283,8 @@ GrB_Info GB_tran__abs_int32_uint16
 
 GrB_Info GB_unop__abs_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5301,8 +5301,8 @@ GrB_Info GB_tran__abs_int32_uint32
 
 GrB_Info GB_unop__abs_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5319,8 +5319,8 @@ GrB_Info GB_tran__abs_int32_uint64
 
 GrB_Info GB_unop__abs_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5337,8 +5337,8 @@ GrB_Info GB_tran__abs_int32_fp32
 
 GrB_Info GB_unop__abs_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5355,8 +5355,8 @@ GrB_Info GB_tran__abs_int32_fp64
 
 GrB_Info GB_unop__abs_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5373,8 +5373,8 @@ GrB_Info GB_tran__abs_int64_bool
 
 GrB_Info GB_unop__abs_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5391,8 +5391,8 @@ GrB_Info GB_tran__abs_int64_int8
 
 GrB_Info GB_unop__abs_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5409,8 +5409,8 @@ GrB_Info GB_tran__abs_int64_int16
 
 GrB_Info GB_unop__abs_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5427,8 +5427,8 @@ GrB_Info GB_tran__abs_int64_int32
 
 GrB_Info GB_unop__abs_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5445,8 +5445,8 @@ GrB_Info GB_tran__abs_int64_int64
 
 GrB_Info GB_unop__abs_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5463,8 +5463,8 @@ GrB_Info GB_tran__abs_int64_uint8
 
 GrB_Info GB_unop__abs_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5481,8 +5481,8 @@ GrB_Info GB_tran__abs_int64_uint16
 
 GrB_Info GB_unop__abs_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5499,8 +5499,8 @@ GrB_Info GB_tran__abs_int64_uint32
 
 GrB_Info GB_unop__abs_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5517,8 +5517,8 @@ GrB_Info GB_tran__abs_int64_uint64
 
 GrB_Info GB_unop__abs_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5535,8 +5535,8 @@ GrB_Info GB_tran__abs_int64_fp32
 
 GrB_Info GB_unop__abs_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5553,8 +5553,8 @@ GrB_Info GB_tran__abs_int64_fp64
 
 GrB_Info GB_unop__abs_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5571,8 +5571,8 @@ GrB_Info GB_tran__abs_uint8_bool
 
 GrB_Info GB_unop__abs_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5589,8 +5589,8 @@ GrB_Info GB_tran__abs_uint8_int8
 
 GrB_Info GB_unop__abs_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5607,8 +5607,8 @@ GrB_Info GB_tran__abs_uint8_int16
 
 GrB_Info GB_unop__abs_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5625,8 +5625,8 @@ GrB_Info GB_tran__abs_uint8_int32
 
 GrB_Info GB_unop__abs_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5643,8 +5643,8 @@ GrB_Info GB_tran__abs_uint8_int64
 
 GrB_Info GB_unop__abs_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5661,8 +5661,8 @@ GrB_Info GB_tran__abs_uint8_uint8
 
 GrB_Info GB_unop__abs_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5679,8 +5679,8 @@ GrB_Info GB_tran__abs_uint8_uint16
 
 GrB_Info GB_unop__abs_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5697,8 +5697,8 @@ GrB_Info GB_tran__abs_uint8_uint32
 
 GrB_Info GB_unop__abs_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5715,8 +5715,8 @@ GrB_Info GB_tran__abs_uint8_uint64
 
 GrB_Info GB_unop__abs_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5733,8 +5733,8 @@ GrB_Info GB_tran__abs_uint8_fp32
 
 GrB_Info GB_unop__abs_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5751,8 +5751,8 @@ GrB_Info GB_tran__abs_uint8_fp64
 
 GrB_Info GB_unop__abs_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5769,8 +5769,8 @@ GrB_Info GB_tran__abs_uint16_bool
 
 GrB_Info GB_unop__abs_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5787,8 +5787,8 @@ GrB_Info GB_tran__abs_uint16_int8
 
 GrB_Info GB_unop__abs_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5805,8 +5805,8 @@ GrB_Info GB_tran__abs_uint16_int16
 
 GrB_Info GB_unop__abs_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5823,8 +5823,8 @@ GrB_Info GB_tran__abs_uint16_int32
 
 GrB_Info GB_unop__abs_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5841,8 +5841,8 @@ GrB_Info GB_tran__abs_uint16_int64
 
 GrB_Info GB_unop__abs_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5859,8 +5859,8 @@ GrB_Info GB_tran__abs_uint16_uint8
 
 GrB_Info GB_unop__abs_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5877,8 +5877,8 @@ GrB_Info GB_tran__abs_uint16_uint16
 
 GrB_Info GB_unop__abs_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5895,8 +5895,8 @@ GrB_Info GB_tran__abs_uint16_uint32
 
 GrB_Info GB_unop__abs_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5913,8 +5913,8 @@ GrB_Info GB_tran__abs_uint16_uint64
 
 GrB_Info GB_unop__abs_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5931,8 +5931,8 @@ GrB_Info GB_tran__abs_uint16_fp32
 
 GrB_Info GB_unop__abs_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5949,8 +5949,8 @@ GrB_Info GB_tran__abs_uint16_fp64
 
 GrB_Info GB_unop__abs_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5967,8 +5967,8 @@ GrB_Info GB_tran__abs_uint32_bool
 
 GrB_Info GB_unop__abs_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -5985,8 +5985,8 @@ GrB_Info GB_tran__abs_uint32_int8
 
 GrB_Info GB_unop__abs_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6003,8 +6003,8 @@ GrB_Info GB_tran__abs_uint32_int16
 
 GrB_Info GB_unop__abs_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6021,8 +6021,8 @@ GrB_Info GB_tran__abs_uint32_int32
 
 GrB_Info GB_unop__abs_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6039,8 +6039,8 @@ GrB_Info GB_tran__abs_uint32_int64
 
 GrB_Info GB_unop__abs_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6057,8 +6057,8 @@ GrB_Info GB_tran__abs_uint32_uint8
 
 GrB_Info GB_unop__abs_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6075,8 +6075,8 @@ GrB_Info GB_tran__abs_uint32_uint16
 
 GrB_Info GB_unop__abs_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6093,8 +6093,8 @@ GrB_Info GB_tran__abs_uint32_uint32
 
 GrB_Info GB_unop__abs_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6111,8 +6111,8 @@ GrB_Info GB_tran__abs_uint32_uint64
 
 GrB_Info GB_unop__abs_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6129,8 +6129,8 @@ GrB_Info GB_tran__abs_uint32_fp32
 
 GrB_Info GB_unop__abs_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6147,8 +6147,8 @@ GrB_Info GB_tran__abs_uint32_fp64
 
 GrB_Info GB_unop__abs_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6165,8 +6165,8 @@ GrB_Info GB_tran__abs_uint64_bool
 
 GrB_Info GB_unop__abs_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6183,8 +6183,8 @@ GrB_Info GB_tran__abs_uint64_int8
 
 GrB_Info GB_unop__abs_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6201,8 +6201,8 @@ GrB_Info GB_tran__abs_uint64_int16
 
 GrB_Info GB_unop__abs_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6219,8 +6219,8 @@ GrB_Info GB_tran__abs_uint64_int32
 
 GrB_Info GB_unop__abs_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6237,8 +6237,8 @@ GrB_Info GB_tran__abs_uint64_int64
 
 GrB_Info GB_unop__abs_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6255,8 +6255,8 @@ GrB_Info GB_tran__abs_uint64_uint8
 
 GrB_Info GB_unop__abs_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6273,8 +6273,8 @@ GrB_Info GB_tran__abs_uint64_uint16
 
 GrB_Info GB_unop__abs_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6291,8 +6291,8 @@ GrB_Info GB_tran__abs_uint64_uint32
 
 GrB_Info GB_unop__abs_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6309,8 +6309,8 @@ GrB_Info GB_tran__abs_uint64_uint64
 
 GrB_Info GB_unop__abs_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6327,8 +6327,8 @@ GrB_Info GB_tran__abs_uint64_fp32
 
 GrB_Info GB_unop__abs_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6345,8 +6345,8 @@ GrB_Info GB_tran__abs_uint64_fp64
 
 GrB_Info GB_unop__abs_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6363,8 +6363,8 @@ GrB_Info GB_tran__abs_fp32_bool
 
 GrB_Info GB_unop__abs_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6381,8 +6381,8 @@ GrB_Info GB_tran__abs_fp32_int8
 
 GrB_Info GB_unop__abs_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6399,8 +6399,8 @@ GrB_Info GB_tran__abs_fp32_int16
 
 GrB_Info GB_unop__abs_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6417,8 +6417,8 @@ GrB_Info GB_tran__abs_fp32_int32
 
 GrB_Info GB_unop__abs_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6435,8 +6435,8 @@ GrB_Info GB_tran__abs_fp32_int64
 
 GrB_Info GB_unop__abs_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6453,8 +6453,8 @@ GrB_Info GB_tran__abs_fp32_uint8
 
 GrB_Info GB_unop__abs_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6471,8 +6471,8 @@ GrB_Info GB_tran__abs_fp32_uint16
 
 GrB_Info GB_unop__abs_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6489,8 +6489,8 @@ GrB_Info GB_tran__abs_fp32_uint32
 
 GrB_Info GB_unop__abs_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6507,8 +6507,8 @@ GrB_Info GB_tran__abs_fp32_uint64
 
 GrB_Info GB_unop__abs_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6525,8 +6525,8 @@ GrB_Info GB_tran__abs_fp32_fp32
 
 GrB_Info GB_unop__abs_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6543,8 +6543,8 @@ GrB_Info GB_tran__abs_fp32_fp64
 
 GrB_Info GB_unop__abs_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6561,8 +6561,8 @@ GrB_Info GB_tran__abs_fp64_bool
 
 GrB_Info GB_unop__abs_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6579,8 +6579,8 @@ GrB_Info GB_tran__abs_fp64_int8
 
 GrB_Info GB_unop__abs_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6597,8 +6597,8 @@ GrB_Info GB_tran__abs_fp64_int16
 
 GrB_Info GB_unop__abs_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6615,8 +6615,8 @@ GrB_Info GB_tran__abs_fp64_int32
 
 GrB_Info GB_unop__abs_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6633,8 +6633,8 @@ GrB_Info GB_tran__abs_fp64_int64
 
 GrB_Info GB_unop__abs_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6651,8 +6651,8 @@ GrB_Info GB_tran__abs_fp64_uint8
 
 GrB_Info GB_unop__abs_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6669,8 +6669,8 @@ GrB_Info GB_tran__abs_fp64_uint16
 
 GrB_Info GB_unop__abs_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6687,8 +6687,8 @@ GrB_Info GB_tran__abs_fp64_uint32
 
 GrB_Info GB_unop__abs_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6705,8 +6705,8 @@ GrB_Info GB_tran__abs_fp64_uint64
 
 GrB_Info GB_unop__abs_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6723,8 +6723,8 @@ GrB_Info GB_tran__abs_fp64_fp32
 
 GrB_Info GB_unop__abs_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6741,8 +6741,8 @@ GrB_Info GB_tran__abs_fp64_fp64
 
 GrB_Info GB_unop__minv_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6759,8 +6759,8 @@ GrB_Info GB_tran__minv_bool_bool
 
 GrB_Info GB_unop__minv_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6777,8 +6777,8 @@ GrB_Info GB_tran__minv_bool_int8
 
 GrB_Info GB_unop__minv_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6795,8 +6795,8 @@ GrB_Info GB_tran__minv_bool_int16
 
 GrB_Info GB_unop__minv_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6813,8 +6813,8 @@ GrB_Info GB_tran__minv_bool_int32
 
 GrB_Info GB_unop__minv_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6831,8 +6831,8 @@ GrB_Info GB_tran__minv_bool_int64
 
 GrB_Info GB_unop__minv_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6849,8 +6849,8 @@ GrB_Info GB_tran__minv_bool_uint8
 
 GrB_Info GB_unop__minv_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6867,8 +6867,8 @@ GrB_Info GB_tran__minv_bool_uint16
 
 GrB_Info GB_unop__minv_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6885,8 +6885,8 @@ GrB_Info GB_tran__minv_bool_uint32
 
 GrB_Info GB_unop__minv_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6903,8 +6903,8 @@ GrB_Info GB_tran__minv_bool_uint64
 
 GrB_Info GB_unop__minv_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6921,8 +6921,8 @@ GrB_Info GB_tran__minv_bool_fp32
 
 GrB_Info GB_unop__minv_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6939,8 +6939,8 @@ GrB_Info GB_tran__minv_bool_fp64
 
 GrB_Info GB_unop__minv_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6957,8 +6957,8 @@ GrB_Info GB_tran__minv_int8_bool
 
 GrB_Info GB_unop__minv_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6975,8 +6975,8 @@ GrB_Info GB_tran__minv_int8_int8
 
 GrB_Info GB_unop__minv_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -6993,8 +6993,8 @@ GrB_Info GB_tran__minv_int8_int16
 
 GrB_Info GB_unop__minv_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7011,8 +7011,8 @@ GrB_Info GB_tran__minv_int8_int32
 
 GrB_Info GB_unop__minv_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7029,8 +7029,8 @@ GrB_Info GB_tran__minv_int8_int64
 
 GrB_Info GB_unop__minv_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7047,8 +7047,8 @@ GrB_Info GB_tran__minv_int8_uint8
 
 GrB_Info GB_unop__minv_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7065,8 +7065,8 @@ GrB_Info GB_tran__minv_int8_uint16
 
 GrB_Info GB_unop__minv_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7083,8 +7083,8 @@ GrB_Info GB_tran__minv_int8_uint32
 
 GrB_Info GB_unop__minv_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7101,8 +7101,8 @@ GrB_Info GB_tran__minv_int8_uint64
 
 GrB_Info GB_unop__minv_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7119,8 +7119,8 @@ GrB_Info GB_tran__minv_int8_fp32
 
 GrB_Info GB_unop__minv_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7137,8 +7137,8 @@ GrB_Info GB_tran__minv_int8_fp64
 
 GrB_Info GB_unop__minv_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7155,8 +7155,8 @@ GrB_Info GB_tran__minv_int16_bool
 
 GrB_Info GB_unop__minv_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7173,8 +7173,8 @@ GrB_Info GB_tran__minv_int16_int8
 
 GrB_Info GB_unop__minv_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7191,8 +7191,8 @@ GrB_Info GB_tran__minv_int16_int16
 
 GrB_Info GB_unop__minv_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7209,8 +7209,8 @@ GrB_Info GB_tran__minv_int16_int32
 
 GrB_Info GB_unop__minv_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7227,8 +7227,8 @@ GrB_Info GB_tran__minv_int16_int64
 
 GrB_Info GB_unop__minv_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7245,8 +7245,8 @@ GrB_Info GB_tran__minv_int16_uint8
 
 GrB_Info GB_unop__minv_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7263,8 +7263,8 @@ GrB_Info GB_tran__minv_int16_uint16
 
 GrB_Info GB_unop__minv_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7281,8 +7281,8 @@ GrB_Info GB_tran__minv_int16_uint32
 
 GrB_Info GB_unop__minv_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7299,8 +7299,8 @@ GrB_Info GB_tran__minv_int16_uint64
 
 GrB_Info GB_unop__minv_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7317,8 +7317,8 @@ GrB_Info GB_tran__minv_int16_fp32
 
 GrB_Info GB_unop__minv_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7335,8 +7335,8 @@ GrB_Info GB_tran__minv_int16_fp64
 
 GrB_Info GB_unop__minv_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7353,8 +7353,8 @@ GrB_Info GB_tran__minv_int32_bool
 
 GrB_Info GB_unop__minv_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7371,8 +7371,8 @@ GrB_Info GB_tran__minv_int32_int8
 
 GrB_Info GB_unop__minv_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7389,8 +7389,8 @@ GrB_Info GB_tran__minv_int32_int16
 
 GrB_Info GB_unop__minv_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7407,8 +7407,8 @@ GrB_Info GB_tran__minv_int32_int32
 
 GrB_Info GB_unop__minv_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7425,8 +7425,8 @@ GrB_Info GB_tran__minv_int32_int64
 
 GrB_Info GB_unop__minv_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7443,8 +7443,8 @@ GrB_Info GB_tran__minv_int32_uint8
 
 GrB_Info GB_unop__minv_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7461,8 +7461,8 @@ GrB_Info GB_tran__minv_int32_uint16
 
 GrB_Info GB_unop__minv_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7479,8 +7479,8 @@ GrB_Info GB_tran__minv_int32_uint32
 
 GrB_Info GB_unop__minv_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7497,8 +7497,8 @@ GrB_Info GB_tran__minv_int32_uint64
 
 GrB_Info GB_unop__minv_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7515,8 +7515,8 @@ GrB_Info GB_tran__minv_int32_fp32
 
 GrB_Info GB_unop__minv_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7533,8 +7533,8 @@ GrB_Info GB_tran__minv_int32_fp64
 
 GrB_Info GB_unop__minv_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7551,8 +7551,8 @@ GrB_Info GB_tran__minv_int64_bool
 
 GrB_Info GB_unop__minv_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7569,8 +7569,8 @@ GrB_Info GB_tran__minv_int64_int8
 
 GrB_Info GB_unop__minv_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7587,8 +7587,8 @@ GrB_Info GB_tran__minv_int64_int16
 
 GrB_Info GB_unop__minv_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7605,8 +7605,8 @@ GrB_Info GB_tran__minv_int64_int32
 
 GrB_Info GB_unop__minv_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7623,8 +7623,8 @@ GrB_Info GB_tran__minv_int64_int64
 
 GrB_Info GB_unop__minv_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7641,8 +7641,8 @@ GrB_Info GB_tran__minv_int64_uint8
 
 GrB_Info GB_unop__minv_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7659,8 +7659,8 @@ GrB_Info GB_tran__minv_int64_uint16
 
 GrB_Info GB_unop__minv_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7677,8 +7677,8 @@ GrB_Info GB_tran__minv_int64_uint32
 
 GrB_Info GB_unop__minv_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7695,8 +7695,8 @@ GrB_Info GB_tran__minv_int64_uint64
 
 GrB_Info GB_unop__minv_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7713,8 +7713,8 @@ GrB_Info GB_tran__minv_int64_fp32
 
 GrB_Info GB_unop__minv_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7731,8 +7731,8 @@ GrB_Info GB_tran__minv_int64_fp64
 
 GrB_Info GB_unop__minv_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7749,8 +7749,8 @@ GrB_Info GB_tran__minv_uint8_bool
 
 GrB_Info GB_unop__minv_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7767,8 +7767,8 @@ GrB_Info GB_tran__minv_uint8_int8
 
 GrB_Info GB_unop__minv_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7785,8 +7785,8 @@ GrB_Info GB_tran__minv_uint8_int16
 
 GrB_Info GB_unop__minv_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7803,8 +7803,8 @@ GrB_Info GB_tran__minv_uint8_int32
 
 GrB_Info GB_unop__minv_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7821,8 +7821,8 @@ GrB_Info GB_tran__minv_uint8_int64
 
 GrB_Info GB_unop__minv_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7839,8 +7839,8 @@ GrB_Info GB_tran__minv_uint8_uint8
 
 GrB_Info GB_unop__minv_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7857,8 +7857,8 @@ GrB_Info GB_tran__minv_uint8_uint16
 
 GrB_Info GB_unop__minv_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7875,8 +7875,8 @@ GrB_Info GB_tran__minv_uint8_uint32
 
 GrB_Info GB_unop__minv_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7893,8 +7893,8 @@ GrB_Info GB_tran__minv_uint8_uint64
 
 GrB_Info GB_unop__minv_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7911,8 +7911,8 @@ GrB_Info GB_tran__minv_uint8_fp32
 
 GrB_Info GB_unop__minv_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7929,8 +7929,8 @@ GrB_Info GB_tran__minv_uint8_fp64
 
 GrB_Info GB_unop__minv_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7947,8 +7947,8 @@ GrB_Info GB_tran__minv_uint16_bool
 
 GrB_Info GB_unop__minv_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7965,8 +7965,8 @@ GrB_Info GB_tran__minv_uint16_int8
 
 GrB_Info GB_unop__minv_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -7983,8 +7983,8 @@ GrB_Info GB_tran__minv_uint16_int16
 
 GrB_Info GB_unop__minv_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8001,8 +8001,8 @@ GrB_Info GB_tran__minv_uint16_int32
 
 GrB_Info GB_unop__minv_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8019,8 +8019,8 @@ GrB_Info GB_tran__minv_uint16_int64
 
 GrB_Info GB_unop__minv_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8037,8 +8037,8 @@ GrB_Info GB_tran__minv_uint16_uint8
 
 GrB_Info GB_unop__minv_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8055,8 +8055,8 @@ GrB_Info GB_tran__minv_uint16_uint16
 
 GrB_Info GB_unop__minv_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8073,8 +8073,8 @@ GrB_Info GB_tran__minv_uint16_uint32
 
 GrB_Info GB_unop__minv_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8091,8 +8091,8 @@ GrB_Info GB_tran__minv_uint16_uint64
 
 GrB_Info GB_unop__minv_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8109,8 +8109,8 @@ GrB_Info GB_tran__minv_uint16_fp32
 
 GrB_Info GB_unop__minv_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8127,8 +8127,8 @@ GrB_Info GB_tran__minv_uint16_fp64
 
 GrB_Info GB_unop__minv_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8145,8 +8145,8 @@ GrB_Info GB_tran__minv_uint32_bool
 
 GrB_Info GB_unop__minv_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8163,8 +8163,8 @@ GrB_Info GB_tran__minv_uint32_int8
 
 GrB_Info GB_unop__minv_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8181,8 +8181,8 @@ GrB_Info GB_tran__minv_uint32_int16
 
 GrB_Info GB_unop__minv_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8199,8 +8199,8 @@ GrB_Info GB_tran__minv_uint32_int32
 
 GrB_Info GB_unop__minv_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8217,8 +8217,8 @@ GrB_Info GB_tran__minv_uint32_int64
 
 GrB_Info GB_unop__minv_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8235,8 +8235,8 @@ GrB_Info GB_tran__minv_uint32_uint8
 
 GrB_Info GB_unop__minv_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8253,8 +8253,8 @@ GrB_Info GB_tran__minv_uint32_uint16
 
 GrB_Info GB_unop__minv_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8271,8 +8271,8 @@ GrB_Info GB_tran__minv_uint32_uint32
 
 GrB_Info GB_unop__minv_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8289,8 +8289,8 @@ GrB_Info GB_tran__minv_uint32_uint64
 
 GrB_Info GB_unop__minv_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8307,8 +8307,8 @@ GrB_Info GB_tran__minv_uint32_fp32
 
 GrB_Info GB_unop__minv_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8325,8 +8325,8 @@ GrB_Info GB_tran__minv_uint32_fp64
 
 GrB_Info GB_unop__minv_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8343,8 +8343,8 @@ GrB_Info GB_tran__minv_uint64_bool
 
 GrB_Info GB_unop__minv_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8361,8 +8361,8 @@ GrB_Info GB_tran__minv_uint64_int8
 
 GrB_Info GB_unop__minv_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8379,8 +8379,8 @@ GrB_Info GB_tran__minv_uint64_int16
 
 GrB_Info GB_unop__minv_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8397,8 +8397,8 @@ GrB_Info GB_tran__minv_uint64_int32
 
 GrB_Info GB_unop__minv_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8415,8 +8415,8 @@ GrB_Info GB_tran__minv_uint64_int64
 
 GrB_Info GB_unop__minv_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8433,8 +8433,8 @@ GrB_Info GB_tran__minv_uint64_uint8
 
 GrB_Info GB_unop__minv_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8451,8 +8451,8 @@ GrB_Info GB_tran__minv_uint64_uint16
 
 GrB_Info GB_unop__minv_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8469,8 +8469,8 @@ GrB_Info GB_tran__minv_uint64_uint32
 
 GrB_Info GB_unop__minv_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8487,8 +8487,8 @@ GrB_Info GB_tran__minv_uint64_uint64
 
 GrB_Info GB_unop__minv_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8505,8 +8505,8 @@ GrB_Info GB_tran__minv_uint64_fp32
 
 GrB_Info GB_unop__minv_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8523,8 +8523,8 @@ GrB_Info GB_tran__minv_uint64_fp64
 
 GrB_Info GB_unop__minv_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8541,8 +8541,8 @@ GrB_Info GB_tran__minv_fp32_bool
 
 GrB_Info GB_unop__minv_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8559,8 +8559,8 @@ GrB_Info GB_tran__minv_fp32_int8
 
 GrB_Info GB_unop__minv_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8577,8 +8577,8 @@ GrB_Info GB_tran__minv_fp32_int16
 
 GrB_Info GB_unop__minv_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8595,8 +8595,8 @@ GrB_Info GB_tran__minv_fp32_int32
 
 GrB_Info GB_unop__minv_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8613,8 +8613,8 @@ GrB_Info GB_tran__minv_fp32_int64
 
 GrB_Info GB_unop__minv_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8631,8 +8631,8 @@ GrB_Info GB_tran__minv_fp32_uint8
 
 GrB_Info GB_unop__minv_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8649,8 +8649,8 @@ GrB_Info GB_tran__minv_fp32_uint16
 
 GrB_Info GB_unop__minv_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8667,8 +8667,8 @@ GrB_Info GB_tran__minv_fp32_uint32
 
 GrB_Info GB_unop__minv_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8685,8 +8685,8 @@ GrB_Info GB_tran__minv_fp32_uint64
 
 GrB_Info GB_unop__minv_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8703,8 +8703,8 @@ GrB_Info GB_tran__minv_fp32_fp32
 
 GrB_Info GB_unop__minv_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8721,8 +8721,8 @@ GrB_Info GB_tran__minv_fp32_fp64
 
 GrB_Info GB_unop__minv_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8739,8 +8739,8 @@ GrB_Info GB_tran__minv_fp64_bool
 
 GrB_Info GB_unop__minv_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8757,8 +8757,8 @@ GrB_Info GB_tran__minv_fp64_int8
 
 GrB_Info GB_unop__minv_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8775,8 +8775,8 @@ GrB_Info GB_tran__minv_fp64_int16
 
 GrB_Info GB_unop__minv_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8793,8 +8793,8 @@ GrB_Info GB_tran__minv_fp64_int32
 
 GrB_Info GB_unop__minv_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8811,8 +8811,8 @@ GrB_Info GB_tran__minv_fp64_int64
 
 GrB_Info GB_unop__minv_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8829,8 +8829,8 @@ GrB_Info GB_tran__minv_fp64_uint8
 
 GrB_Info GB_unop__minv_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8847,8 +8847,8 @@ GrB_Info GB_tran__minv_fp64_uint16
 
 GrB_Info GB_unop__minv_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8865,8 +8865,8 @@ GrB_Info GB_tran__minv_fp64_uint32
 
 GrB_Info GB_unop__minv_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8883,8 +8883,8 @@ GrB_Info GB_tran__minv_fp64_uint64
 
 GrB_Info GB_unop__minv_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8901,8 +8901,8 @@ GrB_Info GB_tran__minv_fp64_fp32
 
 GrB_Info GB_unop__minv_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8919,8 +8919,8 @@ GrB_Info GB_tran__minv_fp64_fp64
 
 GrB_Info GB_unop__lnot_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8937,8 +8937,8 @@ GrB_Info GB_tran__lnot_bool_bool
 
 GrB_Info GB_unop__lnot_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8955,8 +8955,8 @@ GrB_Info GB_tran__lnot_bool_int8
 
 GrB_Info GB_unop__lnot_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8973,8 +8973,8 @@ GrB_Info GB_tran__lnot_bool_int16
 
 GrB_Info GB_unop__lnot_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -8991,8 +8991,8 @@ GrB_Info GB_tran__lnot_bool_int32
 
 GrB_Info GB_unop__lnot_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9009,8 +9009,8 @@ GrB_Info GB_tran__lnot_bool_int64
 
 GrB_Info GB_unop__lnot_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9027,8 +9027,8 @@ GrB_Info GB_tran__lnot_bool_uint8
 
 GrB_Info GB_unop__lnot_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9045,8 +9045,8 @@ GrB_Info GB_tran__lnot_bool_uint16
 
 GrB_Info GB_unop__lnot_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9063,8 +9063,8 @@ GrB_Info GB_tran__lnot_bool_uint32
 
 GrB_Info GB_unop__lnot_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9081,8 +9081,8 @@ GrB_Info GB_tran__lnot_bool_uint64
 
 GrB_Info GB_unop__lnot_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9099,8 +9099,8 @@ GrB_Info GB_tran__lnot_bool_fp32
 
 GrB_Info GB_unop__lnot_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9117,8 +9117,8 @@ GrB_Info GB_tran__lnot_bool_fp64
 
 GrB_Info GB_unop__lnot_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9135,8 +9135,8 @@ GrB_Info GB_tran__lnot_int8_bool
 
 GrB_Info GB_unop__lnot_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9153,8 +9153,8 @@ GrB_Info GB_tran__lnot_int8_int8
 
 GrB_Info GB_unop__lnot_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9171,8 +9171,8 @@ GrB_Info GB_tran__lnot_int8_int16
 
 GrB_Info GB_unop__lnot_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9189,8 +9189,8 @@ GrB_Info GB_tran__lnot_int8_int32
 
 GrB_Info GB_unop__lnot_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9207,8 +9207,8 @@ GrB_Info GB_tran__lnot_int8_int64
 
 GrB_Info GB_unop__lnot_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9225,8 +9225,8 @@ GrB_Info GB_tran__lnot_int8_uint8
 
 GrB_Info GB_unop__lnot_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9243,8 +9243,8 @@ GrB_Info GB_tran__lnot_int8_uint16
 
 GrB_Info GB_unop__lnot_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9261,8 +9261,8 @@ GrB_Info GB_tran__lnot_int8_uint32
 
 GrB_Info GB_unop__lnot_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9279,8 +9279,8 @@ GrB_Info GB_tran__lnot_int8_uint64
 
 GrB_Info GB_unop__lnot_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9297,8 +9297,8 @@ GrB_Info GB_tran__lnot_int8_fp32
 
 GrB_Info GB_unop__lnot_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9315,8 +9315,8 @@ GrB_Info GB_tran__lnot_int8_fp64
 
 GrB_Info GB_unop__lnot_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9333,8 +9333,8 @@ GrB_Info GB_tran__lnot_int16_bool
 
 GrB_Info GB_unop__lnot_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9351,8 +9351,8 @@ GrB_Info GB_tran__lnot_int16_int8
 
 GrB_Info GB_unop__lnot_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9369,8 +9369,8 @@ GrB_Info GB_tran__lnot_int16_int16
 
 GrB_Info GB_unop__lnot_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9387,8 +9387,8 @@ GrB_Info GB_tran__lnot_int16_int32
 
 GrB_Info GB_unop__lnot_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9405,8 +9405,8 @@ GrB_Info GB_tran__lnot_int16_int64
 
 GrB_Info GB_unop__lnot_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9423,8 +9423,8 @@ GrB_Info GB_tran__lnot_int16_uint8
 
 GrB_Info GB_unop__lnot_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9441,8 +9441,8 @@ GrB_Info GB_tran__lnot_int16_uint16
 
 GrB_Info GB_unop__lnot_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9459,8 +9459,8 @@ GrB_Info GB_tran__lnot_int16_uint32
 
 GrB_Info GB_unop__lnot_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9477,8 +9477,8 @@ GrB_Info GB_tran__lnot_int16_uint64
 
 GrB_Info GB_unop__lnot_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9495,8 +9495,8 @@ GrB_Info GB_tran__lnot_int16_fp32
 
 GrB_Info GB_unop__lnot_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9513,8 +9513,8 @@ GrB_Info GB_tran__lnot_int16_fp64
 
 GrB_Info GB_unop__lnot_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9531,8 +9531,8 @@ GrB_Info GB_tran__lnot_int32_bool
 
 GrB_Info GB_unop__lnot_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9549,8 +9549,8 @@ GrB_Info GB_tran__lnot_int32_int8
 
 GrB_Info GB_unop__lnot_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9567,8 +9567,8 @@ GrB_Info GB_tran__lnot_int32_int16
 
 GrB_Info GB_unop__lnot_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9585,8 +9585,8 @@ GrB_Info GB_tran__lnot_int32_int32
 
 GrB_Info GB_unop__lnot_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9603,8 +9603,8 @@ GrB_Info GB_tran__lnot_int32_int64
 
 GrB_Info GB_unop__lnot_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9621,8 +9621,8 @@ GrB_Info GB_tran__lnot_int32_uint8
 
 GrB_Info GB_unop__lnot_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9639,8 +9639,8 @@ GrB_Info GB_tran__lnot_int32_uint16
 
 GrB_Info GB_unop__lnot_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9657,8 +9657,8 @@ GrB_Info GB_tran__lnot_int32_uint32
 
 GrB_Info GB_unop__lnot_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9675,8 +9675,8 @@ GrB_Info GB_tran__lnot_int32_uint64
 
 GrB_Info GB_unop__lnot_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9693,8 +9693,8 @@ GrB_Info GB_tran__lnot_int32_fp32
 
 GrB_Info GB_unop__lnot_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9711,8 +9711,8 @@ GrB_Info GB_tran__lnot_int32_fp64
 
 GrB_Info GB_unop__lnot_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9729,8 +9729,8 @@ GrB_Info GB_tran__lnot_int64_bool
 
 GrB_Info GB_unop__lnot_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9747,8 +9747,8 @@ GrB_Info GB_tran__lnot_int64_int8
 
 GrB_Info GB_unop__lnot_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9765,8 +9765,8 @@ GrB_Info GB_tran__lnot_int64_int16
 
 GrB_Info GB_unop__lnot_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9783,8 +9783,8 @@ GrB_Info GB_tran__lnot_int64_int32
 
 GrB_Info GB_unop__lnot_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9801,8 +9801,8 @@ GrB_Info GB_tran__lnot_int64_int64
 
 GrB_Info GB_unop__lnot_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9819,8 +9819,8 @@ GrB_Info GB_tran__lnot_int64_uint8
 
 GrB_Info GB_unop__lnot_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9837,8 +9837,8 @@ GrB_Info GB_tran__lnot_int64_uint16
 
 GrB_Info GB_unop__lnot_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9855,8 +9855,8 @@ GrB_Info GB_tran__lnot_int64_uint32
 
 GrB_Info GB_unop__lnot_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9873,8 +9873,8 @@ GrB_Info GB_tran__lnot_int64_uint64
 
 GrB_Info GB_unop__lnot_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9891,8 +9891,8 @@ GrB_Info GB_tran__lnot_int64_fp32
 
 GrB_Info GB_unop__lnot_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9909,8 +9909,8 @@ GrB_Info GB_tran__lnot_int64_fp64
 
 GrB_Info GB_unop__lnot_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9927,8 +9927,8 @@ GrB_Info GB_tran__lnot_uint8_bool
 
 GrB_Info GB_unop__lnot_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9945,8 +9945,8 @@ GrB_Info GB_tran__lnot_uint8_int8
 
 GrB_Info GB_unop__lnot_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9963,8 +9963,8 @@ GrB_Info GB_tran__lnot_uint8_int16
 
 GrB_Info GB_unop__lnot_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9981,8 +9981,8 @@ GrB_Info GB_tran__lnot_uint8_int32
 
 GrB_Info GB_unop__lnot_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -9999,8 +9999,8 @@ GrB_Info GB_tran__lnot_uint8_int64
 
 GrB_Info GB_unop__lnot_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10017,8 +10017,8 @@ GrB_Info GB_tran__lnot_uint8_uint8
 
 GrB_Info GB_unop__lnot_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10035,8 +10035,8 @@ GrB_Info GB_tran__lnot_uint8_uint16
 
 GrB_Info GB_unop__lnot_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10053,8 +10053,8 @@ GrB_Info GB_tran__lnot_uint8_uint32
 
 GrB_Info GB_unop__lnot_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10071,8 +10071,8 @@ GrB_Info GB_tran__lnot_uint8_uint64
 
 GrB_Info GB_unop__lnot_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10089,8 +10089,8 @@ GrB_Info GB_tran__lnot_uint8_fp32
 
 GrB_Info GB_unop__lnot_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10107,8 +10107,8 @@ GrB_Info GB_tran__lnot_uint8_fp64
 
 GrB_Info GB_unop__lnot_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10125,8 +10125,8 @@ GrB_Info GB_tran__lnot_uint16_bool
 
 GrB_Info GB_unop__lnot_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10143,8 +10143,8 @@ GrB_Info GB_tran__lnot_uint16_int8
 
 GrB_Info GB_unop__lnot_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10161,8 +10161,8 @@ GrB_Info GB_tran__lnot_uint16_int16
 
 GrB_Info GB_unop__lnot_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10179,8 +10179,8 @@ GrB_Info GB_tran__lnot_uint16_int32
 
 GrB_Info GB_unop__lnot_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10197,8 +10197,8 @@ GrB_Info GB_tran__lnot_uint16_int64
 
 GrB_Info GB_unop__lnot_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10215,8 +10215,8 @@ GrB_Info GB_tran__lnot_uint16_uint8
 
 GrB_Info GB_unop__lnot_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10233,8 +10233,8 @@ GrB_Info GB_tran__lnot_uint16_uint16
 
 GrB_Info GB_unop__lnot_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10251,8 +10251,8 @@ GrB_Info GB_tran__lnot_uint16_uint32
 
 GrB_Info GB_unop__lnot_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10269,8 +10269,8 @@ GrB_Info GB_tran__lnot_uint16_uint64
 
 GrB_Info GB_unop__lnot_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10287,8 +10287,8 @@ GrB_Info GB_tran__lnot_uint16_fp32
 
 GrB_Info GB_unop__lnot_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10305,8 +10305,8 @@ GrB_Info GB_tran__lnot_uint16_fp64
 
 GrB_Info GB_unop__lnot_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10323,8 +10323,8 @@ GrB_Info GB_tran__lnot_uint32_bool
 
 GrB_Info GB_unop__lnot_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10341,8 +10341,8 @@ GrB_Info GB_tran__lnot_uint32_int8
 
 GrB_Info GB_unop__lnot_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10359,8 +10359,8 @@ GrB_Info GB_tran__lnot_uint32_int16
 
 GrB_Info GB_unop__lnot_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10377,8 +10377,8 @@ GrB_Info GB_tran__lnot_uint32_int32
 
 GrB_Info GB_unop__lnot_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10395,8 +10395,8 @@ GrB_Info GB_tran__lnot_uint32_int64
 
 GrB_Info GB_unop__lnot_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10413,8 +10413,8 @@ GrB_Info GB_tran__lnot_uint32_uint8
 
 GrB_Info GB_unop__lnot_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10431,8 +10431,8 @@ GrB_Info GB_tran__lnot_uint32_uint16
 
 GrB_Info GB_unop__lnot_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10449,8 +10449,8 @@ GrB_Info GB_tran__lnot_uint32_uint32
 
 GrB_Info GB_unop__lnot_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10467,8 +10467,8 @@ GrB_Info GB_tran__lnot_uint32_uint64
 
 GrB_Info GB_unop__lnot_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10485,8 +10485,8 @@ GrB_Info GB_tran__lnot_uint32_fp32
 
 GrB_Info GB_unop__lnot_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10503,8 +10503,8 @@ GrB_Info GB_tran__lnot_uint32_fp64
 
 GrB_Info GB_unop__lnot_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10521,8 +10521,8 @@ GrB_Info GB_tran__lnot_uint64_bool
 
 GrB_Info GB_unop__lnot_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10539,8 +10539,8 @@ GrB_Info GB_tran__lnot_uint64_int8
 
 GrB_Info GB_unop__lnot_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10557,8 +10557,8 @@ GrB_Info GB_tran__lnot_uint64_int16
 
 GrB_Info GB_unop__lnot_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10575,8 +10575,8 @@ GrB_Info GB_tran__lnot_uint64_int32
 
 GrB_Info GB_unop__lnot_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10593,8 +10593,8 @@ GrB_Info GB_tran__lnot_uint64_int64
 
 GrB_Info GB_unop__lnot_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10611,8 +10611,8 @@ GrB_Info GB_tran__lnot_uint64_uint8
 
 GrB_Info GB_unop__lnot_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10629,8 +10629,8 @@ GrB_Info GB_tran__lnot_uint64_uint16
 
 GrB_Info GB_unop__lnot_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10647,8 +10647,8 @@ GrB_Info GB_tran__lnot_uint64_uint32
 
 GrB_Info GB_unop__lnot_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10665,8 +10665,8 @@ GrB_Info GB_tran__lnot_uint64_uint64
 
 GrB_Info GB_unop__lnot_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10683,8 +10683,8 @@ GrB_Info GB_tran__lnot_uint64_fp32
 
 GrB_Info GB_unop__lnot_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10701,8 +10701,8 @@ GrB_Info GB_tran__lnot_uint64_fp64
 
 GrB_Info GB_unop__lnot_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10719,8 +10719,8 @@ GrB_Info GB_tran__lnot_fp32_bool
 
 GrB_Info GB_unop__lnot_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10737,8 +10737,8 @@ GrB_Info GB_tran__lnot_fp32_int8
 
 GrB_Info GB_unop__lnot_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10755,8 +10755,8 @@ GrB_Info GB_tran__lnot_fp32_int16
 
 GrB_Info GB_unop__lnot_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10773,8 +10773,8 @@ GrB_Info GB_tran__lnot_fp32_int32
 
 GrB_Info GB_unop__lnot_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10791,8 +10791,8 @@ GrB_Info GB_tran__lnot_fp32_int64
 
 GrB_Info GB_unop__lnot_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10809,8 +10809,8 @@ GrB_Info GB_tran__lnot_fp32_uint8
 
 GrB_Info GB_unop__lnot_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10827,8 +10827,8 @@ GrB_Info GB_tran__lnot_fp32_uint16
 
 GrB_Info GB_unop__lnot_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10845,8 +10845,8 @@ GrB_Info GB_tran__lnot_fp32_uint32
 
 GrB_Info GB_unop__lnot_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10863,8 +10863,8 @@ GrB_Info GB_tran__lnot_fp32_uint64
 
 GrB_Info GB_unop__lnot_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10881,8 +10881,8 @@ GrB_Info GB_tran__lnot_fp32_fp32
 
 GrB_Info GB_unop__lnot_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10899,8 +10899,8 @@ GrB_Info GB_tran__lnot_fp32_fp64
 
 GrB_Info GB_unop__lnot_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,
+    bool *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10917,8 +10917,8 @@ GrB_Info GB_tran__lnot_fp64_bool
 
 GrB_Info GB_unop__lnot_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10935,8 +10935,8 @@ GrB_Info GB_tran__lnot_fp64_int8
 
 GrB_Info GB_unop__lnot_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10953,8 +10953,8 @@ GrB_Info GB_tran__lnot_fp64_int16
 
 GrB_Info GB_unop__lnot_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10971,8 +10971,8 @@ GrB_Info GB_tran__lnot_fp64_int32
 
 GrB_Info GB_unop__lnot_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -10989,8 +10989,8 @@ GrB_Info GB_tran__lnot_fp64_int64
 
 GrB_Info GB_unop__lnot_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -11007,8 +11007,8 @@ GrB_Info GB_tran__lnot_fp64_uint8
 
 GrB_Info GB_unop__lnot_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -11025,8 +11025,8 @@ GrB_Info GB_tran__lnot_fp64_uint16
 
 GrB_Info GB_unop__lnot_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -11043,8 +11043,8 @@ GrB_Info GB_tran__lnot_fp64_uint32
 
 GrB_Info GB_unop__lnot_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -11061,8 +11061,8 @@ GrB_Info GB_tran__lnot_fp64_uint64
 
 GrB_Info GB_unop__lnot_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,
+    float *Ax,
     int64_t anz,
     int nthreads
 ) ;
@@ -11079,8 +11079,8 @@ GrB_Info GB_tran__lnot_fp64_fp32
 
 GrB_Info GB_unop__lnot_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,
+    double *Ax,
     int64_t anz,
     int nthreads
 ) ;
diff --git a/Source/Generated/GB_unaryop__lnot_bool_bool.c b/Source/Generated/GB_unaryop__lnot_bool_bool.c
index f90ef18f46..d1cb36be05 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_fp32.c b/Source/Generated/GB_unaryop__lnot_bool_fp32.c
index 127e2f3ef8..0b7afa32ec 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_fp64.c b/Source/Generated/GB_unaryop__lnot_bool_fp64.c
index 3569d758fe..5bd5cc50a0 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_int16.c b/Source/Generated/GB_unaryop__lnot_bool_int16.c
index 4ab01eb0b2..fc5e9f527b 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_int32.c b/Source/Generated/GB_unaryop__lnot_bool_int32.c
index 55a5373d8e..5499c745c0 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_int64.c b/Source/Generated/GB_unaryop__lnot_bool_int64.c
index 11aa018564..9d70a193e0 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_int8.c b/Source/Generated/GB_unaryop__lnot_bool_int8.c
index 0a8d0745ab..0d6ed1aa31 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_uint16.c b/Source/Generated/GB_unaryop__lnot_bool_uint16.c
index 3d6d2335c8..52e5f069c1 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_uint32.c b/Source/Generated/GB_unaryop__lnot_bool_uint32.c
index 67f47716c6..27d10d62dc 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_uint64.c b/Source/Generated/GB_unaryop__lnot_bool_uint64.c
index 7f0c3ea626..7d50b361a6 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_bool_uint8.c b/Source/Generated/GB_unaryop__lnot_bool_uint8.c
index 1fcf1c3d3f..c270b065e4 100644
--- a/Source/Generated/GB_unaryop__lnot_bool_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_bool_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    bool z = (bool) x ;
+#define GB_CASTING(z, aij) \
+    bool z = (bool) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_bool.c b/Source/Generated/GB_unaryop__lnot_fp32_bool.c
index eb08685205..42b66dba31 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_fp32.c b/Source/Generated/GB_unaryop__lnot_fp32_fp32.c
index 5d6f0741af..5cac96ae33 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_fp64.c b/Source/Generated/GB_unaryop__lnot_fp32_fp64.c
index 1f3f950f87..73054b561a 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_int16.c b/Source/Generated/GB_unaryop__lnot_fp32_int16.c
index e2dfc17336..a42f969525 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_int32.c b/Source/Generated/GB_unaryop__lnot_fp32_int32.c
index 04e3097ea3..4ca2a292e4 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_int64.c b/Source/Generated/GB_unaryop__lnot_fp32_int64.c
index 9e420a6833..f3e5709891 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_int8.c b/Source/Generated/GB_unaryop__lnot_fp32_int8.c
index feae89bc36..475359dacd 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_uint16.c b/Source/Generated/GB_unaryop__lnot_fp32_uint16.c
index 3f2b45940a..58cbc90785 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_uint32.c b/Source/Generated/GB_unaryop__lnot_fp32_uint32.c
index af9060f92c..4aa020a384 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_uint64.c b/Source/Generated/GB_unaryop__lnot_fp32_uint64.c
index b2c824bcbd..dc77395958 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp32_uint8.c b/Source/Generated/GB_unaryop__lnot_fp32_uint8.c
index 1b2363cd60..f3d54861eb 100644
--- a/Source/Generated/GB_unaryop__lnot_fp32_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_fp32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_bool.c b/Source/Generated/GB_unaryop__lnot_fp64_bool.c
index d6e32f1162..fbdf68f0ce 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_fp32.c b/Source/Generated/GB_unaryop__lnot_fp64_fp32.c
index 9c3c8bea21..659c09b2b7 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_fp64.c b/Source/Generated/GB_unaryop__lnot_fp64_fp64.c
index 8f77bfd081..2f0f45c487 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_int16.c b/Source/Generated/GB_unaryop__lnot_fp64_int16.c
index 4aab82c516..1f8a18dd21 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_int32.c b/Source/Generated/GB_unaryop__lnot_fp64_int32.c
index 7197ae8bb0..c19208aee5 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_int64.c b/Source/Generated/GB_unaryop__lnot_fp64_int64.c
index 314e16f80d..a70f4f5607 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_int8.c b/Source/Generated/GB_unaryop__lnot_fp64_int8.c
index bc31a40634..6f83ae7b6c 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_uint16.c b/Source/Generated/GB_unaryop__lnot_fp64_uint16.c
index b3c98afbde..9714304bf3 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_uint32.c b/Source/Generated/GB_unaryop__lnot_fp64_uint32.c
index 5230e96087..06bda59d4e 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_uint64.c b/Source/Generated/GB_unaryop__lnot_fp64_uint64.c
index 3883c0d532..2bb7f560fc 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_fp64_uint8.c b/Source/Generated/GB_unaryop__lnot_fp64_uint8.c
index 6496da6c4f..81f3b948bc 100644
--- a/Source/Generated/GB_unaryop__lnot_fp64_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_fp64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_bool.c b/Source/Generated/GB_unaryop__lnot_int16_bool.c
index 27aa3e5f51..c964b1285a 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_fp32.c b/Source/Generated/GB_unaryop__lnot_int16_fp32.c
index 03da2acd65..403e2aba3d 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_fp64.c b/Source/Generated/GB_unaryop__lnot_int16_fp64.c
index c10af864aa..bb50ae59ca 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_int16.c b/Source/Generated/GB_unaryop__lnot_int16_int16.c
index e1fd38e5bd..b02e0ce885 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_int32.c b/Source/Generated/GB_unaryop__lnot_int16_int32.c
index 33fdd1f5a1..d28e5cbd8d 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_int64.c b/Source/Generated/GB_unaryop__lnot_int16_int64.c
index d5ffb949da..f1ae83aa50 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_int8.c b/Source/Generated/GB_unaryop__lnot_int16_int8.c
index f018e380b2..249a40b6e1 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_uint16.c b/Source/Generated/GB_unaryop__lnot_int16_uint16.c
index fd2cc6aaea..8cad82b49e 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_uint32.c b/Source/Generated/GB_unaryop__lnot_int16_uint32.c
index 7b24a1b8d1..9cb0af6d6c 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_uint64.c b/Source/Generated/GB_unaryop__lnot_int16_uint64.c
index a272efdd65..364e31e721 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int16_uint8.c b/Source/Generated/GB_unaryop__lnot_int16_uint8.c
index 481d8b3e56..ce850da054 100644
--- a/Source/Generated/GB_unaryop__lnot_int16_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_int16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_bool.c b/Source/Generated/GB_unaryop__lnot_int32_bool.c
index 0e0f77b37f..4a3ce3cdc0 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_fp32.c b/Source/Generated/GB_unaryop__lnot_int32_fp32.c
index 182fb09acd..bc2c4f4ca8 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_fp64.c b/Source/Generated/GB_unaryop__lnot_int32_fp64.c
index e49b68c352..89627d2063 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_int16.c b/Source/Generated/GB_unaryop__lnot_int32_int16.c
index c7d1ed37bc..8c36796884 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_int32.c b/Source/Generated/GB_unaryop__lnot_int32_int32.c
index 7ee46b2b64..b894c5615a 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_int64.c b/Source/Generated/GB_unaryop__lnot_int32_int64.c
index 7b507c2e99..9bfadce7a2 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_int8.c b/Source/Generated/GB_unaryop__lnot_int32_int8.c
index 0a6f4c0b15..4d6b9dd298 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_uint16.c b/Source/Generated/GB_unaryop__lnot_int32_uint16.c
index 996b5dec63..8f3a66dc9d 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_uint32.c b/Source/Generated/GB_unaryop__lnot_int32_uint32.c
index 900eb3cace..fcdc50ae35 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_uint64.c b/Source/Generated/GB_unaryop__lnot_int32_uint64.c
index 9bec31656d..2ee119f7b6 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int32_uint8.c b/Source/Generated/GB_unaryop__lnot_int32_uint8.c
index 0df929cf1c..1eebbaecd4 100644
--- a/Source/Generated/GB_unaryop__lnot_int32_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_int32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_bool.c b/Source/Generated/GB_unaryop__lnot_int64_bool.c
index d9322dd7df..cc4c5663ca 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_fp32.c b/Source/Generated/GB_unaryop__lnot_int64_fp32.c
index 29d03107c6..776a0e9a1a 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_fp64.c b/Source/Generated/GB_unaryop__lnot_int64_fp64.c
index 536668136f..24ecb198e2 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_int16.c b/Source/Generated/GB_unaryop__lnot_int64_int16.c
index cf60bfb5dd..2002f6d34a 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_int32.c b/Source/Generated/GB_unaryop__lnot_int64_int32.c
index 4d6c36abd6..f22b3692e5 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_int64.c b/Source/Generated/GB_unaryop__lnot_int64_int64.c
index 3378ec9004..5a37e211ab 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_int8.c b/Source/Generated/GB_unaryop__lnot_int64_int8.c
index 36072db38c..f53502c1f1 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_uint16.c b/Source/Generated/GB_unaryop__lnot_int64_uint16.c
index 449c172595..72da978a74 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_uint32.c b/Source/Generated/GB_unaryop__lnot_int64_uint32.c
index 0dd854b2bc..e0770c7e7f 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_uint64.c b/Source/Generated/GB_unaryop__lnot_int64_uint64.c
index 6cfd22c564..715a2ed032 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int64_uint8.c b/Source/Generated/GB_unaryop__lnot_int64_uint8.c
index 33b9300d2d..01b3830f8d 100644
--- a/Source/Generated/GB_unaryop__lnot_int64_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_int64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_bool.c b/Source/Generated/GB_unaryop__lnot_int8_bool.c
index 3232aefb2e..30b3bd8d2b 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_fp32.c b/Source/Generated/GB_unaryop__lnot_int8_fp32.c
index a43d41bd90..836b70b744 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_fp64.c b/Source/Generated/GB_unaryop__lnot_int8_fp64.c
index d8f892acc3..8694f008c2 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_int16.c b/Source/Generated/GB_unaryop__lnot_int8_int16.c
index 43dc8b2f01..754ba15124 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_int32.c b/Source/Generated/GB_unaryop__lnot_int8_int32.c
index 80fa7d6210..1e5d8c9803 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_int64.c b/Source/Generated/GB_unaryop__lnot_int8_int64.c
index efb9a2bfd6..3a67354e07 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_int8.c b/Source/Generated/GB_unaryop__lnot_int8_int8.c
index 6bb70a18a8..00f2ffca55 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_uint16.c b/Source/Generated/GB_unaryop__lnot_int8_uint16.c
index d303242921..2ad7e65b8c 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_uint32.c b/Source/Generated/GB_unaryop__lnot_int8_uint32.c
index a8129a85b3..3ca1748049 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_uint64.c b/Source/Generated/GB_unaryop__lnot_int8_uint64.c
index b8f81bb462..64e7dc7872 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_int8_uint8.c b/Source/Generated/GB_unaryop__lnot_int8_uint8.c
index 3971884729..28973e9588 100644
--- a/Source/Generated/GB_unaryop__lnot_int8_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_int8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_bool.c b/Source/Generated/GB_unaryop__lnot_uint16_bool.c
index 079271abc3..a7a0beadaf 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_fp32.c b/Source/Generated/GB_unaryop__lnot_uint16_fp32.c
index 439e36d558..c4ad5d627e 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_fp64.c b/Source/Generated/GB_unaryop__lnot_uint16_fp64.c
index 63244e04cd..35ccd7fb01 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_int16.c b/Source/Generated/GB_unaryop__lnot_uint16_int16.c
index 2f46ac9a1e..58fec48d4d 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_int32.c b/Source/Generated/GB_unaryop__lnot_uint16_int32.c
index 7dac2473fc..33b519f3b7 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_int64.c b/Source/Generated/GB_unaryop__lnot_uint16_int64.c
index 212c6c862b..a57ecb6b1b 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_int8.c b/Source/Generated/GB_unaryop__lnot_uint16_int8.c
index d39caf5544..5501031d19 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_uint16.c b/Source/Generated/GB_unaryop__lnot_uint16_uint16.c
index 907034895e..2bade9393b 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_uint32.c b/Source/Generated/GB_unaryop__lnot_uint16_uint32.c
index 574d11d6d9..df98e346b1 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_uint64.c b/Source/Generated/GB_unaryop__lnot_uint16_uint64.c
index 631d58e168..d9a1ca4dbc 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint16_uint8.c b/Source/Generated/GB_unaryop__lnot_uint16_uint8.c
index c0de962d83..57e562755b 100644
--- a/Source/Generated/GB_unaryop__lnot_uint16_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_bool.c b/Source/Generated/GB_unaryop__lnot_uint32_bool.c
index 84b681a384..0b0ec7f8df 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_fp32.c b/Source/Generated/GB_unaryop__lnot_uint32_fp32.c
index 9181f0ace5..f7267e9afe 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_fp64.c b/Source/Generated/GB_unaryop__lnot_uint32_fp64.c
index 782fea3d06..e864ebdf0b 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_int16.c b/Source/Generated/GB_unaryop__lnot_uint32_int16.c
index 213604f264..e736371f78 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_int32.c b/Source/Generated/GB_unaryop__lnot_uint32_int32.c
index d4b8fbeec4..782a461031 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_int64.c b/Source/Generated/GB_unaryop__lnot_uint32_int64.c
index 22873d2b47..f99c77b5a8 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_int8.c b/Source/Generated/GB_unaryop__lnot_uint32_int8.c
index 263a4b08bf..5bdf697b52 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_uint16.c b/Source/Generated/GB_unaryop__lnot_uint32_uint16.c
index dae0ef565b..bce7951803 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_uint32.c b/Source/Generated/GB_unaryop__lnot_uint32_uint32.c
index 657e8f3df2..565e76f815 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_uint64.c b/Source/Generated/GB_unaryop__lnot_uint32_uint64.c
index 9be24aeb97..00e54c7190 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint32_uint8.c b/Source/Generated/GB_unaryop__lnot_uint32_uint8.c
index e513abecaf..7aa85e0af0 100644
--- a/Source/Generated/GB_unaryop__lnot_uint32_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_bool.c b/Source/Generated/GB_unaryop__lnot_uint64_bool.c
index 1410e1456c..b49c9c4da4 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_fp32.c b/Source/Generated/GB_unaryop__lnot_uint64_fp32.c
index 61c13e6947..6c9b18170e 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_fp64.c b/Source/Generated/GB_unaryop__lnot_uint64_fp64.c
index 5ab8c591cc..32602f065f 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_int16.c b/Source/Generated/GB_unaryop__lnot_uint64_int16.c
index 31a50033cb..712800dd11 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_int32.c b/Source/Generated/GB_unaryop__lnot_uint64_int32.c
index 65b60884b9..95d6ddc407 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_int64.c b/Source/Generated/GB_unaryop__lnot_uint64_int64.c
index 8d21dbdb9b..a5c17817a3 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_int8.c b/Source/Generated/GB_unaryop__lnot_uint64_int8.c
index 0fc688479d..c81160f2eb 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_uint16.c b/Source/Generated/GB_unaryop__lnot_uint64_uint16.c
index 8b23baa951..9db361e521 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_uint32.c b/Source/Generated/GB_unaryop__lnot_uint64_uint32.c
index a30bf79778..5cc60193f9 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_uint64.c b/Source/Generated/GB_unaryop__lnot_uint64_uint64.c
index 7a6fc8b64f..8ce265a51f 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint64_uint8.c b/Source/Generated/GB_unaryop__lnot_uint64_uint8.c
index 19df748100..0ea8c07c87 100644
--- a/Source/Generated/GB_unaryop__lnot_uint64_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_bool.c b/Source/Generated/GB_unaryop__lnot_uint8_bool.c
index 834074e326..f74ceacd72 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_bool.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_fp32.c b/Source/Generated/GB_unaryop__lnot_uint8_fp32.c
index a06ba0b86a..80e48cb2e2 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_fp32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_fp64.c b/Source/Generated/GB_unaryop__lnot_uint8_fp64.c
index 56de7dc7c0..a108f17fca 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_fp64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_int16.c b/Source/Generated/GB_unaryop__lnot_uint8_int16.c
index e3fcf7196c..39e0b99838 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_int16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_int32.c b/Source/Generated/GB_unaryop__lnot_uint8_int32.c
index 76fd7cc10c..f0f845f44e 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_int32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_int64.c b/Source/Generated/GB_unaryop__lnot_uint8_int64.c
index 53252bb8d8..c9f3229482 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_int64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_int8.c b/Source/Generated/GB_unaryop__lnot_uint8_int8.c
index 4766f0dde9..0065d7afca 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_int8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_uint16.c b/Source/Generated/GB_unaryop__lnot_uint8_uint16.c
index 39c5de8227..10c4809018 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_uint16.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_uint32.c b/Source/Generated/GB_unaryop__lnot_uint8_uint32.c
index fe2cbfd036..1904d55bf5 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_uint32.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_uint64.c b/Source/Generated/GB_unaryop__lnot_uint8_uint64.c
index e08c8ed7ea..39a765aba2 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_uint64.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__lnot_uint8_uint8.c b/Source/Generated/GB_unaryop__lnot_uint8_uint8.c
index 25d2709027..53be81b915 100644
--- a/Source/Generated/GB_unaryop__lnot_uint8_uint8.c
+++ b/Source/Generated/GB_unaryop__lnot_uint8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = !(x != 0) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__lnot_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_bool.c b/Source/Generated/GB_unaryop__minv_bool_bool.c
index f57e6cf092..235a7a6e11 100644
--- a/Source/Generated/GB_unaryop__minv_bool_bool.c
+++ b/Source/Generated/GB_unaryop__minv_bool_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_fp32.c b/Source/Generated/GB_unaryop__minv_bool_fp32.c
index 24c914396f..4ee7f8ef26 100644
--- a/Source/Generated/GB_unaryop__minv_bool_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_bool_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_fp32
 (
-    bool *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_fp64.c b/Source/Generated/GB_unaryop__minv_bool_fp64.c
index 4eb6864468..4d381a77b0 100644
--- a/Source/Generated/GB_unaryop__minv_bool_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_bool_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_fp64
 (
-    bool *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_int16.c b/Source/Generated/GB_unaryop__minv_bool_int16.c
index 16a39abae4..de1af7153a 100644
--- a/Source/Generated/GB_unaryop__minv_bool_int16.c
+++ b/Source/Generated/GB_unaryop__minv_bool_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_int16
 (
-    bool *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_int32.c b/Source/Generated/GB_unaryop__minv_bool_int32.c
index 42ebc207ad..08b853f768 100644
--- a/Source/Generated/GB_unaryop__minv_bool_int32.c
+++ b/Source/Generated/GB_unaryop__minv_bool_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_int32
 (
-    bool *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_int64.c b/Source/Generated/GB_unaryop__minv_bool_int64.c
index fab48a8861..67199a5aa1 100644
--- a/Source/Generated/GB_unaryop__minv_bool_int64.c
+++ b/Source/Generated/GB_unaryop__minv_bool_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_int64
 (
-    bool *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_int8.c b/Source/Generated/GB_unaryop__minv_bool_int8.c
index df3ff28546..d51dfbc893 100644
--- a/Source/Generated/GB_unaryop__minv_bool_int8.c
+++ b/Source/Generated/GB_unaryop__minv_bool_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_int8
 (
-    bool *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_uint16.c b/Source/Generated/GB_unaryop__minv_bool_uint16.c
index 5a596a21fc..c3d9540ee7 100644
--- a/Source/Generated/GB_unaryop__minv_bool_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_bool_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_uint16
 (
-    bool *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_uint32.c b/Source/Generated/GB_unaryop__minv_bool_uint32.c
index 29925e92ed..534eadd3db 100644
--- a/Source/Generated/GB_unaryop__minv_bool_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_bool_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_uint32
 (
-    bool *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_uint64.c b/Source/Generated/GB_unaryop__minv_bool_uint64.c
index 1c936a4560..feb840529e 100644
--- a/Source/Generated/GB_unaryop__minv_bool_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_bool_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_uint64
 (
-    bool *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_bool_uint8.c b/Source/Generated/GB_unaryop__minv_bool_uint8.c
index ee646b2351..d48f97a9dd 100644
--- a/Source/Generated/GB_unaryop__minv_bool_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_bool_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_bool_uint8
 (
-    bool *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_bool.c b/Source/Generated/GB_unaryop__minv_fp32_bool.c
index 381ce72115..388f5e6645 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_bool.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_bool
 (
-    float *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_fp32.c b/Source/Generated/GB_unaryop__minv_fp32_fp32.c
index 97b3083955..56bde63201 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_fp64.c b/Source/Generated/GB_unaryop__minv_fp32_fp64.c
index 8bb3bce6ad..b6becb9d5c 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_fp64
 (
-    float *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_int16.c b/Source/Generated/GB_unaryop__minv_fp32_int16.c
index 292cb43797..bee383aa93 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_int16.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_int16
 (
-    float *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_int32.c b/Source/Generated/GB_unaryop__minv_fp32_int32.c
index 3e7f837608..fb94b67f24 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_int32.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_int32
 (
-    float *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_int64.c b/Source/Generated/GB_unaryop__minv_fp32_int64.c
index 743cf0fe84..a584f0cedf 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_int64.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_int64
 (
-    float *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_int8.c b/Source/Generated/GB_unaryop__minv_fp32_int8.c
index 4a18542f1b..eee2c91fbd 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_int8.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_int8
 (
-    float *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_uint16.c b/Source/Generated/GB_unaryop__minv_fp32_uint16.c
index a988bb2e4c..9e9882add1 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_uint16
 (
-    float *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_uint32.c b/Source/Generated/GB_unaryop__minv_fp32_uint32.c
index 045b439ad0..1aadbef153 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_uint32
 (
-    float *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_uint64.c b/Source/Generated/GB_unaryop__minv_fp32_uint64.c
index 047ef90444..4ebc24cf94 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_uint64
 (
-    float *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp32_uint8.c b/Source/Generated/GB_unaryop__minv_fp32_uint8.c
index 3cb8f3cce4..a416eaaaf2 100644
--- a/Source/Generated/GB_unaryop__minv_fp32_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_fp32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = (1.0F)/x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    float z = (float) x ;
+#define GB_CASTING(z, aij) \
+    float z = (float) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp32_uint8
 (
-    float *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_bool.c b/Source/Generated/GB_unaryop__minv_fp64_bool.c
index 0f90167bc9..9612cc3573 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_bool.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_bool
 (
-    double *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_fp32.c b/Source/Generated/GB_unaryop__minv_fp64_fp32.c
index 4b804e982d..5b7f3dc9c3 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_fp32
 (
-    double *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_fp64.c b/Source/Generated/GB_unaryop__minv_fp64_fp64.c
index 120d09e6c6..dd16c97921 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_int16.c b/Source/Generated/GB_unaryop__minv_fp64_int16.c
index c13dfc9dd1..d315f6298e 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_int16.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_int16
 (
-    double *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_int32.c b/Source/Generated/GB_unaryop__minv_fp64_int32.c
index 50c0d617e8..c384735725 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_int32.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_int32
 (
-    double *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_int64.c b/Source/Generated/GB_unaryop__minv_fp64_int64.c
index 881ce016c4..6c90c3fc9a 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_int64.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_int64
 (
-    double *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_int8.c b/Source/Generated/GB_unaryop__minv_fp64_int8.c
index b05872f5a7..8bd1bc0de6 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_int8.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_int8
 (
-    double *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_uint16.c b/Source/Generated/GB_unaryop__minv_fp64_uint16.c
index 588333deb0..b191b35773 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_uint16
 (
-    double *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_uint32.c b/Source/Generated/GB_unaryop__minv_fp64_uint32.c
index f5df95d809..f8c336be01 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_uint32
 (
-    double *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_uint64.c b/Source/Generated/GB_unaryop__minv_fp64_uint64.c
index 67f98559c4..d74d5917c8 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_uint64
 (
-    double *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_fp64_uint8.c b/Source/Generated/GB_unaryop__minv_fp64_uint8.c
index d99a86325a..7d31be733a 100644
--- a/Source/Generated/GB_unaryop__minv_fp64_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_fp64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = 1./x ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    double z = (double) x ;
+#define GB_CASTING(z, aij) \
+    double z = (double) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_fp64_uint8
 (
-    double *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_bool.c b/Source/Generated/GB_unaryop__minv_int16_bool.c
index 296b923e74..4a45f0af6a 100644
--- a/Source/Generated/GB_unaryop__minv_int16_bool.c
+++ b/Source/Generated/GB_unaryop__minv_int16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_bool
 (
-    int16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_fp32.c b/Source/Generated/GB_unaryop__minv_int16_fp32.c
index b1601d602c..3eec0b3c54 100644
--- a/Source/Generated/GB_unaryop__minv_int16_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_int16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_fp32
 (
-    int16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_fp64.c b/Source/Generated/GB_unaryop__minv_int16_fp64.c
index db9d5d818a..da256d3cf3 100644
--- a/Source/Generated/GB_unaryop__minv_int16_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_int16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z ; GB_CAST_SIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    int16_t z ; GB_CAST_SIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_fp64
 (
-    int16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_int16.c b/Source/Generated/GB_unaryop__minv_int16_int16.c
index db0fde4b26..e0323213f1 100644
--- a/Source/Generated/GB_unaryop__minv_int16_int16.c
+++ b/Source/Generated/GB_unaryop__minv_int16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_int32.c b/Source/Generated/GB_unaryop__minv_int16_int32.c
index 6cb82207f6..f4ee2b7c0f 100644
--- a/Source/Generated/GB_unaryop__minv_int16_int32.c
+++ b/Source/Generated/GB_unaryop__minv_int16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_int32
 (
-    int16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_int64.c b/Source/Generated/GB_unaryop__minv_int16_int64.c
index 74a19e79da..d81c222134 100644
--- a/Source/Generated/GB_unaryop__minv_int16_int64.c
+++ b/Source/Generated/GB_unaryop__minv_int16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_int64
 (
-    int16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_int8.c b/Source/Generated/GB_unaryop__minv_int16_int8.c
index b3bfbbb3ba..08c1d5ca4d 100644
--- a/Source/Generated/GB_unaryop__minv_int16_int8.c
+++ b/Source/Generated/GB_unaryop__minv_int16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_int8
 (
-    int16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_uint16.c b/Source/Generated/GB_unaryop__minv_int16_uint16.c
index 39f9630839..02bda89af7 100644
--- a/Source/Generated/GB_unaryop__minv_int16_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_int16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_uint16
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_uint32.c b/Source/Generated/GB_unaryop__minv_int16_uint32.c
index bf4345542e..3f3ef9ee91 100644
--- a/Source/Generated/GB_unaryop__minv_int16_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_int16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_uint32
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_uint64.c b/Source/Generated/GB_unaryop__minv_int16_uint64.c
index 40f89441c7..1999a6211e 100644
--- a/Source/Generated/GB_unaryop__minv_int16_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_int16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_uint64
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int16_uint8.c b/Source/Generated/GB_unaryop__minv_int16_uint8.c
index d6c4e2a861..b253d67283 100644
--- a/Source/Generated/GB_unaryop__minv_int16_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_int16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int16_t z = (int16_t) x ;
+#define GB_CASTING(z, aij) \
+    int16_t z = (int16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int16_uint8
 (
-    int16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_bool.c b/Source/Generated/GB_unaryop__minv_int32_bool.c
index 6b1d39249d..97977ca872 100644
--- a/Source/Generated/GB_unaryop__minv_int32_bool.c
+++ b/Source/Generated/GB_unaryop__minv_int32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_bool
 (
-    int32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_fp32.c b/Source/Generated/GB_unaryop__minv_int32_fp32.c
index 155956d771..7a1d4bd008 100644
--- a/Source/Generated/GB_unaryop__minv_int32_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_int32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_fp32
 (
-    int32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_fp64.c b/Source/Generated/GB_unaryop__minv_int32_fp64.c
index febffed762..0e8c1c99fa 100644
--- a/Source/Generated/GB_unaryop__minv_int32_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_int32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z ; GB_CAST_SIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    int32_t z ; GB_CAST_SIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_fp64
 (
-    int32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_int16.c b/Source/Generated/GB_unaryop__minv_int32_int16.c
index d235e43198..d0e17a87b2 100644
--- a/Source/Generated/GB_unaryop__minv_int32_int16.c
+++ b/Source/Generated/GB_unaryop__minv_int32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_int16
 (
-    int32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_int32.c b/Source/Generated/GB_unaryop__minv_int32_int32.c
index 0b27411a35..1abb34858c 100644
--- a/Source/Generated/GB_unaryop__minv_int32_int32.c
+++ b/Source/Generated/GB_unaryop__minv_int32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_int64.c b/Source/Generated/GB_unaryop__minv_int32_int64.c
index 0b30daf449..e84cd4b962 100644
--- a/Source/Generated/GB_unaryop__minv_int32_int64.c
+++ b/Source/Generated/GB_unaryop__minv_int32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_int64
 (
-    int32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_int8.c b/Source/Generated/GB_unaryop__minv_int32_int8.c
index b1d675790c..9c9ae4b480 100644
--- a/Source/Generated/GB_unaryop__minv_int32_int8.c
+++ b/Source/Generated/GB_unaryop__minv_int32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_int8
 (
-    int32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_uint16.c b/Source/Generated/GB_unaryop__minv_int32_uint16.c
index 11b5d8cd49..020faa320c 100644
--- a/Source/Generated/GB_unaryop__minv_int32_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_int32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_uint16
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_uint32.c b/Source/Generated/GB_unaryop__minv_int32_uint32.c
index 9ef78200b3..5388652221 100644
--- a/Source/Generated/GB_unaryop__minv_int32_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_int32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_uint32
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_uint64.c b/Source/Generated/GB_unaryop__minv_int32_uint64.c
index 0c6c6867fa..fd997fd4dd 100644
--- a/Source/Generated/GB_unaryop__minv_int32_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_int32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_uint64
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int32_uint8.c b/Source/Generated/GB_unaryop__minv_int32_uint8.c
index dc086f8b47..8027705d00 100644
--- a/Source/Generated/GB_unaryop__minv_int32_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_int32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int32_t z = (int32_t) x ;
+#define GB_CASTING(z, aij) \
+    int32_t z = (int32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int32_uint8
 (
-    int32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_bool.c b/Source/Generated/GB_unaryop__minv_int64_bool.c
index 3bd417cb1a..e906255091 100644
--- a/Source/Generated/GB_unaryop__minv_int64_bool.c
+++ b/Source/Generated/GB_unaryop__minv_int64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_bool
 (
-    int64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_fp32.c b/Source/Generated/GB_unaryop__minv_int64_fp32.c
index 62369ad40f..8ba2d6abf3 100644
--- a/Source/Generated/GB_unaryop__minv_int64_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_int64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_fp32
 (
-    int64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_fp64.c b/Source/Generated/GB_unaryop__minv_int64_fp64.c
index 5d2da661e8..2cd74be2c2 100644
--- a/Source/Generated/GB_unaryop__minv_int64_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_int64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z ; GB_CAST_SIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    int64_t z ; GB_CAST_SIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_fp64
 (
-    int64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_int16.c b/Source/Generated/GB_unaryop__minv_int64_int16.c
index 7d296a4969..bf53eb609a 100644
--- a/Source/Generated/GB_unaryop__minv_int64_int16.c
+++ b/Source/Generated/GB_unaryop__minv_int64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_int16
 (
-    int64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_int32.c b/Source/Generated/GB_unaryop__minv_int64_int32.c
index d562835790..fb25c95dc7 100644
--- a/Source/Generated/GB_unaryop__minv_int64_int32.c
+++ b/Source/Generated/GB_unaryop__minv_int64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_int32
 (
-    int64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_int64.c b/Source/Generated/GB_unaryop__minv_int64_int64.c
index 0d16355c17..94010f996a 100644
--- a/Source/Generated/GB_unaryop__minv_int64_int64.c
+++ b/Source/Generated/GB_unaryop__minv_int64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_int8.c b/Source/Generated/GB_unaryop__minv_int64_int8.c
index 8723c56ec3..96ed97c44c 100644
--- a/Source/Generated/GB_unaryop__minv_int64_int8.c
+++ b/Source/Generated/GB_unaryop__minv_int64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_int8
 (
-    int64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_uint16.c b/Source/Generated/GB_unaryop__minv_int64_uint16.c
index cb71bd81ed..be002492cb 100644
--- a/Source/Generated/GB_unaryop__minv_int64_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_int64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_uint16
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_uint32.c b/Source/Generated/GB_unaryop__minv_int64_uint32.c
index 120dee5401..ce43a6bff2 100644
--- a/Source/Generated/GB_unaryop__minv_int64_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_int64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_uint32
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_uint64.c b/Source/Generated/GB_unaryop__minv_int64_uint64.c
index d330ce2597..08155f7175 100644
--- a/Source/Generated/GB_unaryop__minv_int64_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_int64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_uint64
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int64_uint8.c b/Source/Generated/GB_unaryop__minv_int64_uint8.c
index cf1a355a55..1108172eae 100644
--- a/Source/Generated/GB_unaryop__minv_int64_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_int64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int64_t z = (int64_t) x ;
+#define GB_CASTING(z, aij) \
+    int64_t z = (int64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int64_uint8
 (
-    int64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_bool.c b/Source/Generated/GB_unaryop__minv_int8_bool.c
index 95046e2a0d..17e87d61cd 100644
--- a/Source/Generated/GB_unaryop__minv_int8_bool.c
+++ b/Source/Generated/GB_unaryop__minv_int8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_bool
 (
-    int8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_fp32.c b/Source/Generated/GB_unaryop__minv_int8_fp32.c
index 3d6362589b..215074be22 100644
--- a/Source/Generated/GB_unaryop__minv_int8_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_int8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_fp32
 (
-    int8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_fp64.c b/Source/Generated/GB_unaryop__minv_int8_fp64.c
index 384ff9aed6..745b8fdf61 100644
--- a/Source/Generated/GB_unaryop__minv_int8_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_int8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z ; GB_CAST_SIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    int8_t z ; GB_CAST_SIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_fp64
 (
-    int8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_int16.c b/Source/Generated/GB_unaryop__minv_int8_int16.c
index ba9229047c..2f039a7ed8 100644
--- a/Source/Generated/GB_unaryop__minv_int8_int16.c
+++ b/Source/Generated/GB_unaryop__minv_int8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_int16
 (
-    int8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_int32.c b/Source/Generated/GB_unaryop__minv_int8_int32.c
index 7e863125ed..7906c74fc4 100644
--- a/Source/Generated/GB_unaryop__minv_int8_int32.c
+++ b/Source/Generated/GB_unaryop__minv_int8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_int32
 (
-    int8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_int64.c b/Source/Generated/GB_unaryop__minv_int8_int64.c
index bc019342d6..31c97a9a72 100644
--- a/Source/Generated/GB_unaryop__minv_int8_int64.c
+++ b/Source/Generated/GB_unaryop__minv_int8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_int64
 (
-    int8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_int8.c b/Source/Generated/GB_unaryop__minv_int8_int8.c
index e29dd815ad..31418a0f11 100644
--- a/Source/Generated/GB_unaryop__minv_int8_int8.c
+++ b/Source/Generated/GB_unaryop__minv_int8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_uint16.c b/Source/Generated/GB_unaryop__minv_int8_uint16.c
index 17bc863777..94151a1b95 100644
--- a/Source/Generated/GB_unaryop__minv_int8_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_int8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_uint16
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_uint32.c b/Source/Generated/GB_unaryop__minv_int8_uint32.c
index 37fa1f6e95..7cbdcd6f89 100644
--- a/Source/Generated/GB_unaryop__minv_int8_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_int8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_uint32
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_uint64.c b/Source/Generated/GB_unaryop__minv_int8_uint64.c
index 2fcd460171..bb2f9a9f66 100644
--- a/Source/Generated/GB_unaryop__minv_int8_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_int8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_uint64
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_int8_uint8.c b/Source/Generated/GB_unaryop__minv_int8_uint8.c
index 395613bd29..7f1512d07c 100644
--- a/Source/Generated/GB_unaryop__minv_int8_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_int8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_SIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    int8_t z = (int8_t) x ;
+#define GB_CASTING(z, aij) \
+    int8_t z = (int8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_int8_uint8
 (
-    int8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_bool.c b/Source/Generated/GB_unaryop__minv_uint16_bool.c
index 1a1a666487..27c86a1e9c 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_bool.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_bool
 (
-    uint16_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_fp32.c b/Source/Generated/GB_unaryop__minv_uint16_fp32.c
index 73edd82574..a02a007023 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_fp32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_fp64.c b/Source/Generated/GB_unaryop__minv_uint16_fp64.c
index 0c7fec5138..543a97edc4 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z ; GB_CAST_UNSIGNED(z,x,16) ;
+#define GB_CASTING(z, aij) \
+    uint16_t z ; GB_CAST_UNSIGNED(z,aij,16) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_fp64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_int16.c b/Source/Generated/GB_unaryop__minv_uint16_int16.c
index 99b24c36f2..7fa1768189 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_int16.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_int16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_int32.c b/Source/Generated/GB_unaryop__minv_uint16_int32.c
index d7541caef7..ce9bda7b3a 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_int32.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_int32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_int64.c b/Source/Generated/GB_unaryop__minv_uint16_int64.c
index cf93542468..7bfd916c1f 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_int64.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_int64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_int8.c b/Source/Generated/GB_unaryop__minv_uint16_int8.c
index 77f70e5202..04219ca4ea 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_int8.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_int8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_uint16.c b/Source/Generated/GB_unaryop__minv_uint16_uint16.c
index ab5c77ea39..3d7bc2a86d 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_uint32.c b/Source/Generated/GB_unaryop__minv_uint16_uint32.c
index ab60e3eefc..72e69d0930 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_uint32
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_uint64.c b/Source/Generated/GB_unaryop__minv_uint16_uint64.c
index 4d1e255b8b..0de8b8a8a7 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_uint64
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint16_uint8.c b/Source/Generated/GB_unaryop__minv_uint16_uint8.c
index c407d90430..e84a9bce05 100644
--- a/Source/Generated/GB_unaryop__minv_uint16_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_uint16_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 16) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint16_t z = (uint16_t) x ;
+#define GB_CASTING(z, aij) \
+    uint16_t z = (uint16_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint16_uint8
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_bool.c b/Source/Generated/GB_unaryop__minv_uint32_bool.c
index 6300f23b03..f48826de43 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_bool.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_bool
 (
-    uint32_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_fp32.c b/Source/Generated/GB_unaryop__minv_uint32_fp32.c
index 4aba7d68d3..c00469e336 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_fp32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_fp64.c b/Source/Generated/GB_unaryop__minv_uint32_fp64.c
index fb4e33909c..74c1d68176 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z ; GB_CAST_UNSIGNED(z,x,32) ;
+#define GB_CASTING(z, aij) \
+    uint32_t z ; GB_CAST_UNSIGNED(z,aij,32) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_fp64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_int16.c b/Source/Generated/GB_unaryop__minv_uint32_int16.c
index e658800682..eac8118422 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_int16.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_int16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_int32.c b/Source/Generated/GB_unaryop__minv_uint32_int32.c
index 998338d5ef..d898356c4d 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_int32.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_int32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_int64.c b/Source/Generated/GB_unaryop__minv_uint32_int64.c
index e22dc8d7f9..39db9f73b6 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_int64.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_int64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_int8.c b/Source/Generated/GB_unaryop__minv_uint32_int8.c
index 30f6febb32..8b3b4905f6 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_int8.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_int8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_uint16.c b/Source/Generated/GB_unaryop__minv_uint32_uint16.c
index e79f91834f..bba8b176cb 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_uint16
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_uint32.c b/Source/Generated/GB_unaryop__minv_uint32_uint32.c
index 9362eb5c78..d6ac48c7bc 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_uint64.c b/Source/Generated/GB_unaryop__minv_uint32_uint64.c
index 7ed4c2c715..9fa6ac4cfa 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_uint64
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint32_uint8.c b/Source/Generated/GB_unaryop__minv_uint32_uint8.c
index 8cc42e204a..90084f9ed2 100644
--- a/Source/Generated/GB_unaryop__minv_uint32_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_uint32_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 32) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint32_t z = (uint32_t) x ;
+#define GB_CASTING(z, aij) \
+    uint32_t z = (uint32_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint32_uint8
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_bool.c b/Source/Generated/GB_unaryop__minv_uint64_bool.c
index 3b32456f77..f70cc87a69 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_bool.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_bool
 (
-    uint64_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_fp32.c b/Source/Generated/GB_unaryop__minv_uint64_fp32.c
index ba4ccc36da..7ed5067b68 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_fp32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_fp64.c b/Source/Generated/GB_unaryop__minv_uint64_fp64.c
index 9141f2e997..ba9eaecff4 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z ; GB_CAST_UNSIGNED(z,x,64) ;
+#define GB_CASTING(z, aij) \
+    uint64_t z ; GB_CAST_UNSIGNED(z,aij,64) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_fp64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_int16.c b/Source/Generated/GB_unaryop__minv_uint64_int16.c
index 05acc827f5..2cbed9a946 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_int16.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_int16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_int32.c b/Source/Generated/GB_unaryop__minv_uint64_int32.c
index 9fe51628fe..48eb90213a 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_int32.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_int32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_int64.c b/Source/Generated/GB_unaryop__minv_uint64_int64.c
index 4bbfce2db1..58cc740203 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_int64.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_int64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_int8.c b/Source/Generated/GB_unaryop__minv_uint64_int8.c
index df309e631b..59fd916084 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_int8.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_int8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_uint16.c b/Source/Generated/GB_unaryop__minv_uint64_uint16.c
index 212c187d8b..38ab4ded63 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_uint16
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_uint32.c b/Source/Generated/GB_unaryop__minv_uint64_uint32.c
index 4825e05998..4af78020bf 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_uint32
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_uint64.c b/Source/Generated/GB_unaryop__minv_uint64_uint64.c
index 1520ec7b93..8a896e5c36 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint64_uint8.c b/Source/Generated/GB_unaryop__minv_uint64_uint8.c
index 801e36625b..46825a1760 100644
--- a/Source/Generated/GB_unaryop__minv_uint64_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_uint64_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 64) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint64_t z = (uint64_t) x ;
+#define GB_CASTING(z, aij) \
+    uint64_t z = (uint64_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint64_uint8
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_bool.c b/Source/Generated/GB_unaryop__minv_uint8_bool.c
index d073d26074..3c06152ac5 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_bool.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_bool
 (
-    uint8_t *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_fp32.c b/Source/Generated/GB_unaryop__minv_uint8_fp32.c
index ddeb08c8ed..57a39b03fd 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_fp32.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_fp32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_fp64.c b/Source/Generated/GB_unaryop__minv_uint8_fp64.c
index 83b1594d5d..cd513718d4 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_fp64.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z ; GB_CAST_UNSIGNED(z,x,8) ;
+#define GB_CASTING(z, aij) \
+    uint8_t z ; GB_CAST_UNSIGNED(z,aij,8) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_fp64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_int16.c b/Source/Generated/GB_unaryop__minv_uint8_int16.c
index cd92533e83..4d6117e794 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_int16.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_int16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_int32.c b/Source/Generated/GB_unaryop__minv_uint8_int32.c
index 99af477f64..97cf980340 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_int32.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_int32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_int64.c b/Source/Generated/GB_unaryop__minv_uint8_int64.c
index da9c262648..c710d219d6 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_int64.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_int64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_int8.c b/Source/Generated/GB_unaryop__minv_uint8_int8.c
index bee84d2b9e..7195ae210c 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_int8.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_int8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_uint16.c b/Source/Generated/GB_unaryop__minv_uint8_uint16.c
index 07f5af28bb..9545e9a58e 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_uint16.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_uint16
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_uint32.c b/Source/Generated/GB_unaryop__minv_uint8_uint32.c
index 2efa912f8a..862cc5e5ad 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_uint32.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_uint32
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_uint64.c b/Source/Generated/GB_unaryop__minv_uint8_uint64.c
index ccf00b42d6..0805f9dab7 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_uint64.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_uint64
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__minv_uint8_uint8.c b/Source/Generated/GB_unaryop__minv_uint8_uint8.c
index 8a96eda247..eef7c4abdb 100644
--- a/Source/Generated/GB_unaryop__minv_uint8_uint8.c
+++ b/Source/Generated/GB_unaryop__minv_uint8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     z = GB_IMINV_UNSIGNED (x, 8) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    uint8_t z = (uint8_t) x ;
+#define GB_CASTING(z, aij) \
+    uint8_t z = (uint8_t) aij ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__minv_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_bool_bool.c b/Source/Generated/GB_unaryop__one_bool_bool.c
index 0e834ba690..e18a5ce056 100644
--- a/Source/Generated/GB_unaryop__one_bool_bool.c
+++ b/Source/Generated/GB_unaryop__one_bool_bool.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = true ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_bool_bool
 (
-    bool *GB_RESTRICT Cx,
-    const bool *GB_RESTRICT Ax,
+    bool *Cx,       // Cx and Ax may be aliased
+    bool *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_fp32_fp32.c b/Source/Generated/GB_unaryop__one_fp32_fp32.c
index b3574d7f9b..321924977a 100644
--- a/Source/Generated/GB_unaryop__one_fp32_fp32.c
+++ b/Source/Generated/GB_unaryop__one_fp32_fp32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_fp32_fp32
 (
-    float *GB_RESTRICT Cx,
-    const float *GB_RESTRICT Ax,
+    float *Cx,       // Cx and Ax may be aliased
+    float *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_fp64_fp64.c b/Source/Generated/GB_unaryop__one_fp64_fp64.c
index a6384987c6..f957e642c6 100644
--- a/Source/Generated/GB_unaryop__one_fp64_fp64.c
+++ b/Source/Generated/GB_unaryop__one_fp64_fp64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_fp64_fp64
 (
-    double *GB_RESTRICT Cx,
-    const double *GB_RESTRICT Ax,
+    double *Cx,       // Cx and Ax may be aliased
+    double *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_int16_int16.c b/Source/Generated/GB_unaryop__one_int16_int16.c
index 6ac74640f4..f756f950d2 100644
--- a/Source/Generated/GB_unaryop__one_int16_int16.c
+++ b/Source/Generated/GB_unaryop__one_int16_int16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_int16_int16
 (
-    int16_t *GB_RESTRICT Cx,
-    const int16_t *GB_RESTRICT Ax,
+    int16_t *Cx,       // Cx and Ax may be aliased
+    int16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_int32_int32.c b/Source/Generated/GB_unaryop__one_int32_int32.c
index 16a68b50b7..9fff3eb797 100644
--- a/Source/Generated/GB_unaryop__one_int32_int32.c
+++ b/Source/Generated/GB_unaryop__one_int32_int32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_int32_int32
 (
-    int32_t *GB_RESTRICT Cx,
-    const int32_t *GB_RESTRICT Ax,
+    int32_t *Cx,       // Cx and Ax may be aliased
+    int32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_int64_int64.c b/Source/Generated/GB_unaryop__one_int64_int64.c
index 05ab3e1af9..75184565d1 100644
--- a/Source/Generated/GB_unaryop__one_int64_int64.c
+++ b/Source/Generated/GB_unaryop__one_int64_int64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_int64_int64
 (
-    int64_t *GB_RESTRICT Cx,
-    const int64_t *GB_RESTRICT Ax,
+    int64_t *Cx,       // Cx and Ax may be aliased
+    int64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_int8_int8.c b/Source/Generated/GB_unaryop__one_int8_int8.c
index 03d643a0d5..f103849046 100644
--- a/Source/Generated/GB_unaryop__one_int8_int8.c
+++ b/Source/Generated/GB_unaryop__one_int8_int8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_int8_int8
 (
-    int8_t *GB_RESTRICT Cx,
-    const int8_t *GB_RESTRICT Ax,
+    int8_t *Cx,       // Cx and Ax may be aliased
+    int8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_uint16_uint16.c b/Source/Generated/GB_unaryop__one_uint16_uint16.c
index 8fb2297b34..40cbea4c93 100644
--- a/Source/Generated/GB_unaryop__one_uint16_uint16.c
+++ b/Source/Generated/GB_unaryop__one_uint16_uint16.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_uint16_uint16
 (
-    uint16_t *GB_RESTRICT Cx,
-    const uint16_t *GB_RESTRICT Ax,
+    uint16_t *Cx,       // Cx and Ax may be aliased
+    uint16_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_uint32_uint32.c b/Source/Generated/GB_unaryop__one_uint32_uint32.c
index a7ecc9ad5c..58f030c9c2 100644
--- a/Source/Generated/GB_unaryop__one_uint32_uint32.c
+++ b/Source/Generated/GB_unaryop__one_uint32_uint32.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_uint32_uint32
 (
-    uint32_t *GB_RESTRICT Cx,
-    const uint32_t *GB_RESTRICT Ax,
+    uint32_t *Cx,       // Cx and Ax may be aliased
+    uint32_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_uint64_uint64.c b/Source/Generated/GB_unaryop__one_uint64_uint64.c
index 31c855d5f6..256f17ffa0 100644
--- a/Source/Generated/GB_unaryop__one_uint64_uint64.c
+++ b/Source/Generated/GB_unaryop__one_uint64_uint64.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_uint64_uint64
 (
-    uint64_t *GB_RESTRICT Cx,
-    const uint64_t *GB_RESTRICT Ax,
+    uint64_t *Cx,       // Cx and Ax may be aliased
+    uint64_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generated/GB_unaryop__one_uint8_uint8.c b/Source/Generated/GB_unaryop__one_uint8_uint8.c
index 61c38d4464..4344977c4b 100644
--- a/Source/Generated/GB_unaryop__one_uint8_uint8.c
+++ b/Source/Generated/GB_unaryop__one_uint8_uint8.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,7 +42,7 @@
     z = 1 ;
 
 // casting
-#define GB_CASTING(z, x)   \
+#define GB_CASTING(z, aij) \
     ; ;
 
 // cij = op (cast (aij))
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop__one_uint8_uint8
 (
-    uint8_t *GB_RESTRICT Cx,
-    const uint8_t *GB_RESTRICT Ax,
+    uint8_t *Cx,       // Cx and Ax may be aliased
+    uint8_t *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generator/GB_AxB.c b/Source/Generator/GB_AxB.c
index 98068c5e2a..312c01fbf9 100644
--- a/Source/Generator/GB_AxB.c
+++ b/Source/Generator/GB_AxB.c
@@ -2,7 +2,7 @@
 // GB_AxB:  hard-coded functions for semiring: C<M>=A*B or A'*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,26 +13,30 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
-#include "GB_Sauna.h"
-#include "GB_jappend.h"
 #include "GB_bracket.h"
 #include "GB_iterator.h"
+#include "GB_sort.h"
+#include "GB_atomics.h"
+#include "GB_AxB_saxpy3.h"
 #include "GB_AxB__include.h"
 
 // The C=A*B semiring is defined by the following types and operators:
 
-// A*B function (Gustavon):  GB_AgusB
 // A'*B function (dot2):     GB_Adot2B
 // A'*B function (dot3):     GB_Adot3B
-// A*B function (heap):      GB_AheapB
+// C+=A'*B function (dot4):  GB_Adot4B
+// A*B function (saxpy3):    GB_Asaxpy3B
 
 // C type:   GB_ctype
 // A type:   GB_atype
 // B type:   GB_btype
 
 // Multiply: GB_MULTIPLY(z,aik,bkj)
-// Add:      GB_ADD(cij, z)
-// MultAdd:  GB_MULTIPLY_ADD(cij,aik,bkj)
+// Add:      GB_add_update(cij, z)
+//           'any' monoid?  GB_is_any_monoid
+//           atomic?        GB_has_atomic
+//           OpenMP atomic? GB_has_omp_atomic
+// MultAdd:  GB_multiply_add(cij,aik,bkj)
 // Identity: GB_identity
 // Terminal: GB_terminal
 
@@ -56,17 +60,14 @@
 #define GB_CX(p) Cx [p]
 
 // multiply operator
-#define GB_MULT(z, x, y)        \
-    GB_MULTIPLY(z, x, y) ;
+#define GB_MULT(z, x, y) \
+    GB_MULTIPLY(z, x, y)
 
 // multiply-add
-#define GB_MULTADD(z, x, y)     \
-    GB_MULTIPLY_ADD(z, x, y) ;
+#define GB_MULTADD(z, x, y) \
+    GB_multiply_add(z, x, y)
 
-// copy scalar
-#define GB_COPY_C(z,x) z = x ;
-
-// monoid identity value (Gustavson's method only, with no mask)
+// monoid identity value
 #define GB_IDENTITY \
     GB_identity
 
@@ -74,50 +75,101 @@
 #define GB_DOT_TERMINAL(cij) \
     GB_terminal
 
-// simd pragma for dot product
-#define GB_DOT_SIMD \
-    GB_dot_simd
+// simd pragma for dot-product loop vectorization
+#define GB_PRAGMA_VECTORIZE_DOT \
+    GB_dot_simd_vectorize
 
-// cij is not a pointer but a scalar; nothing to do
-#define GB_CIJ_REACQUIRE(cij,cnz) ;
+// simd pragma for other loop vectorization
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
 
 // declare the cij scalar
-#define GB_CIJ_DECLARE(cij) ; \
-    GB_ctype cij ;
+#define GB_CIJ_DECLARE(cij) \
+    GB_ctype cij
 
 // save the value of C(i,j)
-#define GB_CIJ_SAVE(cij,p) Cx [p] = cij ;
+#define GB_CIJ_SAVE(cij,p) Cx [p] = cij
+
+// cij = Cx [pC]
+#define GB_GETC(cij,pC) \
+    cij = Cx [pC]
+
+// Cx [pC] = cij
+#define GB_PUTC(cij,pC) \
+    Cx [pC] = cij
+
+// Cx [p] = t
+#define GB_CIJ_WRITE(p,t) Cx [p] = t
+
+// C(i,j) += t
+#define GB_CIJ_UPDATE(p,t) \
+    GB_add_update(Cx [p], t)
+
+// x + y
+#define GB_ADD_FUNCTION(x,y) \
+    GB_add_function(x, y)
+
+// type with size of GB_CTYPE, and can be used in compare-and-swap
+#define GB_CTYPE_PUN \
+    GB_ctype_pun
+
+// bit pattern for bool, 8-bit, 16-bit, and 32-bit integers
+#define GB_CTYPE_BITS \
+    GB_ctype_bits
+
+// 1 if monoid update can be skipped entirely (the ANY monoid)
+#define GB_IS_ANY_MONOID \
+    GB_is_any_monoid
+
+// 1 if monoid update is EQ
+#define GB_IS_EQ_MONOID \
+    GB_is_eq_monoid
+
+// 1 if monoid update can be done atomically, 0 otherwise
+#define GB_HAS_ATOMIC \
+    GB_has_atomic
+
+// 1 if monoid update can be done with an OpenMP atomic update, 0 otherwise
+#define GB_HAS_OMP_ATOMIC \
+    GB_has_omp_atomic
+
+// 1 for the ANY_PAIR semirings
+#define GB_IS_ANY_PAIR_SEMIRING \
+    GB_is_any_pair_semiring
+
+// 1 if PAIR is the multiply operator 
+#define GB_IS_PAIR_MULTIPLIER \
+    GB_is_pair_multiplier
+
+#if GB_IS_ANY_PAIR_SEMIRING
 
-#define GB_SAUNA_WORK(i) Sauna_Work [i]
+    // result is purely symbolic; no numeric work to do.  Hx is not used.
+    #define GB_HX_WRITE(i,t)
+    #define GB_CIJ_GATHER(p,i)
+    #define GB_HX_UPDATE(i,t)
+    #define GB_CIJ_MEMCPY(p,i,len)
+
+#else
+
+    // Hx [i] = t
+    #define GB_HX_WRITE(i,t) Hx [i] = t
+
+    // Cx [p] = Hx [i]
+    #define GB_CIJ_GATHER(p,i) Cx [p] = Hx [i]
+
+    // Hx [i] += t
+    #define GB_HX_UPDATE(i,t) \
+        GB_add_update(Hx [i], t)
+
+    // memcpy (&(Cx [p]), &(Hx [i]), len)
+    #define GB_CIJ_MEMCPY(p,i,len) \
+        memcpy (Cx +(p), Hx +(i), (len) * sizeof(GB_ctype))
+
+#endif
 
 // disable this semiring and use the generic case if these conditions hold
 #define GB_DISABLE \
     GB_disable
 
-//------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: gather/scatter saxpy-based method (Gustavson)
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AgusB
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-)
-{ 
-    #if GB_DISABLE
-    return (GrB_NO_VALUE) ;
-    #else
-    GB_ctype *GB_RESTRICT Sauna_Work = Sauna->Sauna_Work ;
-    GB_ctype *GB_RESTRICT Cx = C->x ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_Gustavson_meta.c"
-    return (info) ;
-    #endif
-}
-
 //------------------------------------------------------------------------------
 // C=A'*B or C<!M>=A'*B: dot product (phase 2)
 //------------------------------------------------------------------------------
@@ -125,7 +177,7 @@ GrB_Info GB_AgusB
 GrB_Info GB_Adot2B
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -151,7 +203,7 @@ GrB_Info GB_Adot2B
 GrB_Info GB_Adot3B
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -168,33 +220,51 @@ GrB_Info GB_Adot3B
 }
 
 //------------------------------------------------------------------------------
-// C<M>=A*B and C=A*B: heap saxpy-based method
+// C+=A'*B: dense dot product
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Adot4B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_AxB_dot4_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C=A*B, C<M>=A*B, C<!M>=A*B: saxpy3 method (Gustavson + Hash)
 //------------------------------------------------------------------------------
 
-#include "GB_heap.h"
+#include "GB_AxB_saxpy3_template.h"
 
-GrB_Info GB_AheapB
+GrB_Info GB_Asaxpy3B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 )
 { 
     #if GB_DISABLE
     return (GrB_NO_VALUE) ;
     #else
-    GrB_Matrix C = (*Chandle) ;
-    GB_ctype *GB_RESTRICT Cx = C->x ;
-    GB_ctype cij ;
-    int64_t cvlen = C->vlen ;
-    GrB_Info info = GrB_SUCCESS ;
-    #include "GB_AxB_heap_meta.c"
-    return (info) ;
+    #include "GB_AxB_saxpy3_template.c"
+    return (GrB_SUCCESS) ;
     #endif
 }
 
diff --git a/Source/Generator/GB_AxB.h b/Source/Generator/GB_AxB.h
index 2bfda68000..211c20042c 100644
--- a/Source/Generator/GB_AxB.h
+++ b/Source/Generator/GB_AxB.h
@@ -1,16 +1,7 @@
-GrB_Info GB_AgusB
-(
-    GrB_Matrix C,
-    const GrB_Matrix M,
-    const GrB_Matrix A, bool A_is_pattern,
-    const GrB_Matrix B, bool B_is_pattern,
-    GB_Sauna Sauna
-) ;
-
 GrB_Info GB_Adot2B
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix *Aslice, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     int64_t *GB_RESTRICT B_slice,
@@ -21,7 +12,7 @@ GrB_Info GB_Adot2B
 GrB_Info GB_Adot3B
 (
     GrB_Matrix C,
-    const GrB_Matrix M,
+    const GrB_Matrix M, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
     const GB_task_struct *GB_RESTRICT TaskList,
@@ -29,15 +20,25 @@ GrB_Info GB_Adot3B
     const int nthreads
 ) ;
 
-GrB_Info GB_AheapB
+GrB_Info GB_Asaxpy3B
 (
-    GrB_Matrix *Chandle,
-    const GrB_Matrix M,
+    GrB_Matrix C,
+    const GrB_Matrix M, bool Mask_comp, const bool Mask_struct,
     const GrB_Matrix A, bool A_is_pattern,
     const GrB_Matrix B, bool B_is_pattern,
-    int64_t *GB_RESTRICT List,
-    GB_pointer_pair *GB_RESTRICT pA_pair,
-    GB_Element *GB_RESTRICT Heap,
-    const int64_t bjnz_max
+    GB_saxpy3task_struct *GB_RESTRICT TaskList,
+    const int ntasks,
+    const int nfine,
+    const int nthreads,
+    GB_Context Context
 ) ;
 
+GrB_Info GB_Adot4B
+(
+    GrB_Matrix C,
+    const GrB_Matrix A, bool A_is_pattern,
+    int64_t *GB_RESTRICT A_slice, int naslice,
+    const GrB_Matrix B, bool B_is_pattern,
+    int64_t *GB_RESTRICT B_slice, int nbslice,
+    const int nthreads
+) ;
diff --git a/Source/Generator/GB_binop.c b/Source/Generator/GB_binop.c
index 1a9e525de7..390fbe4746 100644
--- a/Source/Generator/GB_binop.c
+++ b/Source/Generator/GB_binop.c
@@ -2,7 +2,7 @@
 // GB_binop:  hard-coded functions for each built-in binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -13,14 +13,19 @@
 #ifndef GBCOMPACT
 #include "GB_control.h"
 #include "GB_ek_slice.h"
+#include "GB_dense.h"
 #include "GB_binop__include.h"
 
 // C=binop(A,B) is defined by the following types and operators:
 
-// A+B function (eWiseAdd):    GB_AaddB
-// A.*B function (eWiseMult):  GB_AemultB
-// A*D function (colscale):    GB_AxD
-// D*A function (rowscale):    GB_DxB
+// A+B function (eWiseAdd):         GB_AaddB
+// A.*B function (eWiseMult):       GB_AemultB
+// A*D function (colscale):         GB_AxD
+// D*A function (rowscale):         GB_DxB
+// C+=A function (dense accum):     GB_Cdense_accumA
+// C+=x function (dense accum):     GB_Cdense_accumX
+// C+=A+B function (dense ewise3):  GB_Cdense_ewise3_accum
+// C=A+B function (dense ewise3):   GB_Cdense_ewise3_noaccum
 
 // C type:   GB_ctype
 // A type:   GB_atype
@@ -44,6 +49,10 @@
 #define GB_GETB(bij,Bx,pB)  \
     GB_getb(bij,Bx,pB)
 
+// declare scalar of the same type as C
+#define GB_CTYPE_SCALAR(t)  \
+    GB_ctype t
+
 // cij = Ax [pA]
 #define GB_COPY_A_TO_C(cij,Ax,pA) cij = Ax [pA] ;
 
@@ -56,6 +65,22 @@
 #define GB_BINOP(z, x, y)   \
     GB_BINARYOP(z, x, y) ;
 
+// op is second
+#define GB_OP_IS_SECOND \
+    GB_op_is_second
+
+// op is plus_fp32 or plus_fp64
+#define GB_OP_IS_PLUS_REAL \
+    GB_op_is_plus_real
+
+// op is minus_fp32 or minus_fp64
+#define GB_OP_IS_MINUS_REAL \
+    GB_op_is_minus_real
+
+// GB_cblas_*axpy gateway routine, if it exists for this operator and type:
+#define GB_CBLAS_AXPY \
+    GB_cblas_axpy
+
 // do the numerical phases of GB_add and GB_emult
 #define GB_PHASE_2_OF_2
 
@@ -66,6 +91,99 @@
 #define GB_DISABLE \
     GB_disable
 
+//------------------------------------------------------------------------------
+// C += A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+if_is_binop_subset
+
+// The op must be MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, or RDIV.
+
+void GB_Cdense_ewise3_accum
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #include "GB_dense_ewise3_accum_template.c"
+}
+
+endif_is_binop_subset
+
+//------------------------------------------------------------------------------
+// C = A+B, all 3 matrices dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_ewise3_noaccum
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_ewise3_noaccum_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += A, accumulate a sparse matrix into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumA
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    if_C_dense_update
+    { 
+        #include "GB_dense_subassign_23_template.c"
+    }
+    endif_C_dense_update
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C += x, accumulate a scalar into a dense matrix
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_accumX
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+)
+{
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    if_C_dense_update
+    { 
+        GB_ctype ywork = (*((GB_ctype *) p_ywork)) ;
+        #include "GB_dense_subassign_22_template.c"
+        return (GrB_SUCCESS) ;
+    }
+    endif_C_dense_update
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
 //------------------------------------------------------------------------------
 // C = A*D, column scale with diagonal D matrix
 //------------------------------------------------------------------------------
@@ -120,6 +238,7 @@ GrB_Info GB_AaddB
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -147,6 +266,7 @@ GrB_Info GB_AemultB
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generator/GB_binop.h b/Source/Generator/GB_binop.h
index 6123bb8717..28153b08e3 100644
--- a/Source/Generator/GB_binop.h
+++ b/Source/Generator/GB_binop.h
@@ -1,3 +1,42 @@
+
+if_is_binop_subset
+
+void GB_Cdense_ewise3_accum
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+endif_is_binop_subset
+
+GrB_Info GB_Cdense_ewise3_noaccum
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumA
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_accumX
+(
+    GrB_Matrix C,
+    const GB_void *p_ywork,
+    const int nthreads
+) ;
+
 GrB_Info GB_AxD
 (
     GrB_Matrix C,
@@ -22,6 +61,7 @@ GrB_Info GB_AaddB
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const bool Ch_is_Mh,
@@ -37,6 +77,7 @@ GrB_Info GB_AemultB
 (
     GrB_Matrix C,
     const GrB_Matrix M,
+    const bool Mask_struct,
     const GrB_Matrix A,
     const GrB_Matrix B,
     const int64_t *GB_RESTRICT C_to_M,
diff --git a/Source/Generator/GB_red.c b/Source/Generator/GB_red.c
index 4fca5885cc..7e91afce0d 100644
--- a/Source/Generator/GB_red.c
+++ b/Source/Generator/GB_red.c
@@ -2,7 +2,7 @@
 // GB_red:  hard-coded functions for reductions
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,6 +11,7 @@
 
 #include "GB.h"
 #ifndef GBCOMPACT
+#include "GB_atomics.h"
 #include "GB_ek_slice.h"
 #include "GB_control.h" 
 #include "GB_red__include.h"
@@ -102,6 +103,11 @@
     #define GB_PANEL                                \
         GB_panel
 
+// special case for the ANY monoid
+
+    #define GB_IS_ANY_MONOID                        \
+        GB_is_any_monoid
+
 // disable this operator and use the generic case if these conditions hold
 #define GB_DISABLE \
     GB_disable
@@ -131,10 +137,14 @@ GrB_Info GB_red_scalar
     #endif
 }
 
+endif_is_monoid
+
 //------------------------------------------------------------------------------
 // reduce to each vector: each vector A(:,k) reduces to a scalar Tx (k)
 //------------------------------------------------------------------------------
 
+if_is_monoid
+
 GrB_Info GB_red_eachvec
 (
     GB_atype *GB_RESTRICT Tx,
@@ -156,10 +166,14 @@ GrB_Info GB_red_eachvec
     #endif
 }
 
+endif_is_monoid
+
 //------------------------------------------------------------------------------
 // reduce to each index: each A(i,:) reduces to a scalar T (i)
 //------------------------------------------------------------------------------
 
+if_is_monoid
+
 GrB_Info GB_red_eachindex
 (
     GrB_Matrix *Thandle,
diff --git a/Source/Generator/GB_sel.c b/Source/Generator/GB_sel.c
index e09be0a97c..60ca3612c4 100644
--- a/Source/Generator/GB_sel.c
+++ b/Source/Generator/GB_sel.c
@@ -2,7 +2,7 @@
 // GB_sel:  hard-coded functions for selection operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Generator/GB_type.c b/Source/Generator/GB_type.c
new file mode 100644
index 0000000000..4a554234bd
--- /dev/null
+++ b/Source/Generator/GB_type.c
@@ -0,0 +1,121 @@
+//------------------------------------------------------------------------------
+// GB_type:  hard-coded functions for each built-in type
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// If this file is in the Generated/ folder, do not edit it (auto-generated).
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_control.h"
+#include "GB_ek_slice.h"
+#include "GB_type__include.h"
+
+// The operation is defined by the following types and operators:
+
+// C<M>=x (C is dense): GB_Cdense_05d
+// C<A>=A (C is dense): GB_Cdense_06d;  C<M>=A (C empty, A dense): GB_Cdense_25
+
+// C type:   GB_ctype
+
+#define GB_CTYPE \
+    GB_ctype
+
+#define GB_CX(p) Cx [p]
+
+// Cx [p] = scalar
+#define GB_COPY_SCALAR_TO_C(p,x) Cx [p] = x
+
+// Cx [p] = Ax [pA]
+#define GB_COPY_A_TO_C(Cx,p,Ax,pA) Cx [p] = Ax [pA]
+
+// test the mask condition with Ax [pA]
+#define GB_AX_MASK(Ax,pA,asize) (Ax [pA] != 0)
+
+// hard-coded loops can be vectorized
+#define GB_PRAGMA_VECTORIZE GB_PRAGMA_SIMD
+
+// disable this operator and use the generic case if these conditions hold
+#define GB_DISABLE \
+    GB_disable
+
+//------------------------------------------------------------------------------
+// C<M>=x, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_05d
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    GB_ctype cwork = (*((GB_ctype *) p_cwork)) ;
+    #include "GB_dense_subassign_05d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<A>=A, when C is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_06d
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_subassign_06d_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+//------------------------------------------------------------------------------
+// C<M>=A, when C is empty and A is dense
+//------------------------------------------------------------------------------
+
+GrB_Info GB_Cdense_25
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+)
+{ 
+    #if GB_DISABLE
+    return (GrB_NO_VALUE) ;
+    #else
+    #include "GB_dense_subassign_25_template.c"
+    return (GrB_SUCCESS) ;
+    #endif
+}
+
+#endif
+
diff --git a/Source/Generator/GB_type.h b/Source/Generator/GB_type.h
new file mode 100644
index 0000000000..63ebf53fe3
--- /dev/null
+++ b/Source/Generator/GB_type.h
@@ -0,0 +1,38 @@
+
+GrB_Info GB_Cdense_05d
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const bool Mask_struct,
+    const GB_void *p_cwork,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_06d
+(
+    GrB_Matrix C,
+    const GrB_Matrix A,
+    const bool Mask_struct,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
+GrB_Info GB_Cdense_25
+(
+    GrB_Matrix C,
+    const GrB_Matrix M,
+    const GrB_Matrix A,
+    const int64_t *GB_RESTRICT kfirst_slice,
+    const int64_t *GB_RESTRICT klast_slice,
+    const int64_t *GB_RESTRICT pstart_slice,
+    const int ntasks,
+    const int nthreads
+) ;
+
diff --git a/Source/Generator/GB_unaryop.c b/Source/Generator/GB_unaryop.c
index 22c592f268..657c250658 100644
--- a/Source/Generator/GB_unaryop.c
+++ b/Source/Generator/GB_unaryop.c
@@ -2,7 +2,7 @@
 // GB_unaryop:  hard-coded functions for each built-in unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -42,8 +42,8 @@
     GB_UNARYOP(z, x) ;
 
 // casting
-#define GB_CASTING(z, x)   \
-    GB_CAST(z, x) ;
+#define GB_CASTING(z, aij) \
+    GB_CAST(z, aij) ;
 
 // cij = op (cast (aij))
 #define GB_CAST_OP(pC,pA)           \
@@ -51,8 +51,8 @@
     /* aij = Ax [pA] */             \
     GB_GETA (aij, Ax, pA) ;         \
     /* Cx [pC] = op (cast (aij)) */ \
-    GB_CASTING (x, aij) ;           \
-    GB_OP (GB_CX (pC), x) ;         \
+    GB_CASTING (z, aij) ;           \
+    GB_OP (GB_CX (pC), z) ;         \
 }
 
 // disable this operator and use the generic case if these conditions hold
@@ -65,8 +65,8 @@
 
 GrB_Info GB_unop
 (
-    GB_ctype *GB_RESTRICT Cx,
-    const GB_atype *GB_RESTRICT Ax,
+    GB_ctype *Cx,       // Cx and Ax may be aliased
+    GB_atype *Ax,
     int64_t anz,
     int nthreads
 )
diff --git a/Source/Generator/GB_unaryop.h b/Source/Generator/GB_unaryop.h
index 5f71465d3c..e3692dd7e6 100644
--- a/Source/Generator/GB_unaryop.h
+++ b/Source/Generator/GB_unaryop.h
@@ -1,7 +1,7 @@
 GrB_Info GB_unop
 (
-    GB_ctype *GB_RESTRICT Cx,
-    const GB_atype *GB_RESTRICT Ax,
+    GB_ctype *Cx,
+    GB_atype *Ax,
     int64_t anz,
     int nthreads
 ) ;
diff --git a/Source/GrB_BinaryOp_free.c b/Source/GrB_BinaryOp_free.c
index d3c4c250a1..ac1620c1cb 100644
--- a/Source/GrB_BinaryOp_free.c
+++ b/Source/GrB_BinaryOp_free.c
@@ -2,7 +2,7 @@
 // GrB_BinaryOp_free: free a binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,9 +17,9 @@ GrB_Info GrB_BinaryOp_free          // free a user-created binary operator
 
     if (binaryop != NULL)
     {
-        // only free a run-time user-defined operator
+        // only free a user-defined operator
         GrB_BinaryOp op = *binaryop ;
-        if (op != NULL && op->opcode == GB_USER_R_opcode)
+        if (op != NULL && op->opcode == GB_USER_opcode)
         {
             if (op->magic == GB_MAGIC)
             { 
diff --git a/Source/GrB_BinaryOp_new.c b/Source/GrB_BinaryOp_new.c
index bbb5b6f0cb..8994174287 100644
--- a/Source/GrB_BinaryOp_new.c
+++ b/Source/GrB_BinaryOp_new.c
@@ -2,7 +2,7 @@
 // GrB_BinaryOp_new: create a new user-defined binary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Col_assign.c b/Source/GrB_Col_assign.c
index 651776e84e..fe7f15d245 100644
--- a/Source/GrB_Col_assign.c
+++ b/Source/GrB_Col_assign.c
@@ -2,7 +2,7 @@
 // GrB_Col_assign:    C<M>(Rows,col) = accum (C(Rows,col),u)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,6 +29,7 @@ GrB_Info GrB_Col_assign             // C<M>(Rows,col) = accum (C(Rows,col),u)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Col_assign (C, M, accum, u, Rows, nRows, col, desc)") ;
+    GB_BURBLE_START ("GrB_assign") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -36,7 +37,8 @@ GrB_Info GrB_Col_assign             // C<M>(Rows,col) = accum (C(Rows,col),u)
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // C(Rows,col)<M> = accum (C(Rows,col), u)
@@ -46,9 +48,9 @@ GrB_Info GrB_Col_assign             // C<M>(Rows,col) = accum (C(Rows,col),u)
     GrB_Index Cols [1] ;
     Cols [0] = col ;
 
-    return (GB_assign (
+    info = GB_assign (
         C,                  C_replace,      // C matrix and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         false,                              // do not transpose the mask
         accum,                              // for accum (C(Rows,col),u)
         (GrB_Matrix) u,     false,          // u as a matrix; never transposed
@@ -56,6 +58,8 @@ GrB_Info GrB_Col_assign             // C<M>(Rows,col) = accum (C(Rows,col),u)
         Cols, 1,                            // a single column index
         false, NULL, GB_ignore_code,        // no scalar expansion
         true, false,                        // GrB_Col_assign
-        Context)) ;
-}
+        Context) ;
 
+    GB_BURBLE_END ;
+    return (info) ;
+}
diff --git a/Source/GrB_Col_extract.c b/Source/GrB_Col_extract.c
index 9e4f69904a..ccfde1c8e1 100644
--- a/Source/GrB_Col_extract.c
+++ b/Source/GrB_Col_extract.c
@@ -2,7 +2,7 @@
 // GrB_Col_extract: w<M> = accum (w, A(I,j)) or A(j,I)'
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -32,6 +32,7 @@ GrB_Info GrB_Col_extract        // w<M> = accum (w, A(I,j)) or (A(j,I))'
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Col_extract (w, M, accum, A, I, ni, j, desc)") ;
+    GB_BURBLE_START ("GrB_extract") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
@@ -39,7 +40,8 @@ GrB_Info GrB_Col_extract        // w<M> = accum (w, A(I,j)) or (A(j,I))'
     ASSERT (GB_IMPLIES (M != NULL, GB_VECTOR_OK (M))) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     GrB_Index ancols = (A_transpose ? GB_NROWS (A) : GB_NCOLS (A)) ;
     if (j >= ancols)
@@ -60,13 +62,16 @@ GrB_Info GrB_Col_extract        // w<M> = accum (w, A(I,j)) or (A(j,I))'
     // do the work in GB_extract
     //--------------------------------------------------------------------------
 
-    return (GB_extract (
+    info = GB_extract (
         (GrB_Matrix) w,    C_replace,   // w as a matrix, and descriptor
-        (GrB_Matrix) M,    Mask_comp,   // mask a matrix, and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct,  // mask and its descriptor
         accum,                          // optional accum for z=accum(w,t)
         A,                 A_transpose, // A and its descriptor
         I, ni,                          // row indices I and length ni
         J, 1,                           // one column index, nj = 1
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Descriptor_free.c b/Source/GrB_Descriptor_free.c
index edd09eea11..bc2a55eb68 100644
--- a/Source/GrB_Descriptor_free.c
+++ b/Source/GrB_Descriptor_free.c
@@ -2,11 +2,14 @@
 // GrB_Descriptor_free: free a descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
+// Predefined descriptors are not freed.  Attempts to do so are silently
+// ignored.
+
 #include "GB.h"
 
 GrB_Info GrB_Descriptor_free            // free a descriptor
@@ -18,7 +21,7 @@ GrB_Info GrB_Descriptor_free            // free a descriptor
     if (descriptor != NULL)
     {
         GrB_Descriptor desc = *descriptor ;
-        if (desc != NULL && desc->magic == GB_MAGIC)
+        if (desc != NULL && desc->magic == GB_MAGIC && !(desc->predefined))
         { 
             desc->magic = GB_FREED ;     // to help detect dangling pointers
             GB_FREE_MEMORY (*descriptor, 1,
diff --git a/Source/GrB_Descriptor_new.c b/Source/GrB_Descriptor_new.c
index 4d56eec58e..e4fc2717f1 100644
--- a/Source/GrB_Descriptor_new.c
+++ b/Source/GrB_Descriptor_new.c
@@ -2,7 +2,7 @@
 // GrB_Descriptor_new: create a new descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -44,9 +44,10 @@ GrB_Info GrB_Descriptor_new     // create a new descriptor
     desc->mask = GxB_DEFAULT ;     // descriptor for the mask input
     desc->in0  = GxB_DEFAULT ;     // descriptor for the first input
     desc->in1  = GxB_DEFAULT ;     // descriptor for the second input
-    desc->axb  = GxB_DEFAULT ;     // descriptor for C=A*B
-    desc->nthreads_max = GxB_DEFAULT ;
-    desc->chunk = GxB_DEFAULT ;
+    desc->axb  = GxB_DEFAULT ;     // descriptor for selecting the C=A*B method
+    desc->nthreads_max = GxB_DEFAULT ;  // max # of threads to use
+    desc->chunk = GxB_DEFAULT ;         // chunk for auto-tuning of # threads
+    desc->predefined = false ;     // user-defined
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GrB_Descriptor_set.c b/Source/GrB_Descriptor_set.c
index b24b8366b8..101d587ba8 100644
--- a/Source/GrB_Descriptor_set.c
+++ b/Source/GrB_Descriptor_set.c
@@ -2,7 +2,7 @@
 // GrB_Descriptor_set: set a field in a descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -25,6 +25,12 @@ GrB_Info GrB_Descriptor_set     // set a parameter in a descriptor
     GB_RETURN_IF_NULL_OR_FAULTY (desc) ;
     ASSERT_DESCRIPTOR_OK (desc, "desc to set", GB0) ;
 
+    if (desc->predefined)
+    { 
+        return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
+            "predefined descriptors may not be modified"))) ;
+    }
+
     //--------------------------------------------------------------------------
     // set the parameter
     //--------------------------------------------------------------------------
@@ -46,14 +52,25 @@ GrB_Info GrB_Descriptor_set     // set a parameter in a descriptor
 
         case GrB_MASK : 
 
-            if (! (value == GxB_DEFAULT || value == GrB_SCMP))
+            if (! (value == GxB_DEFAULT ||
+                   value == GrB_COMP ||
+                   value == GrB_STRUCTURE ||
+                   value == (GrB_COMP + GrB_STRUCTURE)))
             { 
                 return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
                     "invalid descriptor value [%d] for GrB_MASK field;\n"
-                    "must be GxB_DEFAULT [%d] or GrB_SCMP [%d]",
-                    (int) value, (int) GxB_DEFAULT, (int) GrB_SCMP))) ;
+                    "must be GxB_DEFAULT [%d], GrB_COMP [%d],\n"
+                    "GrB_STRUCTURE [%d], or GrB_COMP+GrB_STRUCTURE [%d]",
+                    (int) value, (int) GxB_DEFAULT, (int) GrB_COMP,
+                    (int) GrB_STRUCTURE,
+                    (int) (GrB_COMP + GrB_STRUCTURE)))) ;
+            }
+            switch (value)
+            {
+                case GrB_COMP:      desc->mask |= GrB_COMP ;      break ;
+                case GrB_STRUCTURE: desc->mask |= GrB_STRUCTURE ; break ;
+                default:            desc->mask = value ;          break ;
             }
-            desc->mask = value ;
             break ;
 
         case GrB_INP0 : 
@@ -83,14 +100,17 @@ GrB_Info GrB_Descriptor_set     // set a parameter in a descriptor
         case GxB_AxB_METHOD : 
 
             if (! (value == GxB_DEFAULT  || value == GxB_AxB_GUSTAVSON
-                || value == GxB_AxB_HEAP || value == GxB_AxB_DOT))
+                || value == GxB_AxB_HEAP || value == GxB_AxB_DOT
+                || value == GxB_AxB_HASH || value == GxB_AxB_SAXPY))
             { 
                 return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
                     "invalid descriptor value [%d] for GrB_AxB_METHOD field;\n"
                     "must be GxB_DEFAULT [%d], GxB_AxB_GUSTAVSON [%d]\n"
-                    "GxB_AxB_HEAP [%d] or GxB_AxB_DOT [%d]",
+                    "GxB_AxB_HEAP [%d], GxB_AxB_DOT [%d],\n"
+                    "GxB_AxB_HASH [%d] or GxB_AxB_SAXPY [%d]",
                     (int) value, (int) GxB_DEFAULT, (int) GxB_AxB_GUSTAVSON,
-                    (int) GxB_AxB_HEAP, (int) GxB_AxB_DOT))) ;
+                    (int) GxB_AxB_HEAP, (int) GxB_AxB_DOT,
+                    (int) GxB_AxB_HASH, (int) GxB_AxB_SAXPY))) ;
             }
             desc->axb  = value ;
             break ;
diff --git a/Source/GrB_Matrix_apply.c b/Source/GrB_Matrix_apply.c
index 6e5e4b5bfc..244ec81b29 100644
--- a/Source/GrB_Matrix_apply.c
+++ b/Source/GrB_Matrix_apply.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_apply: apply a unary operator to a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -27,23 +27,28 @@ GrB_Info GrB_Matrix_apply           // C<M> = accum (C, op(A)) or op(A')
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Matrix_apply (C, M, accum, op, A, desc)") ;
+    GB_BURBLE_START ("GrB_apply") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     //--------------------------------------------------------------------------
     // apply the operator and optionally transpose; assemble pending tuples
     //--------------------------------------------------------------------------
 
-    return (GB_apply (
+    info = GB_apply (
         C,      C_replace,          // C and its descriptor
-        M,      Mask_comp,          // mask and its descriptor
+        M, Mask_comp, Mask_struct,  // mask and its descriptor
         accum,                      // optional accum for Z=accum(C,T)
         op,                         // operator to apply to the entries
         A,      A_transpose,        // A and its descriptor
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Matrix_assign.c b/Source/GrB_Matrix_assign.c
index 74bcb79095..ec77497897 100644
--- a/Source/GrB_Matrix_assign.c
+++ b/Source/GrB_Matrix_assign.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_assign:    C<M>(Rows,Cols) = accum (C(Rows,Cols),A) or A'
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,21 +29,23 @@ GrB_Info GrB_Matrix_assign          // C<M>(Rows,Cols) += A or A'
 
     GB_WHERE ("GrB_Matrix_assign"
         " (C, M, accum, A, Rows, nRows, Cols, nCols, desc)") ;
+    GB_BURBLE_START ("GrB_assign") ;
 
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     //--------------------------------------------------------------------------
     // C<M>(Rows,Cols) = accum (C(Rows,Cols), A) and variations
     //--------------------------------------------------------------------------
 
-    return (GB_assign (
+    info = GB_assign (
         C,          C_replace,      // C matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         false,                      // do not transpose the mask
         accum,                      // for accum (C(Rows,Cols),A)
         A,          A_transpose,    // A and its descriptor (T=A or A')
@@ -51,6 +53,9 @@ GrB_Info GrB_Matrix_assign          // C<M>(Rows,Cols) += A or A'
         Cols, nCols,                // column indices
         false, NULL, GB_ignore_code,// no scalar expansion
         false, false,               // not GrB_Col_assign nor GrB_row_assign
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Matrix_assign_scalar.c b/Source/GrB_Matrix_assign_scalar.c
index fbfead4f00..db983e3511 100644
--- a/Source/GrB_Matrix_assign_scalar.c
+++ b/Source/GrB_Matrix_assign_scalar.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_assign_[SCALAR]: assign a scalar to matrix, via scalar expansion
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -37,10 +37,13 @@ GrB_Info GrB_Matrix_assign_ ## T    /* C<M>(Rows,Cols) += x                 */ \
 {                                                                              \
     GB_WHERE ("GrB_Matrix_assign_" GB_STR(T)                                   \
         " (C, M, accum, x, Rows, nRows, Cols, nCols, desc)") ;                 \
+    GB_BURBLE_START ("GrB_assign") ;                                           \
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;                                          \
     GB_RETURN_IF_FAULTY (M) ;                                                  \
-    return (GB_assign_scalar (C, M, accum, ampersand x, GB_## T ## _code,      \
-        Rows, nRows, Cols, nCols, desc, Context)) ;                            \
+    GrB_Info info = GB_assign_scalar (C, M, accum, ampersand x,                \
+        GB_## T ## _code, Rows, nRows, Cols, nCols, desc, Context) ;           \
+    GB_BURBLE_END ;                                                            \
+    return (info) ;                                                            \
 }
 
 GB_ASSIGN (bool     , BOOL   , &)
diff --git a/Source/GrB_Matrix_build.c b/Source/GrB_Matrix_build.c
index 8f6cd670a1..3c83d0d0e2 100644
--- a/Source/GrB_Matrix_build.c
+++ b/Source/GrB_Matrix_build.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_build: build a sparse GraphBLAS matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,9 +21,12 @@ GrB_Info GrB_Matrix_build_ ## T     /* build a matrix from (I,J,X) tuples */  \
 )                                                                             \
 {                                                                             \
     GB_WHERE ("GrB_Matrix_build_" GB_STR(T) " (C, I, J, X, nvals, dup)") ;    \
+    GB_BURBLE_START ("GrB_Matrix_build") ;                                    \
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;                                         \
-    return (GB_matvec_build (C, I, J, X, nvals, dup, GB_ ## T ## _code, true, \
-        Context)) ;                                                           \
+    GrB_Info info = GB_matvec_build (C, I, J, X, nvals, dup,                  \
+        GB_ ## T ## _code, true, Context) ;                                   \
+    GB_BURBLE_END ;                                                           \
+    return (info) ;                                                           \
 }
 
 GB_MATRIX_BUILD (bool     , BOOL   )
diff --git a/Source/GrB_Matrix_clear.c b/Source/GrB_Matrix_clear.c
index 8f639f1496..87152ffe81 100644
--- a/Source/GrB_Matrix_clear.c
+++ b/Source/GrB_Matrix_clear.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_clear: clears the content of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Matrix_dup.c b/Source/GrB_Matrix_dup.c
index 0bcac9db4a..c882c848b3 100644
--- a/Source/GrB_Matrix_dup.c
+++ b/Source/GrB_Matrix_dup.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_dup: make a deep copy of a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -23,6 +23,7 @@ GrB_Info GrB_Matrix_dup     // make an exact copy of a matrix
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Matrix_dup (&C, A)") ;
+    GB_BURBLE_START ("GrB_Matrix_dup") ;
     GB_RETURN_IF_NULL (C) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
 
@@ -30,6 +31,8 @@ GrB_Info GrB_Matrix_dup     // make an exact copy of a matrix
     // duplicate the matrix
     //--------------------------------------------------------------------------
 
-    return (GB_dup (C, A, true, NULL, Context)) ;
+    GrB_Info info = GB_dup (C, A, true, NULL, Context) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Matrix_extract.c b/Source/GrB_Matrix_extract.c
index c5ea3313f6..588ab0887c 100644
--- a/Source/GrB_Matrix_extract.c
+++ b/Source/GrB_Matrix_extract.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_extract: C<M> = accum (C, A(I,J)) or A(J,I)'
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -28,25 +28,29 @@ GrB_Info GrB_Matrix_extract     // C<M> = accum (C, A(I,J))
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Matrix_extract (C, M, accum, A, I, ni, J, nj, desc)") ;
-
+    GB_BURBLE_START ("GrB_extract") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     //--------------------------------------------------------------------------
     // do the work in GB_extract
     //--------------------------------------------------------------------------
 
-    return (GB_extract (
+    info = GB_extract (
         C,      C_replace,          // output matrix C and its descriptor
-        M,      Mask_comp,          // mask and its descriptor
+        M, Mask_comp, Mask_struct,  // mask and its descriptor
         accum,                      // optional accum for Z=accum(C,T)
         A,      A_transpose,        // A and its descriptor
         I, ni,                      // row indices
         J, nj,                      // column indices
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Matrix_extractElement.c b/Source/GrB_Matrix_extractElement.c
index 06b172310a..fdc4ff6e83 100644
--- a/Source/GrB_Matrix_extractElement.c
+++ b/Source/GrB_Matrix_extractElement.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_extractElement: extract a single entry from a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Matrix_extractTuples.c b/Source/GrB_Matrix_extractTuples.c
index 3f690e85b3..1d13852945 100644
--- a/Source/GrB_Matrix_extractTuples.c
+++ b/Source/GrB_Matrix_extractTuples.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_extractTuples: extract all tuples from a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -30,10 +30,13 @@ GrB_Info GrB_Matrix_extractTuples_ ## T     /* [I,J,X] = find (A) */          \
 )                                                                             \
 {                                                                             \
     GB_WHERE ("GrB_Matrix_extractTuples_" GB_STR(T) " (I, J, X, nvals, A)") ; \
+    GB_BURBLE_START ("GrB_Matrix_extractTuples") ;                            \
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;                                         \
     GB_RETURN_IF_NULL (p_nvals) ;                                             \
-    return (GB_extractTuples (I, J, X, p_nvals, GB_ ## T ## _code, A,         \
-        Context)) ;                                                           \
+    GrB_Info info = GB_extractTuples (I, J, X, p_nvals, GB_ ## T ## _code, A, \
+        Context) ;                                                            \
+    GB_BURBLE_END ;                                                           \
+    return (info) ;                                                           \
 }
 
 GB_EXTRACT (bool     , BOOL   )
diff --git a/Source/GrB_Matrix_free.c b/Source/GrB_Matrix_free.c
index b176de1857..7d632f667e 100644
--- a/Source/GrB_Matrix_free.c
+++ b/Source/GrB_Matrix_free.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_free: free a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Matrix_ncols.c b/Source/GrB_Matrix_ncols.c
index c2493394c3..85f2a496a1 100644
--- a/Source/GrB_Matrix_ncols.c
+++ b/Source/GrB_Matrix_ncols.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_ncols: number of columns of a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Matrix_new.c b/Source/GrB_Matrix_new.c
index 66a275b3fc..75081d1472 100644
--- a/Source/GrB_Matrix_new.c
+++ b/Source/GrB_Matrix_new.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_new: create a new matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Matrix_nrows.c b/Source/GrB_Matrix_nrows.c
index 0be9026c58..565a3ccb2b 100644
--- a/Source/GrB_Matrix_nrows.c
+++ b/Source/GrB_Matrix_nrows.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_nrows: number of rows of a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Matrix_nvals.c b/Source/GrB_Matrix_nvals.c
index 4fa516bf56..14b0ae7a8f 100644
--- a/Source/GrB_Matrix_nvals.c
+++ b/Source/GrB_Matrix_nvals.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_nvals: number of entries in a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,7 @@ GrB_Info GrB_Matrix_nvals   // get the number of entries in a matrix
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Matrix_nvals (&nvals, A)") ;
+    GB_BURBLE_START ("GrB_Matrix_nvals") ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
 
     // do not check nvals; pending updates must be applied first, in
@@ -30,6 +31,8 @@ GrB_Info GrB_Matrix_nvals   // get the number of entries in a matrix
     // get the number of entries
     //--------------------------------------------------------------------------
 
-    return (GB_nvals (nvals, A, Context)) ;
+    GrB_Info info = GB_nvals (nvals, A, Context) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Matrix_reduce_scalar.c b/Source/GrB_Matrix_reduce_scalar.c
index 1bcaf8eef7..9913ab8d85 100644
--- a/Source/GrB_Matrix_reduce_scalar.c
+++ b/Source/GrB_Matrix_reduce_scalar.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_reduce_scalar: reduce a matrix to a scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,8 +29,12 @@ GrB_Info GrB_Matrix_reduce_ ## T    /* c = accum (c, reduce_to_scalar (A))  */ \
 )                                                                              \
 {                                                                              \
     GB_WHERE ("GrB_Matrix_reduce_" GB_STR(T) " (&c, accum, reduce, A, desc)") ;\
+    GB_BURBLE_START ("GrB_reduce") ;                                           \
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;                                          \
-    return (GB_reduce_to_scalar (c, GrB_ ## T, accum, reduce, A, Context)) ;   \
+    GrB_Info info = GB_reduce_to_scalar (c, GrB_ ## T, accum, reduce, A,       \
+        Context) ;                                                             \
+    GB_BURBLE_END ;                                                            \
+    return (info) ;                                                            \
 }
 
 GB_MATRIX_TO_SCALAR (bool     , BOOL   )
@@ -62,9 +66,12 @@ GrB_Info GrB_Matrix_reduce_UDT      // c = accum (c, reduce_to_scalar (A))
     // Thus, the type of c must be the same as the reduce monoid.
 
     GB_WHERE ("GrB_Matrix_reduce_UDT (&c, accum, reduce, A, desc)") ;
+    GB_BURBLE_START ("GrB_reduce") ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
     GB_RETURN_IF_NULL_OR_FAULTY (reduce) ;
-    return (GB_reduce_to_scalar (c, reduce->op->ztype, accum, reduce, A,
-        Context)) ;
+    GrB_Info info = GB_reduce_to_scalar (c, reduce->op->ztype, accum, reduce,
+        A, Context) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Matrix_setElement.c b/Source/GrB_Matrix_setElement.c
index 0f60d60e79..3a7dea46df 100644
--- a/Source/GrB_Matrix_setElement.c
+++ b/Source/GrB_Matrix_setElement.c
@@ -2,7 +2,7 @@
 // GrB_Matrix_setElement: set an entry in a matrix, C(row,col) = x
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Monoid_free.c b/Source/GrB_Monoid_free.c
index efbb35cd9c..14f129e95e 100644
--- a/Source/GrB_Monoid_free.c
+++ b/Source/GrB_Monoid_free.c
@@ -2,7 +2,7 @@
 // GrB_Monoid_free:  free a monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Monoid_new.c b/Source/GrB_Monoid_new.c
index bd45bb06cb..100d97c135 100644
--- a/Source/GrB_Monoid_new.c
+++ b/Source/GrB_Monoid_new.c
@@ -2,7 +2,7 @@
 // GrB_Monoid_new:  create a new monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Row_assign.c b/Source/GrB_Row_assign.c
index 12dfba452e..634e9c89c6 100644
--- a/Source/GrB_Row_assign.c
+++ b/Source/GrB_Row_assign.c
@@ -2,7 +2,7 @@
 // GrB_Row_assign:    C<M'>(row,Cols) = accum (C(row,Cols),u')
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,6 +29,7 @@ GrB_Info GrB_Row_assign             // C<M'>(row,Cols) += u'
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Row_assign (C, M, accum, u, row, Cols, nCols, desc)") ;
+    GB_BURBLE_START ("GrB_assign") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -36,7 +37,8 @@ GrB_Info GrB_Row_assign             // C<M'>(row,Cols) += u'
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // C<M'>(row,Cols) = accum (C(row,Cols), u')
@@ -46,9 +48,9 @@ GrB_Info GrB_Row_assign             // C<M'>(row,Cols) += u'
     GrB_Index Rows [1] ;
     Rows [0] = row ;
 
-    return (GB_assign (
+    info = GB_assign (
         C,                  C_replace,      // C matrix and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         true,                               // transpose the mask
         accum,                              // for accum (C(Rows,col),u)
         (GrB_Matrix) u,     true,           // u as a matrix; always transposed
@@ -56,6 +58,9 @@ GrB_Info GrB_Row_assign             // C<M'>(row,Cols) += u'
         Cols, nCols,                        // column indices
         false, NULL, GB_ignore_code,        // no scalar expansion
         false, true,                        // GrB_Row_assign
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Semiring_free.c b/Source/GrB_Semiring_free.c
index 3763033997..a30a16ed5b 100644
--- a/Source/GrB_Semiring_free.c
+++ b/Source/GrB_Semiring_free.c
@@ -2,7 +2,7 @@
 // GrB_Semiring_free: free a semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Semiring_new.c b/Source/GrB_Semiring_new.c
index 446c63febd..2de9f8f161 100644
--- a/Source/GrB_Semiring_new.c
+++ b/Source/GrB_Semiring_new.c
@@ -2,7 +2,7 @@
 // GrB_Semiring_new: create a new semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Type_free.c b/Source/GrB_Type_free.c
index 8fd6407b15..515462204e 100644
--- a/Source/GrB_Type_free.c
+++ b/Source/GrB_Type_free.c
@@ -2,7 +2,7 @@
 // GrB_Type_free:  free a user-defined type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Type_new.c b/Source/GrB_Type_new.c
index 25869ab558..8a3d8e19ca 100644
--- a/Source/GrB_Type_new.c
+++ b/Source/GrB_Type_new.c
@@ -2,7 +2,7 @@
 // GrB_Type_new: create a new user-defined type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_UnaryOp_free.c b/Source/GrB_UnaryOp_free.c
index 6815eb25f3..bfc949e375 100644
--- a/Source/GrB_UnaryOp_free.c
+++ b/Source/GrB_UnaryOp_free.c
@@ -2,7 +2,7 @@
 // GrB_UnaryOp_free: free a unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,9 +17,9 @@ GrB_Info GrB_UnaryOp_free           // free a user-created unary operator
 
     if (unaryop != NULL)
     {
-        // only free a run-time user-defined operator
+        // only free a user-defined operator
         GrB_UnaryOp op = *unaryop ;
-        if (op != NULL && op->opcode == GB_USER_R_opcode)
+        if (op != NULL && op->opcode == GB_USER_opcode)
         {
             if (op->magic == GB_MAGIC)
             { 
diff --git a/Source/GrB_UnaryOp_new.c b/Source/GrB_UnaryOp_new.c
index 811f0cae6f..6e46b301ef 100644
--- a/Source/GrB_UnaryOp_new.c
+++ b/Source/GrB_UnaryOp_new.c
@@ -2,7 +2,7 @@
 // GrB_UnaryOp_new: create a new user-defined unary operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Vector_apply.c b/Source/GrB_Vector_apply.c
index a706a62734..60480afc8d 100644
--- a/Source/GrB_Vector_apply.c
+++ b/Source/GrB_Vector_apply.c
@@ -2,7 +2,7 @@
 // GrB_Vector_apply: apply a unary operator to a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -25,6 +25,7 @@ GrB_Info GrB_Vector_apply           // w<M> = accum (w, op(u))
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Vector_apply (w, M, accum, op, u, desc)") ;
+    GB_BURBLE_START ("GrB_apply") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -34,18 +35,22 @@ GrB_Info GrB_Vector_apply           // w<M> = accum (w, op(u))
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // apply the operator; do not transpose
     //--------------------------------------------------------------------------
 
-    return (GB_apply (
+    info = GB_apply (
         (GrB_Matrix) w,     C_replace,      // w and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         accum,                              // optional accum for Z=accum(C,T)
         op,                                 // operator to apply to the entries
         (GrB_Matrix) u,     false,          // u, not transposed
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Vector_assign.c b/Source/GrB_Vector_assign.c
index dbc3cbdff2..ef3f0a644b 100644
--- a/Source/GrB_Vector_assign.c
+++ b/Source/GrB_Vector_assign.c
@@ -2,7 +2,7 @@
 // GrB_Vector_assign:    w<M>(Rows) = accum (w(Rows),u)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -28,6 +28,7 @@ GrB_Info GrB_Vector_assign          // w<M>(Rows) = accum (w(Rows),u)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Vector_assign (w, M, accum, u, Rows, nRows, desc)") ;
+    GB_BURBLE_START ("GrB_assign") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -36,26 +37,26 @@ GrB_Info GrB_Vector_assign          // w<M>(Rows) = accum (w(Rows),u)
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // w(Rows)<M> = accum (w(Rows), u) and variations
     //--------------------------------------------------------------------------
 
-    // construct the column index list Cols = [ 0 ] of length nCols = 1
-    GrB_Index Cols [1] ;
-    Cols [0] = 0 ;
-
-    return (GB_assign (
+    info = GB_assign (
         (GrB_Matrix) w,     C_replace,  // w vector and its descriptor
-        (GrB_Matrix) M,     Mask_comp,  // mask matrix and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct,  // mask and its descriptor
         false,                          // do not transpose the mask
-        accum,                          // for accum (C(Rows,Cols),A)
+        accum,                          // for accum (C(Rows,:),A)
         (GrB_Matrix) u,     false,      // u as a matrix; never transposed
         Rows, nRows,                    // row indices
-        Cols, 1,                        // one column index, nCols = 1
+        GrB_ALL, 1,                     // all column indices
         false, NULL, GB_ignore_code,    // no scalar expansion
         false, false,                   // not GrB_Col_assign nor GrB_Row_assign
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Vector_assign_scalar.c b/Source/GrB_Vector_assign_scalar.c
index cf85652e37..19f53241a4 100644
--- a/Source/GrB_Vector_assign_scalar.c
+++ b/Source/GrB_Vector_assign_scalar.c
@@ -2,7 +2,7 @@
 // GrB_Vector_assign_[SCALAR]: assign scalar to vector, via scalar expansion
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -27,13 +27,16 @@ GrB_Info GrB_Vector_assign_ ## T    /* w<M>(Rows) = accum (w(Rows),x)       */ \
 {                                                                              \
     GB_WHERE ("GrB_Vector_assign_" GB_STR(T)                                   \
         " (w, M, accum, x, Rows, nRows, desc)") ;                              \
+    GB_BURBLE_START ("GrB_assign") ;                                           \
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;                                          \
     GB_RETURN_IF_FAULTY (M) ;                                                  \
     ASSERT (GB_VECTOR_OK (w)) ;                                                \
     ASSERT (GB_IMPLIES (M != NULL, GB_VECTOR_OK (M))) ;                        \
-    return (GB_assign_scalar ((GrB_Matrix) w, (GrB_Matrix) M, accum,           \
+    GrB_Info info = GB_assign_scalar ((GrB_Matrix) w, (GrB_Matrix) M, accum,   \
         ampersand x, GB_## T ## _code, Rows, nRows, GrB_ALL, 1, desc,          \
-        Context)) ;                                                            \
+        Context) ;                                                             \
+    GB_BURBLE_END ;                                                            \
+    return (info) ;                                                            \
 }
 
 GB_ASSIGN (bool     , BOOL   , &)
diff --git a/Source/GrB_Vector_build.c b/Source/GrB_Vector_build.c
index 9b4e84fbb8..b89279cd51 100644
--- a/Source/GrB_Vector_build.c
+++ b/Source/GrB_Vector_build.c
@@ -2,7 +2,7 @@
 // GrB_Vector_build: build a sparse GraphBLAS vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -20,11 +20,13 @@ GrB_Info GrB_Vector_build_ ## T     /* build a vector from (I,X) tuples   */  \
 )                                                                             \
 {                                                                             \
     GB_WHERE ("GrB_Vector_build_" GB_STR(T) " (w, I, X, nvals, dup)") ;       \
+    GB_BURBLE_START ("GrB_Vector_build") ;                                    \
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;                                         \
     ASSERT (GB_VECTOR_OK (w)) ;                                               \
     GrB_Info info = GB_matvec_build ((GrB_Matrix) w, I, NULL, X, nvals, dup,  \
         GB_ ## T ## _code, false, Context) ;                                  \
     ASSERT (GB_IMPLIES (info == GrB_SUCCESS, GB_VECTOR_OK (w))) ;             \
+    GB_BURBLE_END ;                                                           \
     return (info) ;                                                           \
 }
 
diff --git a/Source/GrB_Vector_clear.c b/Source/GrB_Vector_clear.c
index b8d85fbd5c..98628a6577 100644
--- a/Source/GrB_Vector_clear.c
+++ b/Source/GrB_Vector_clear.c
@@ -2,7 +2,7 @@
 // GrB_Vector_clear: clears the content of a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Vector_dup.c b/Source/GrB_Vector_dup.c
index 54ee74c5da..d5baf334c9 100644
--- a/Source/GrB_Vector_dup.c
+++ b/Source/GrB_Vector_dup.c
@@ -2,7 +2,7 @@
 // GrB_Vector_dup: make a deep copy of a sparse vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -23,6 +23,7 @@ GrB_Info GrB_Vector_dup     // make an exact copy of a vector
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Vector_dup (&w, u)") ;
+    GB_BURBLE_START ("GrB_Vector_dup") ;
     GB_RETURN_IF_NULL (w) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
     ASSERT (GB_VECTOR_OK (u)) ;
@@ -31,6 +32,9 @@ GrB_Info GrB_Vector_dup     // make an exact copy of a vector
     // duplicate the vector
     //--------------------------------------------------------------------------
 
-    return (GB_dup ((GrB_Matrix *) w, (GrB_Matrix) u, true, NULL, Context)) ;
+    GrB_Info info = GB_dup ((GrB_Matrix *) w, (GrB_Matrix) u, true, NULL,
+        Context) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Vector_extract.c b/Source/GrB_Vector_extract.c
index 4efa1126e8..3b163d28de 100644
--- a/Source/GrB_Vector_extract.c
+++ b/Source/GrB_Vector_extract.c
@@ -2,7 +2,7 @@
 // GrB_Vector_extract: w<M> = accum (w, u(I))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -26,6 +26,7 @@ GrB_Info GrB_Vector_extract         // w<M> = accum (w, u(I))
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Vector_extract (w, M, accum, u, I, ni, desc)") ;
+    GB_BURBLE_START ("GrB_extract") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -34,7 +35,8 @@ GrB_Info GrB_Vector_extract         // w<M> = accum (w, u(I))
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // extract entries
@@ -47,21 +49,20 @@ GrB_Info GrB_Vector_extract         // w<M> = accum (w, u(I))
     // not transposed.  All GrB_Matrix objects will be in CSC format, and no
     // matrices are transposed via the C_is_vector option in GB_extract.
 
-    // construct the column index list J = [ 0 ] of length nj = 1
-    GrB_Index J [1] ;
-    J [0] = 0 ;
-
     //--------------------------------------------------------------------------
     // do the work in GB_extract
     //--------------------------------------------------------------------------
 
-    return (GB_extract (
+    info = GB_extract (
         (GrB_Matrix) w,     C_replace,  // w as a matrix, and its descriptor
-        (GrB_Matrix) M,     Mask_comp,  // mask matrix, and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct,  // mask and its descriptor
         accum,                          // optional accum for z=accum(w,t)
         (GrB_Matrix) u,     false,      // u as matrix; never transposed
         I, ni,                          // row indices I and length ni
-        J, 1,                           // one column index, nj = 1
-        Context)) ;
+        GrB_ALL, 1,                     // all columns
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Vector_extractElement.c b/Source/GrB_Vector_extractElement.c
index 71e36e76d8..cba6083404 100644
--- a/Source/GrB_Vector_extractElement.c
+++ b/Source/GrB_Vector_extractElement.c
@@ -2,7 +2,7 @@
 // GrB_Vector_extractElement: extract a single entry from a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Vector_extractTuples.c b/Source/GrB_Vector_extractTuples.c
index 17bddad2d0..dc0bff9ee6 100644
--- a/Source/GrB_Vector_extractTuples.c
+++ b/Source/GrB_Vector_extractTuples.c
@@ -2,7 +2,7 @@
 // GrB_Vector_extractTuples: extract all tuples from a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,11 +29,14 @@ GrB_Info GrB_Vector_extractTuples_ ## T     /* [I,~,X] = find (A) */          \
 )                                                                             \
 {                                                                             \
     GB_WHERE ("GrB_Vector_extractTuples_" GB_STR(T) " (I, X, nvals, v)") ;    \
+    GB_BURBLE_START ("GrB_Vector_extractTuples") ;                            \
     GB_RETURN_IF_NULL_OR_FAULTY (v) ;                                         \
     GB_RETURN_IF_NULL (p_nvals) ;                                             \
     ASSERT (GB_VECTOR_OK (v)) ;                                               \
-    return (GB_extractTuples (I, NULL, X, p_nvals, GB_ ## T ## _code,         \
-        (GrB_Matrix) v, Context)) ;                                           \
+    GrB_Info info = GB_extractTuples (I, NULL, X, p_nvals, GB_ ## T ## _code, \
+        (GrB_Matrix) v, Context) ;                                            \
+    GB_BURBLE_END ;                                                           \
+    return (info) ;                                                           \
 }
 
 GB_EXTRACT (bool     , BOOL   )
diff --git a/Source/GrB_Vector_free.c b/Source/GrB_Vector_free.c
index b6c6f5d8d5..ab6da1e7fd 100644
--- a/Source/GrB_Vector_free.c
+++ b/Source/GrB_Vector_free.c
@@ -2,7 +2,7 @@
 // GrB_Vector_free: free a sparse vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Vector_new.c b/Source/GrB_Vector_new.c
index aa7c3b9085..b5c17ed5d3 100644
--- a/Source/GrB_Vector_new.c
+++ b/Source/GrB_Vector_new.c
@@ -2,7 +2,7 @@
 // GrB_Vector_new: create a new vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Vector_nvals.c b/Source/GrB_Vector_nvals.c
index c49e1de843..429ab3507a 100644
--- a/Source/GrB_Vector_nvals.c
+++ b/Source/GrB_Vector_nvals.c
@@ -2,7 +2,7 @@
 // GrB_Vector_nvals: number of entries in a sparse vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,7 @@ GrB_Info GrB_Vector_nvals   // get the number of entries in a vector
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_Vector_nvals (&nvals, v)") ;
+    GB_BURBLE_START ("GrB_Vector_nvals") ;
     GB_RETURN_IF_NULL_OR_FAULTY (v) ;
     ASSERT (GB_VECTOR_OK (v)) ;
 
@@ -31,6 +32,8 @@ GrB_Info GrB_Vector_nvals   // get the number of entries in a vector
     // get the number of entries
     //--------------------------------------------------------------------------
 
-    return (GB_nvals (nvals, (GrB_Matrix) v, Context)) ;
+    GrB_Info info = GB_nvals (nvals, (GrB_Matrix) v, Context) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Vector_reduce_scalar.c b/Source/GrB_Vector_reduce_scalar.c
index 25ac61b49e..53732f077f 100644
--- a/Source/GrB_Vector_reduce_scalar.c
+++ b/Source/GrB_Vector_reduce_scalar.c
@@ -2,7 +2,7 @@
 // GrB_Vector_reduce_scalar: reduce a vector to a scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,10 +29,13 @@ GrB_Info GrB_Vector_reduce_ ## T    /* c = accum (c, reduce_to_scalar (u))  */ \
 )                                                                              \
 {                                                                              \
     GB_WHERE ("GrB_Vector_reduce_" GB_STR(T) " (&c, accum, reduce, u, desc)") ;\
+    GB_BURBLE_START ("GrB_reduce") ;                                           \
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;                                          \
     ASSERT (GB_VECTOR_OK (u)) ;                                                \
-    return (GB_reduce_to_scalar (c, GrB_ ## T, accum, reduce, (GrB_Matrix) u,  \
-        Context)) ;                                                     \
+    GrB_Info info = GB_reduce_to_scalar (c, GrB_ ## T, accum, reduce,          \
+        (GrB_Matrix) u, Context) ;                                             \
+    GB_BURBLE_END ;                                                            \
+    return (info) ;                                                            \
 }
 
 GB_VECTOR_TO_SCALAR (bool     , BOOL   )
@@ -58,10 +61,13 @@ GrB_Info GrB_Vector_reduce_UDT      // c = accum (c, reduce_to_scalar (u))
 { 
     // See comments on GrB_Matrix_reduce_UDT
     GB_WHERE ("GrB_Vector_reduce_UDT (&c, accum, reduce, u, desc)") ;
+    GB_BURBLE_START ("GrB_reduce") ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
     GB_RETURN_IF_NULL_OR_FAULTY (reduce) ;
     ASSERT (GB_VECTOR_OK (u)) ;
-    return (GB_reduce_to_scalar (c, reduce->op->ztype,
-        accum, reduce, (GrB_Matrix) u, Context)) ;
+    GrB_Info info = GB_reduce_to_scalar (c, reduce->op->ztype,
+        accum, reduce, (GrB_Matrix) u, Context) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_Vector_setElement.c b/Source/GrB_Vector_setElement.c
index 21a03a20a8..c6f10abc33 100644
--- a/Source/GrB_Vector_setElement.c
+++ b/Source/GrB_Vector_setElement.c
@@ -2,7 +2,7 @@
 // GrB_Vector_setElement: set an entry in a vector, w (row) = x
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_Vector_size.c b/Source/GrB_Vector_size.c
index 209326e622..7047466fd3 100644
--- a/Source/GrB_Vector_size.c
+++ b/Source/GrB_Vector_size.c
@@ -2,7 +2,7 @@
 // GrB_Vector_size: dimension of a sparse vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_eWiseAdd_Matrix.c b/Source/GrB_eWiseAdd_Matrix.c
index fddf57506b..d585b6a810 100644
--- a/Source/GrB_eWiseAdd_Matrix.c
+++ b/Source/GrB_eWiseAdd_Matrix.c
@@ -2,7 +2,7 @@
 // GrB_eWiseAdd_Matrix: matrix element-wise operations, set union
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,25 +12,24 @@
 #include "GB_ewise.h"
 
 #define GB_EWISE(op)                                                        \
-{                                                                           \
     /* check inputs */                                                      \
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;                                       \
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;                                       \
     GB_RETURN_IF_NULL_OR_FAULTY (B) ;                                       \
     GB_RETURN_IF_FAULTY (M) ;                                               \
     /* get the descriptor */                                                \
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_tran, B_tran, xx) ; \
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,       \
+        A_tran, B_tran, xx) ;                                               \
     /* C<M> = accum (C,T) where T = A+B, A'+B, A+B', or A'+B' */            \
-    return (GB_ewise (                                                      \
+    info = GB_ewise (                                                       \
         C,              C_replace,  /* C and its descriptor        */       \
-        M,              Mask_comp,  /* mask and its descriptor     */       \
+        M, Mask_comp, Mask_struct,  /* mask and its descriptor     */       \
         accum,                      /* accumulate operator         */       \
         op,                         /* operator that defines '+'   */       \
         A,              A_tran,     /* A matrix and its descriptor */       \
         B,              B_tran,     /* B matrix and its descriptor */       \
         true,                       /* eWiseAdd                    */       \
-        Context)) ;                                                         \
-}
+        Context)    /* no ';' here: each GB_EWISE (op) ; call supplies it */
 
 //------------------------------------------------------------------------------
 // GrB_eWiseAdd_Matrix_BinaryOp: matrix addition
@@ -53,6 +52,7 @@ GrB_Info GrB_eWiseAdd_Matrix_BinaryOp       // C<M> = accum (C, A+B)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_eWiseAdd_Matrix_BinaryOp (C, M, accum, add, A, B, desc)") ;
+    GB_BURBLE_START ("GrB_eWiseAdd") ;
     GB_RETURN_IF_NULL_OR_FAULTY (add) ;
 
     //--------------------------------------------------------------------------
@@ -60,6 +60,8 @@ GrB_Info GrB_eWiseAdd_Matrix_BinaryOp       // C<M> = accum (C, A+B)
     //--------------------------------------------------------------------------
 
     GB_EWISE (add) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -85,6 +87,7 @@ GrB_Info GrB_eWiseAdd_Matrix_Monoid         // C<M> = accum (C, A+B)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_eWiseAdd_Matrix_Monoid (C, M, accum, monoid, A, B, desc)") ;
+    GB_BURBLE_START ("GrB_eWiseAdd") ;
     GB_RETURN_IF_NULL_OR_FAULTY (monoid) ;
 
     //--------------------------------------------------------------------------
@@ -92,6 +95,8 @@ GrB_Info GrB_eWiseAdd_Matrix_Monoid         // C<M> = accum (C, A+B)
     //--------------------------------------------------------------------------
 
     GB_EWISE (monoid->op) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -118,6 +123,7 @@ GrB_Info GrB_eWiseAdd_Matrix_Semiring       // C<M> = accum (C, A+B)
 
     GB_WHERE ("GrB_eWiseAdd_Matrix_Semiring (C, M, accum, semiring, A, B,"
         " desc)") ;
+    GB_BURBLE_START ("GrB_eWiseAdd") ;
     GB_RETURN_IF_NULL_OR_FAULTY (semiring) ;
 
     //--------------------------------------------------------------------------
@@ -125,5 +131,7 @@ GrB_Info GrB_eWiseAdd_Matrix_Semiring       // C<M> = accum (C, A+B)
     //--------------------------------------------------------------------------
 
     GB_EWISE (semiring->add->op) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_eWiseAdd_Vector.c b/Source/GrB_eWiseAdd_Vector.c
index a0fef9d8d2..436f7f918b 100644
--- a/Source/GrB_eWiseAdd_Vector.c
+++ b/Source/GrB_eWiseAdd_Vector.c
@@ -2,7 +2,7 @@
 // GrB_eWiseAdd_Vector: vector element-wise operations, set union
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,7 +12,6 @@
 #include "GB_ewise.h"
 
 #define GB_EWISE(op)                                                        \
-{                                                                           \
     /* check inputs */                                                      \
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;                                       \
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;                                       \
@@ -23,18 +22,18 @@
     ASSERT (GB_VECTOR_OK (v)) ;                                             \
     ASSERT (M == NULL || GB_VECTOR_OK (M)) ;                                \
     /* get the descriptor */                                                \
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;   \
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,       \
+        xx1, xx2, xx3) ;                                                    \
     /* C<M> = accum (C,T) where T = A+B, A'+B, A+B', or A'+B' */            \
-    return (GB_ewise (                                                      \
+    info = GB_ewise (                                                       \
         (GrB_Matrix) w, C_replace,  /* w and its descriptor        */       \
-        (GrB_Matrix) M, Mask_comp,  /* mask and its descriptor     */       \
+        (GrB_Matrix) M, Mask_comp, Mask_struct, /* mask and its descriptor */\
         accum,                      /* accumulate operator         */       \
         op,                         /* operator that defines '+'   */       \
         (GrB_Matrix) u, false,      /* u, never transposed         */       \
         (GrB_Matrix) v, false,      /* v, never transposed         */       \
         true,                       /* eWiseAdd                    */       \
-        Context)) ;                                                         \
-}
+        Context)
 
 //------------------------------------------------------------------------------
 // GrB_eWiseAdd_Vector_BinaryOp: vector addition
@@ -57,6 +56,7 @@ GrB_Info GrB_eWiseAdd_Vector_BinaryOp       // w<M> = accum (w, u+v)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_eWiseAdd_Vector_BinaryOp (w, M, accum, add, u, v, desc)") ;
+    GB_BURBLE_START ("GrB_eWiseAdd") ;
     GB_RETURN_IF_NULL_OR_FAULTY (add) ;
 
     //--------------------------------------------------------------------------
@@ -64,6 +64,8 @@ GrB_Info GrB_eWiseAdd_Vector_BinaryOp       // w<M> = accum (w, u+v)
     //--------------------------------------------------------------------------
 
     GB_EWISE (add) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -87,6 +89,7 @@ GrB_Info GrB_eWiseAdd_Vector_Monoid         // w<M> = accum (w, u+v)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_eWiseAdd_Vector_Monoid (w, M, accum, monoid, u, v, desc)") ;
+    GB_BURBLE_START ("GrB_eWiseAdd") ;
     GB_RETURN_IF_NULL_OR_FAULTY (monoid) ;
 
     //--------------------------------------------------------------------------
@@ -94,6 +97,8 @@ GrB_Info GrB_eWiseAdd_Vector_Monoid         // w<M> = accum (w, u+v)
     //--------------------------------------------------------------------------
 
     GB_EWISE (monoid->op) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -118,6 +123,7 @@ GrB_Info GrB_eWiseAdd_Vector_Semiring       // w<M> = accum (w, u+v)
 
     GB_WHERE ("GrB_eWiseAdd_Vector_Semiring (w, M, accum, semiring, u, v,"
         " desc)") ;
+    GB_BURBLE_START ("GrB_eWiseAdd") ;
     GB_RETURN_IF_NULL_OR_FAULTY (semiring) ;
 
     //--------------------------------------------------------------------------
@@ -125,5 +131,7 @@ GrB_Info GrB_eWiseAdd_Vector_Semiring       // w<M> = accum (w, u+v)
     //--------------------------------------------------------------------------
 
     GB_EWISE (semiring->add->op) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_eWiseMult_Matrix.c b/Source/GrB_eWiseMult_Matrix.c
index fff146976a..dc15bc9137 100644
--- a/Source/GrB_eWiseMult_Matrix.c
+++ b/Source/GrB_eWiseMult_Matrix.c
@@ -2,7 +2,7 @@
 // GrB_eWiseMult_Matrix: matrix element-wise operations, using set intersection
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,25 +12,24 @@
 #include "GB_ewise.h"
 
 #define GB_EWISE(op)                                                        \
-{                                                                           \
     /* check inputs */                                                      \
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;                                       \
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;                                       \
     GB_RETURN_IF_NULL_OR_FAULTY (B) ;                                       \
     GB_RETURN_IF_FAULTY (M) ;                                               \
     /* get the descriptor */                                                \
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_tran, B_tran, xx) ; \
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,       \
+        A_tran, B_tran, xx) ;                                               \
     /* C<M> = accum (C,T) where T = A.*B, A'.*B, A.*B', or A'.*B' */        \
-    return (GB_ewise (                                                      \
+    info = GB_ewise (                                                       \
         C,              C_replace,  /* C and its descriptor        */       \
-        M,              Mask_comp,  /* mask and its descriptor     */       \
+        M, Mask_comp, Mask_struct,  /* mask and its descriptor     */       \
         accum,                      /* accumulate operator         */       \
         op,                         /* operator that defines '.*'  */       \
         A,              A_tran,     /* A matrix and its descriptor */       \
         B,              B_tran,     /* B matrix and its descriptor */       \
         false,                      /* eWiseMult                   */       \
-        Context)) ;                                                         \
-}
+        Context)
 
 //------------------------------------------------------------------------------
 // GrB_eWiseMult_Matrix_BinaryOp: matrix element-wise multiplication
@@ -54,6 +53,7 @@ GrB_Info GrB_eWiseMult_Matrix_BinaryOp       // C<M> = accum (C, A.*B)
 
     GB_WHERE ("GrB_eWiseMult_Matrix_BinaryOp (C, M, accum, mult, A, B,"
         " desc)") ;
+    GB_BURBLE_START ("GrB_eWiseMult") ;
     GB_RETURN_IF_NULL_OR_FAULTY (mult) ;
 
     //--------------------------------------------------------------------------
@@ -61,6 +61,8 @@ GrB_Info GrB_eWiseMult_Matrix_BinaryOp       // C<M> = accum (C, A.*B)
     //--------------------------------------------------------------------------
 
     GB_EWISE (mult) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -86,6 +88,7 @@ GrB_Info GrB_eWiseMult_Matrix_Monoid         // C<M> = accum (C, A.*B)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_eWiseMult_Matrix_Monoid (C, M, accum, monoid, A, B, desc)") ;
+    GB_BURBLE_START ("GrB_eWiseMult") ;
     GB_RETURN_IF_NULL_OR_FAULTY (monoid) ;
 
     //--------------------------------------------------------------------------
@@ -93,6 +96,8 @@ GrB_Info GrB_eWiseMult_Matrix_Monoid         // C<M> = accum (C, A.*B)
     //--------------------------------------------------------------------------
 
     GB_EWISE (monoid->op) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -119,6 +124,7 @@ GrB_Info GrB_eWiseMult_Matrix_Semiring       // C<M> = accum (C, A.*B)
 
     GB_WHERE ("GrB_eWiseMult_Matrix_Semiring (C, M, accum, semiring, A, B,"
         " desc)") ;
+    GB_BURBLE_START ("GrB_eWiseMult") ;
     GB_RETURN_IF_NULL_OR_FAULTY (semiring) ;
 
     //--------------------------------------------------------------------------
@@ -126,5 +132,7 @@ GrB_Info GrB_eWiseMult_Matrix_Semiring       // C<M> = accum (C, A.*B)
     //--------------------------------------------------------------------------
 
     GB_EWISE (semiring->multiply) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_eWiseMult_Vector.c b/Source/GrB_eWiseMult_Vector.c
index a624cb7ea1..73aff2ec40 100644
--- a/Source/GrB_eWiseMult_Vector.c
+++ b/Source/GrB_eWiseMult_Vector.c
@@ -2,7 +2,7 @@
 // GrB_eWiseMult_Vector: vector element-wise multiplication
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,7 +12,6 @@
 #include "GB_ewise.h"
 
 #define GB_EWISE(op)                                                        \
-{                                                                           \
     /* check inputs */                                                      \
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;                                       \
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;                                       \
@@ -23,18 +22,18 @@
     ASSERT (GB_VECTOR_OK (v)) ;                                             \
     ASSERT (M == NULL || GB_VECTOR_OK (M)) ;                                \
     /* get the descriptor */                                                \
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;   \
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,       \
+        xx1, xx2, xx3) ;                                                    \
     /* C<M> = accum (C,T) where T = A.*B, A'.*B, A.*B', or A'.*B' */        \
-    return (GB_ewise (                                                      \
+    info = GB_ewise (                                                       \
         (GrB_Matrix) w, C_replace,  /* w and its descriptor        */       \
-        (GrB_Matrix) M, Mask_comp,  /* mask and its descriptor     */       \
+        (GrB_Matrix) M, Mask_comp, Mask_struct,  /* mask and descriptor */  \
         accum,                      /* accumulate operator         */       \
         op,                         /* operator that defines '.*'  */       \
         (GrB_Matrix) u, false,      /* u, never transposed         */       \
         (GrB_Matrix) v, false,      /* v, never transposed         */       \
         false,                      /* eWiseMult                   */       \
-        Context)) ;                                                         \
-}
+        Context)
 
 //------------------------------------------------------------------------------
 // GrB_eWiseMult_Vector_BinaryOp: vector element-wise multiplication
@@ -57,6 +56,7 @@ GrB_Info GrB_eWiseMult_Vector_BinaryOp       // w<M> = accum (w, u.*v)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_eWiseMult_Vector_BinaryOp (w, M, accum, mult, u, v, desc)") ;
+    GB_BURBLE_START ("GrB_eWiseMult") ;
     GB_RETURN_IF_NULL_OR_FAULTY (mult) ;
 
     //--------------------------------------------------------------------------
@@ -64,6 +64,8 @@ GrB_Info GrB_eWiseMult_Vector_BinaryOp       // w<M> = accum (w, u.*v)
     //--------------------------------------------------------------------------
 
     GB_EWISE (mult) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -87,6 +89,7 @@ GrB_Info GrB_eWiseMult_Vector_Monoid         // w<M> = accum (w, u.*v)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_eWiseMult_Vector_Monoid (w, M, accum, monoid, u, v, desc)") ;
+    GB_BURBLE_START ("GrB_eWiseMult") ;
     GB_RETURN_IF_NULL_OR_FAULTY (monoid) ;
 
     //--------------------------------------------------------------------------
@@ -94,6 +97,8 @@ GrB_Info GrB_eWiseMult_Vector_Monoid         // w<M> = accum (w, u.*v)
     //--------------------------------------------------------------------------
 
     GB_EWISE (monoid->op) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
 //------------------------------------------------------------------------------
@@ -118,6 +123,7 @@ GrB_Info GrB_eWiseMult_Vector_Semiring       // w<M> = accum (w, u.*v)
 
     GB_WHERE ("GrB_eWiseMult_Vector_Semiring (w, M, accum, semiring, u, v,"
         " desc)") ;
+    GB_BURBLE_START ("GrB_eWiseMult") ;
     GB_RETURN_IF_NULL_OR_FAULTY (semiring) ;
 
     //--------------------------------------------------------------------------
@@ -125,5 +131,7 @@ GrB_Info GrB_eWiseMult_Vector_Semiring       // w<M> = accum (w, u.*v)
     //--------------------------------------------------------------------------
 
     GB_EWISE (semiring->multiply) ;
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_error.c b/Source/GrB_error.c
index f8a21b5e53..ff48e02015 100644
--- a/Source/GrB_error.c
+++ b/Source/GrB_error.c
@@ -2,7 +2,7 @@
 // GrB_error: return an error string describing the last error
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_finalize.c b/Source/GrB_finalize.c
index 57235e6e8f..3893759e9a 100644
--- a/Source/GrB_finalize.c
+++ b/Source/GrB_finalize.c
@@ -2,7 +2,7 @@
 // GrB_finalize: finalize GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,7 +12,7 @@
 // function.  Results are undefined if more than one thread calls this
 // function at the same time.
 
-#include "GB_Sauna.h"
+#include "GB.h"
 
 GrB_Info GrB_finalize ( )
 { 
@@ -23,15 +23,6 @@ GrB_Info GrB_finalize ( )
 
     GB_WHERE ("GrB_finalize") ;
 
-    //--------------------------------------------------------------------------
-    // free all workspace
-    //--------------------------------------------------------------------------
-
-    for (int Sauna_id = 0 ; Sauna_id < GxB_NTHREADS_MAX ; Sauna_id++)
-    { 
-        GB_Sauna_free (Sauna_id) ;
-    }
-
     //--------------------------------------------------------------------------
     // destroy the queue
     //--------------------------------------------------------------------------
diff --git a/Source/GrB_init.c b/Source/GrB_init.c
index 2bc7c9dcfe..ddaf56a761 100644
--- a/Source/GrB_init.c
+++ b/Source/GrB_init.c
@@ -2,7 +2,7 @@
 // GrB_init: initialize GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GrB_mxm.c b/Source/GrB_mxm.c
index 6c60c9d27b..516f96f9b0 100644
--- a/Source/GrB_mxm.c
+++ b/Source/GrB_mxm.c
@@ -2,7 +2,7 @@
 // GrB_mxm: matrix-matrix multiply
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -32,29 +32,33 @@ GrB_Info GrB_mxm                    // C<M> = accum (C, A*B)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_mxm (C, M, accum, semiring, A, B, desc)") ;
+    GB_BURBLE_START ("GrB_mxm") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
     GB_RETURN_IF_NULL_OR_FAULTY (B) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose,
-        B_transpose, AxB_method) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, B_transpose, AxB_method) ;
 
     //--------------------------------------------------------------------------
     // C<M> = accum (C,A*B) and variations, using the mxm kernel
     //--------------------------------------------------------------------------
 
     // C<M> = accum (C,T) where T = A*B, A'*B, A*B', or A'*B'
-    return (GB_mxm (
+    info = GB_mxm (
         C,          C_replace,      // C matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         accum,                      // for accum (C,T)
         semiring,                   // semiring that defines T=A*B
         A,          A_transpose,    // A matrix and its descriptor
         B,          B_transpose,    // B matrix and its descriptor
         false,                      // use fmult(x,y), flipxy false
         AxB_method,                 // algorithm selector
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_mxv.c b/Source/GrB_mxv.c
index 7e138215ab..74f5826196 100644
--- a/Source/GrB_mxv.c
+++ b/Source/GrB_mxv.c
@@ -2,7 +2,7 @@
 // GrB_mxv: matrix-vector multiply
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -32,6 +32,7 @@ GrB_Info GrB_mxv                    // w<M> = accum (w, A*u)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_mxv (w, M, accum, semiring, A, u, desc)") ;
+    GB_BURBLE_START ("GrB_mxv") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
@@ -41,24 +42,26 @@ GrB_Info GrB_mxv                    // w<M> = accum (w, A*u)
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx,
-        AxB_method) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx, AxB_method) ;
 
     //--------------------------------------------------------------------------
     // w<M> = accum (w,A*u) and variations, using the mxm kernel
     //--------------------------------------------------------------------------
 
     // w, M, and u are passed as matrices to GB_mxm.
-
-    return (GB_mxm (
+    info = GB_mxm (
         (GrB_Matrix) w,     C_replace,      // w and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct,     // mask and its descriptor
         accum,                              // for accum (w,t)
         semiring,                           // definition of matrix multiply
         A,                  A_transpose,    // allow A to be transposed
         (GrB_Matrix) u,     false,          // u is never transposed
         false,                              // fmult(x,y), flipxy false
         AxB_method,                         // algorithm selector
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_reduce_to_vector.c b/Source/GrB_reduce_to_vector.c
index 7e1dc6390b..b88274083f 100644
--- a/Source/GrB_reduce_to_vector.c
+++ b/Source/GrB_reduce_to_vector.c
@@ -2,7 +2,7 @@
 // GrB_reduce_to_vector: reduce a matrix to a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -22,9 +22,12 @@ GrB_Info GrB_Matrix_reduce_ ## kind /* w<M> = accum (w,reduce(A))          */ \
 {                                                                             \
     GB_WHERE ("GrB_Matrix_reduce_" GB_STR(kind)                               \
         " (w, M, accum, reduce, A, desc)") ;                                  \
+    GB_BURBLE_START ("GrB_reduce") ;                                          \
     GB_RETURN_IF_NULL_OR_FAULTY (reduce) ;                                    \
-    return (GB_reduce_to_vector ((GrB_Matrix) w, (GrB_Matrix) M, accum,       \
-        reduceop, terminal, A, desc, Context)) ;                              \
+    GrB_Info info = GB_reduce_to_vector ((GrB_Matrix) w, (GrB_Matrix) M,      \
+        accum, reduceop, terminal, A, desc, Context) ;                        \
+    GB_BURBLE_END ;                                                           \
+    return (info) ;                                                           \
 }
 
 // With just a GrB_BinaryOp, built-in operators can terminate early (MIN, MAX,
diff --git a/Source/GrB_transpose.c b/Source/GrB_transpose.c
index 0fb577d2ed..00a19ba84c 100644
--- a/Source/GrB_transpose.c
+++ b/Source/GrB_transpose.c
@@ -2,7 +2,7 @@
 // GrB_transpose: transpose a sparse matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,6 +29,7 @@ GrB_Info GrB_transpose              // C<M> = accum(C,A') or accum(C,A)
     // C may be aliased with M and/or A
 
     GB_WHERE ("GrB_transpose (C, M, accum, A, desc)") ;
+    GB_BURBLE_START ("GrB_transpose") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_FAULTY (accum) ;
@@ -40,7 +41,8 @@ GrB_Info GrB_transpose              // C<M> = accum(C,A') or accum(C,A)
     ASSERT_MATRIX_OK (A, "A input for GrB_transpose", GB0) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     // check domains and dimensions for C<M> = accum (C,T)
     info = GB_compatible (C->type, C, M, accum, A->type, Context) ;
@@ -88,6 +90,7 @@ GrB_Info GrB_transpose              // C<M> = accum(C,A') or accum(C,A)
 
         // T = A', the default behavior.  This step may seem counter-intuitive,
         // but method computes C<M>=A' by default when A_transpose is false.
+        GBBURBLE ("(transpose) ") ;
 
         // Precasting:
         if (accum == NULL)
@@ -121,6 +124,7 @@ GrB_Info GrB_transpose              // C<M> = accum(C,A') or accum(C,A)
         // typecasted eventually, into the type of C if the types of T and C
         // differ.  That can be postponed at no cost since the following step
         // is free.
+        GBBURBLE ("(cheap) ") ;
         info = GB_shallow_copy (&T, C_is_csc, A, Context) ;
     }
 
@@ -138,7 +142,11 @@ GrB_Info GrB_transpose              // C<M> = accum(C,A') or accum(C,A)
     // C<M> = accum (C,T): accumulate the results into C via the mask M
     //--------------------------------------------------------------------------
 
-    return (GB_accum_mask (C, M, NULL, accum, &T, C_replace, Mask_comp,
-        Context)) ;
+    info = GB_accum_mask (C, M, NULL, accum, &T, C_replace, Mask_comp,
+        Mask_struct, Context) ;
+    ASSERT (T == NULL) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_vxm.c b/Source/GrB_vxm.c
index 6e8ef471eb..41282674e9 100644
--- a/Source/GrB_vxm.c
+++ b/Source/GrB_vxm.c
@@ -2,7 +2,7 @@
 // GrB_vxm: vector-matrix multiply
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -32,6 +32,7 @@ GrB_Info GrB_vxm                    // w'<M> = accum (w, u'*A)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_vxm (w, M, accum, semiring, u, A, desc)") ;
+    GB_BURBLE_START ("GrB_vxm") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -41,8 +42,8 @@ GrB_Info GrB_vxm                    // w'<M> = accum (w, u'*A)
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx, A_transpose,
-        AxB_method) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx, A_transpose, AxB_method) ;
 
     //--------------------------------------------------------------------------
     // w'<M'> = accum (w',u'*A) and variations, using the mxm kernel
@@ -55,15 +56,18 @@ GrB_Info GrB_vxm                    // w'<M> = accum (w, u'*A)
     // Since A and u are swapped, in all the matrix multiply kernels
     // fmult(y,x) must be used instead of fmult(x,y).
 
-    return (GB_mxm (
+    info = GB_mxm (
         (GrB_Matrix) w,     C_replace,      // w and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         accum,                              // for accum (w,t)
         semiring,                           // definition of matrix multiply
         A,                  !A_transpose,   // allow A to be transposed
         (GrB_Matrix) u,     false,          // u is never transposed
         true,                               // flipxy: fmult(y,x)
         AxB_method,                         // algorithm selector
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GrB_wait.c b/Source/GrB_wait.c
index 4620fea6dd..d4b5972243 100644
--- a/Source/GrB_wait.c
+++ b/Source/GrB_wait.c
@@ -2,7 +2,7 @@
 // GrB_wait: finish all pending computations
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -41,6 +41,7 @@ GrB_Info GrB_wait ( )       // finish all pending computations
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GrB_wait ( )") ;
+    GB_BURBLE_START ("GrB_wait") ;
 
     //--------------------------------------------------------------------------
     // assemble all matrices with lingering zombies and/or pending tuples
@@ -61,6 +62,7 @@ GrB_Info GrB_wait ( )       // finish all pending computations
         GB_WAIT (A) ;
     }
 
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_BinaryOp_fprint.c b/Source/GxB_BinaryOp_fprint.c
index a4ce44a780..af50065c4b 100644
--- a/Source/GxB_BinaryOp_fprint.c
+++ b/Source/GxB_BinaryOp_fprint.c
@@ -2,7 +2,7 @@
 // GxB_BinaryOp_fprint: print and check a GrB_BinaryOp object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_BinaryOp_xtype.c b/Source/GxB_BinaryOp_xtype.c
index 8618c60d49..b3aa9fb107 100644
--- a/Source/GxB_BinaryOp_xtype.c
+++ b/Source/GxB_BinaryOp_xtype.c
@@ -2,7 +2,7 @@
 // GxB_BinaryOp_xtype: return the type of x for z=f(x,y)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_BinaryOp_ytype.c b/Source/GxB_BinaryOp_ytype.c
index b61250844d..e1c91faa8b 100644
--- a/Source/GxB_BinaryOp_ytype.c
+++ b/Source/GxB_BinaryOp_ytype.c
@@ -2,7 +2,7 @@
 // GxB_BinaryOp_ytype: return the type of y for z=f(x,y)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_BinaryOp_ztype.c b/Source/GxB_BinaryOp_ztype.c
index 1126dcc757..d99c42ee2e 100644
--- a/Source/GxB_BinaryOp_ztype.c
+++ b/Source/GxB_BinaryOp_ztype.c
@@ -2,7 +2,7 @@
 // GxB_BinaryOp_ztype: return the type of z for z=f(x,y)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Col_subassign.c b/Source/GxB_Col_subassign.c
index 2cd155d3f3..e79311aca1 100644
--- a/Source/GxB_Col_subassign.c
+++ b/Source/GxB_Col_subassign.c
@@ -2,7 +2,7 @@
 // GxB_Col_subassign: C(Rows,col)<M> = accum (C(Rows,col),u)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,7 +29,7 @@ GrB_Info GxB_Col_subassign          // C(Rows,col)<M> = accum (C(Rows,col),u)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Col_subassign (C, M, accum, u, Rows, nRows, col, desc)") ;
-
+    GB_BURBLE_START ("GxB_subassign") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -38,7 +38,8 @@ GrB_Info GxB_Col_subassign          // C(Rows,col)<M> = accum (C(Rows,col),u)
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // C(Rows,col)<M> = accum (C(Rows,col), u) and variations
@@ -48,15 +49,18 @@ GrB_Info GxB_Col_subassign          // C(Rows,col)<M> = accum (C(Rows,col),u)
     GrB_Index Cols [1] ;
     Cols [0] = col ;
 
-    return (GB_subassign (
+    info = GB_subassign (
         C,                  C_replace,      // C matrix and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         false,                              // do not transpose the mask
         accum,                              // for accum (C(Rows,col),u)
         (GrB_Matrix) u,     false,          // u as a matrix; never transposed
         Rows, nRows,                        // row indices
         Cols, 1,                            // a single column index
         false, NULL, GB_ignore_code,        // no scalar expansion
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GxB_Desc_get.c b/Source/GxB_Desc_get.c
index 03b20f42fa..80f6737d9a 100644
--- a/Source/GxB_Desc_get.c
+++ b/Source/GxB_Desc_get.c
@@ -2,7 +2,7 @@
 // GxB_Desc_get: get a field in a descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -84,7 +84,7 @@ GrB_Info GxB_Desc_get           // get a parameter from a descriptor
                 va_end (ap) ;
                 GB_RETURN_IF_NULL (nthreads) ;
                 int nth = (desc == NULL) ? GxB_DEFAULT : desc->nthreads_max ;
-                (*nthreads) = GB_IMIN (nth, GxB_NTHREADS_MAX) ;
+                (*nthreads) = nth ;
             }
             break ;
 
diff --git a/Source/GxB_Desc_set.c b/Source/GxB_Desc_set.c
index 91df645f76..519778138c 100644
--- a/Source/GxB_Desc_set.c
+++ b/Source/GxB_Desc_set.c
@@ -2,7 +2,7 @@
 // GxB_Desc_set: set a field in a descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -30,6 +30,12 @@ GrB_Info GxB_Desc_set           // set a parameter in a descriptor
     GB_RETURN_IF_NULL_OR_FAULTY (desc) ;
     ASSERT_DESCRIPTOR_OK (desc, "desc to set", GB0) ;
 
+    if (desc->predefined)
+    { 
+        return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
+            "predefined descriptors may not be modified"))) ;
+    }
+
     //--------------------------------------------------------------------------
     // set the parameter
     //--------------------------------------------------------------------------
@@ -62,14 +68,25 @@ GrB_Info GxB_Desc_set           // set a parameter in a descriptor
                 va_start (ap, field) ;
                 GrB_Desc_Value value = va_arg (ap, GrB_Desc_Value) ;
                 va_end (ap) ;
-                if (! (value == GxB_DEFAULT || value == GrB_SCMP))
+                if (! (value == GxB_DEFAULT ||
+                       value == GrB_COMP ||
+                       value == GrB_STRUCTURE ||
+                       value == (GrB_COMP + GrB_STRUCTURE)))
                 { 
                     return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
                         "invalid descriptor value [%d] for GrB_MASK field;\n"
-                        "must be GxB_DEFAULT [%d] or GrB_SCMP [%d]",
-                        (int) value, (int) GxB_DEFAULT, (int) GrB_SCMP))) ;
+                        "must be GxB_DEFAULT [%d], GrB_COMP [%d],\n"
+                        "GrB_STRUCTURE [%d], or GrB_COMP+GrB_STRUCTURE [%d]",
+                        (int) value, (int) GxB_DEFAULT, (int) GrB_COMP,
+                        (int) GrB_STRUCTURE,
+                        (int) (GrB_COMP + GrB_STRUCTURE)))) ;
+                }
+                switch (value)
+                {
+                    case GrB_COMP:      desc->mask |= GrB_COMP ;      break ;
+                    case GrB_STRUCTURE: desc->mask |= GrB_STRUCTURE ; break ;
+                    default:            desc->mask = value ;          break ;
                 }
-                desc->mask = value ;
             }
             break ;
 
@@ -132,14 +149,17 @@ GrB_Info GxB_Desc_set           // set a parameter in a descriptor
                 GrB_Desc_Value value = va_arg (ap, GrB_Desc_Value) ;
                 va_end (ap) ;
                 if (! (value == GxB_DEFAULT  || value == GxB_AxB_GUSTAVSON
-                    || value == GxB_AxB_HEAP || value == GxB_AxB_DOT))
+                    || value == GxB_AxB_HEAP || value == GxB_AxB_DOT
+                    || value == GxB_AxB_HASH || value == GxB_AxB_SAXPY))
                 { 
                     return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
                         "invalid descriptor value [%d] for GrB_AxB_METHOD"
                         " field;\nmust be GxB_DEFAULT [%d], GxB_AxB_GUSTAVSON"
-                        " [%d]\nGxB_AxB_HEAP [%d] or GxB_AxB_DOT [%d]",
+                        " [%d]\nGxB_AxB_HEAP [%d], GxB_AxB_DOT [%d]\n"
+                        " GxB_AxB_HASH [%d] or GxB_AxB_SAXPY [%d]",
                         (int) value, (int) GxB_DEFAULT, (int) GxB_AxB_GUSTAVSON,
-                        (int) GxB_AxB_HEAP, (int) GxB_AxB_DOT))) ;
+                        (int) GxB_AxB_HEAP, (int) GxB_AxB_DOT,
+                        (int) GxB_AxB_HASH, (int) GxB_AxB_SAXPY))) ;
                 }
                 desc->axb  = value ;
             }
diff --git a/Source/GxB_Descriptor_fprint.c b/Source/GxB_Descriptor_fprint.c
index 7b57ed1665..98871ac4ae 100644
--- a/Source/GxB_Descriptor_fprint.c
+++ b/Source/GxB_Descriptor_fprint.c
@@ -2,7 +2,7 @@
 // GxB_Descriptor_fprint: print and check a GrB_Descriptor object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Descriptor_get.c b/Source/GxB_Descriptor_get.c
index d97e31c8c9..8d2970ac1b 100644
--- a/Source/GxB_Descriptor_get.c
+++ b/Source/GxB_Descriptor_get.c
@@ -2,7 +2,7 @@
 // GxB_Descriptor_get: get a field in a descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Global_Option_get.c b/Source/GxB_Global_Option_get.c
index 7bcd19f23b..16fa0422da 100644
--- a/Source/GxB_Global_Option_get.c
+++ b/Source/GxB_Global_Option_get.c
@@ -2,7 +2,7 @@
 // GxB_Global_Option_get: get a global default option for all future matrices
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -296,6 +296,21 @@ GrB_Info GxB_Global_Option_get      // gets the current global option
             }
             break ;
 
+        //----------------------------------------------------------------------
+        // controlling diagnostic output, for development only
+        //----------------------------------------------------------------------
+
+        case GxB_BURBLE : 
+
+            {
+                va_start (ap, field) ;
+                bool *burble = va_arg (ap, bool *) ;
+                va_end (ap) ;
+                GB_RETURN_IF_NULL (burble) ;
+                (*burble) = GB_Global_burble_get ( ) ;
+            }
+            break ;
+
         //----------------------------------------------------------------------
         // invalid option
         //----------------------------------------------------------------------
diff --git a/Source/GxB_Global_Option_set.c b/Source/GxB_Global_Option_set.c
index 74391b857f..fc6dc81c08 100644
--- a/Source/GxB_Global_Option_set.c
+++ b/Source/GxB_Global_Option_set.c
@@ -2,7 +2,7 @@
 // GxB_Global_Option_set: set a global default option for all future matrices
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -66,15 +66,6 @@ GrB_Info GxB_Global_Option_set      // set a global default option
                 va_end (ap) ;
                 // if < 1, then treat it as if nthreads_max = 1
                 nthreads_max_new = GB_IMAX (1, nthreads_max_new) ;
-                if (nthreads_max_new > GxB_NTHREADS_MAX)
-                { 
-                    return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
-                        "nthreads_max [%d] must be < GxB_NTHREADS_MAX [%d]\n"
-                        "Recompile with a higher value of GxB_NTHREADS_MAX,\n"
-                        "using -DGxB_NTHREADS_MAX=%d (or higher, as needed)",
-                        nthreads_max_new, GxB_NTHREADS_MAX,
-                        nthreads_max_new))) ;
-                }
                 GB_Global_nthreads_max_set (nthreads_max_new) ;
             }
             break ;
@@ -89,6 +80,16 @@ GrB_Info GxB_Global_Option_set      // set a global default option
             }
             break ;
 
+        case GxB_BURBLE :
+
+            {
+                va_start (ap, field) ;
+                int burble = va_arg (ap, int) ;
+                va_end (ap) ;
+                GB_Global_burble_set ((bool) burble) ;
+            }
+            break ;
+
         default : 
 
             return (GB_ERROR (GrB_INVALID_VALUE, (GB_LOG,
diff --git a/Source/GxB_Matrix_Option_get.c b/Source/GxB_Matrix_Option_get.c
index a89dcf6d3f..bb612974d9 100644
--- a/Source/GxB_Matrix_Option_get.c
+++ b/Source/GxB_Matrix_Option_get.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_Option_get: get an option in a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Matrix_Option_set.c b/Source/GxB_Matrix_Option_set.c
index ea4a09cf88..00d6fbe1ee 100644
--- a/Source/GxB_Matrix_Option_set.c
+++ b/Source/GxB_Matrix_Option_set.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_Option_set: set an option in a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -22,6 +22,7 @@ GrB_Info GxB_Matrix_Option_set      // set an option in a matrix
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Matrix_Option_set (A, field, value)") ;
+    GB_BURBLE_START ("GxB_set") ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
     ASSERT_MATRIX_OK (A, "A to set option", GB0) ;
 
@@ -70,6 +71,7 @@ GrB_Info GxB_Matrix_Option_set      // set an option in a matrix
                 { 
                     // A = A', done in place, and change to the new format.
                     // transpose: no typecast, no op, in place of A
+                    GBBURBLE ("(transpose) ") ;
                     info = GB_transpose (NULL, NULL, new_csc, A, NULL, Context);
                     ASSERT (GB_IMPLIES (info == GrB_SUCCESS,
                         A->is_csc == new_csc)) ;
@@ -86,6 +88,7 @@ GrB_Info GxB_Matrix_Option_set      // set an option in a matrix
 
     }
 
+    GB_BURBLE_END ;
     return (info) ;
 }
 
diff --git a/Source/GxB_Matrix_export_CSC.c b/Source/GxB_Matrix_export_CSC.c
index c873127a0e..b8f1d9a03f 100644
--- a/Source/GxB_Matrix_export_CSC.c
+++ b/Source/GxB_Matrix_export_CSC.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_export_CSC: export a matrix in CSC format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GxB_Matrix_export_CSC  // export and free a CSC matrix
 
     GB_WHERE ("GxB_Matrix_export_CSC (&A, &type, &nrows, &ncols, &nvals,"
         " &nonempty, &Ap, &Ai, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_export_CSC") ;
     GB_EXPORT_CHECK ;
 
     GB_RETURN_IF_NULL (Ap) ;
@@ -48,6 +49,7 @@ GrB_Info GxB_Matrix_export_CSC  // export and free a CSC matrix
     if (!((*A)->is_csc))
     { 
         // A = A', done in place, to put A in CSC format
+        GBBURBLE ("(transpose) ") ;
         GB_OK (GB_transpose (NULL, NULL, true, (*A), NULL, Context)) ;
     }
     if ((*A)->is_hyper)
@@ -92,6 +94,7 @@ GrB_Info GxB_Matrix_export_CSC  // export and free a CSC matrix
     // which has already been removed above.
     GB_MATRIX_FREE (A) ;
     ASSERT (*A == NULL) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_export_CSR.c b/Source/GxB_Matrix_export_CSR.c
index 6cd5a8404f..c84833d334 100644
--- a/Source/GxB_Matrix_export_CSR.c
+++ b/Source/GxB_Matrix_export_CSR.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_export_CSR: export a matrix in CSR format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,6 +33,7 @@ GrB_Info GxB_Matrix_export_CSR  // export and free a CSR matrix
 
     GB_WHERE ("GxB_Matrix_export_CSR (&A, &type, &nrows, &ncols, &nvals,"
         " &nonempty, &Ap, &Aj, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_export_CSR") ;
     GB_EXPORT_CHECK ;
 
     GB_RETURN_IF_NULL (Ap) ;
@@ -48,6 +49,7 @@ GrB_Info GxB_Matrix_export_CSR  // export and free a CSR matrix
     if ((*A)->is_csc)
     { 
         // A = A', done in place, to put A in CSR format
+        GBBURBLE ("(transpose) ") ;
         GB_OK (GB_transpose (NULL, NULL, false, (*A), NULL, Context)) ;
     }
     if ((*A)->is_hyper)
@@ -92,6 +94,7 @@ GrB_Info GxB_Matrix_export_CSR  // export and free a CSR matrix
     // which has already been removed above.
     GB_MATRIX_FREE (A) ;
     ASSERT (*A == NULL) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_export_HyperCSC.c b/Source/GxB_Matrix_export_HyperCSC.c
index 50f0cb99a5..c23d04c27a 100644
--- a/Source/GxB_Matrix_export_HyperCSC.c
+++ b/Source/GxB_Matrix_export_HyperCSC.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_export_HyperCSC: export a matrix in hypersparse CSC format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -35,6 +35,7 @@ GrB_Info GxB_Matrix_export_HyperCSC  // export and free a hypersparse CSC matrix
 
     GB_WHERE ("GxB_Matrix_export_HyperCSC (&A, &type, &nrows, &ncols, &nvals,"
         " &nonempty, &nvec, &Ah, &Ap, &Ai, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_export_HyperCSC") ;
     GB_EXPORT_CHECK ;
 
     GB_RETURN_IF_NULL (nvec) ;
@@ -57,6 +58,7 @@ GrB_Info GxB_Matrix_export_HyperCSC  // export and free a hypersparse CSC matrix
     if (!((*A)->is_csc))
     {
         // A = A', done in place, to put A in CSC format
+        GBBURBLE ("(transpose) ") ;
         GB_OK (GB_transpose (NULL, NULL, true, (*A), NULL, Context)) ;
         // the transpose might make it non-hypersparse (if vdim is 1)
         if (!((*A)->is_hyper))
@@ -104,6 +106,7 @@ GrB_Info GxB_Matrix_export_HyperCSC  // export and free a hypersparse CSC matrix
     // which has already been removed above.
     GB_MATRIX_FREE (A) ;
     ASSERT (*A == NULL) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_export_HyperCSR.c b/Source/GxB_Matrix_export_HyperCSR.c
index a1f42b89e7..80be13e9fc 100644
--- a/Source/GxB_Matrix_export_HyperCSR.c
+++ b/Source/GxB_Matrix_export_HyperCSR.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_export_HyperCSR: export a matrix in hypersparse CSR format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -35,6 +35,7 @@ GrB_Info GxB_Matrix_export_HyperCSR  // export and free a hypersparse CSR matrix
 
     GB_WHERE ("GxB_Matrix_export_HyperCSR (&A, &type, &nrows, &ncols, &nvals,"
         " &nonempty, &nvec, &Ah, &Ap, &Aj, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_export_HyperCSR") ;
     GB_EXPORT_CHECK ;
 
     GB_RETURN_IF_NULL (nvec) ;
@@ -57,6 +58,7 @@ GrB_Info GxB_Matrix_export_HyperCSR  // export and free a hypersparse CSR matrix
     if ((*A)->is_csc)
     {
         // A = A', done in place, to put A in CSR format
+        GBBURBLE ("(transpose) ") ;
         GB_OK (GB_transpose (NULL, NULL, false, (*A), NULL, Context)) ;
         // the transpose might make it non-hypersparse (if vdim is 1)
         if (!((*A)->is_hyper))
@@ -104,6 +106,7 @@ GrB_Info GxB_Matrix_export_HyperCSR  // export and free a hypersparse CSR matrix
     // which has already been removed above.
     GB_MATRIX_FREE (A) ;
     ASSERT (*A == NULL) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_fprint.c b/Source/GxB_Matrix_fprint.c
index a061ac26c0..7d16986188 100644
--- a/Source/GxB_Matrix_fprint.c
+++ b/Source/GxB_Matrix_fprint.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_fprint: print and check a GrB_Matrix object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Matrix_import_CSC.c b/Source/GxB_Matrix_import_CSC.c
index 00966295d4..cd329ecd02 100644
--- a/Source/GxB_Matrix_import_CSC.c
+++ b/Source/GxB_Matrix_import_CSC.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_import_CSC: import a matrix in CSC format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -32,6 +32,7 @@ GrB_Info GxB_Matrix_import_CSC      // import a CSC matrix
 
     GB_WHERE ("GxB_Matrix_import_CSC (&A, type, nrows, ncols, nvals,"
         " nonempty, &Ap, &Ai, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_import_CSC") ;
     GB_IMPORT_CHECK ;
 
     GB_RETURN_IF_NULL (Ap) ;
@@ -91,6 +92,7 @@ GrB_Info GxB_Matrix_import_CSC      // import a CSC matrix
     ASSERT (*Ai == NULL) ;
     ASSERT (*Ax == NULL) ;
     ASSERT_MATRIX_OK (*A, "A CSC imported", GB0) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_import_CSR.c b/Source/GxB_Matrix_import_CSR.c
index 9a3543b41f..21ae3f82be 100644
--- a/Source/GxB_Matrix_import_CSR.c
+++ b/Source/GxB_Matrix_import_CSR.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_import_CSR: import a matrix in CSR format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -32,6 +32,7 @@ GrB_Info GxB_Matrix_import_CSR      // import a CSR matrix
 
     GB_WHERE ("GxB_Matrix_import_CSR (&A, type, nrows, ncols, nvals,"
         " nonempty, &Ap, &Aj, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_import_CSR") ;
     GB_IMPORT_CHECK ;
 
     GB_RETURN_IF_NULL (Ap) ;
@@ -91,6 +92,7 @@ GrB_Info GxB_Matrix_import_CSR      // import a CSR matrix
     ASSERT (*Aj == NULL) ;
     ASSERT (*Ax == NULL) ;
     ASSERT_MATRIX_OK ((*A), "A CSR imported", GB0) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_import_HyperCSC.c b/Source/GxB_Matrix_import_HyperCSC.c
index a23be6349f..d8a6c7139c 100644
--- a/Source/GxB_Matrix_import_HyperCSC.c
+++ b/Source/GxB_Matrix_import_HyperCSC.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_import_HyperCSC: import a matrix in hypersparse CSC format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -34,6 +34,7 @@ GrB_Info GxB_Matrix_import_HyperCSC     // import a hypersparse CSC matrix
 
     GB_WHERE ("GxB_Matrix_import_HyperCSC (&A, type, nrows, ncols, nvals,"
         " nonempty, nvec, &Ah, &Ap, &Ai, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_import_HyperCSC") ;
     GB_IMPORT_CHECK ;
 
     GB_RETURN_IF_NULL (Ah) ;
@@ -101,6 +102,7 @@ GrB_Info GxB_Matrix_import_HyperCSC     // import a hypersparse CSC matrix
     ASSERT (*Ai == NULL) ;
     ASSERT (*Ax == NULL) ;
     ASSERT_MATRIX_OK (*A, "A hyper CSC imported", GB0) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_import_HyperCSR.c b/Source/GxB_Matrix_import_HyperCSR.c
index f2a5d1d99d..8e5024ab3b 100644
--- a/Source/GxB_Matrix_import_HyperCSR.c
+++ b/Source/GxB_Matrix_import_HyperCSR.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_import_HyperCSR: import a matrix in hypersparse CSR format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -34,6 +34,7 @@ GrB_Info GxB_Matrix_import_HyperCSR     // import a hypersparse CSR matrix
 
     GB_WHERE ("GxB_Matrix_import_HyperCSR (&A, type, nrows, ncols, nvals,"
         " nonempty, nvec, &Ah, &Ap, &Aj, &Ax, desc)") ;
+    GB_BURBLE_START ("GxB_Matrix_import_HyperCSR") ;
     GB_IMPORT_CHECK ;
 
     GB_RETURN_IF_NULL (Ah) ;
@@ -101,6 +102,7 @@ GrB_Info GxB_Matrix_import_HyperCSR     // import a hypersparse CSR matrix
     ASSERT (*Aj == NULL) ;
     ASSERT (*Ax == NULL) ;
     ASSERT_MATRIX_OK (*A, "A hyper CSR imported", GB0) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Matrix_resize.c b/Source/GxB_Matrix_resize.c
index ccaad4e6bc..16891628ed 100644
--- a/Source/GxB_Matrix_resize.c
+++ b/Source/GxB_Matrix_resize.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_resize: change the size of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Matrix_select.c b/Source/GxB_Matrix_select.c
index 9fb3960707..b2e8283ab4 100644
--- a/Source/GxB_Matrix_select.c
+++ b/Source/GxB_Matrix_select.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_select: select entries from a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -28,25 +28,29 @@ GrB_Info GxB_Matrix_select  // C<M> = accum (C, select(A,k)) or select(A',k)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)") ;
+    GB_BURBLE_START ("GxB_select") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     //--------------------------------------------------------------------------
     // select the entries and optionally transpose; assemble pending tuples
     //--------------------------------------------------------------------------
 
-    return (GB_select (
+    info = GB_select (
         C,      C_replace,          // C and its descriptor
-        M,      Mask_comp,          // mask and its descriptor
+        M, Mask_comp, Mask_struct,  // mask and its descriptor
         accum,                      // optional accum for Z=accum(C,T)
         op,                         // operator to select the entries
         A,                          // first input: A
         Thunk,                      // optional input for select operator
         A_transpose,                // descriptor for A
-        Context)) ;
-}
+        Context) ;
 
+    GB_BURBLE_END ;
+    return (info) ;
+}
diff --git a/Source/GxB_Matrix_subassign.c b/Source/GxB_Matrix_subassign.c
index d3891aee9c..b623489a83 100644
--- a/Source/GxB_Matrix_subassign.c
+++ b/Source/GxB_Matrix_subassign.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_subassign: C(Rows,Cols)<M> = accum (C(Rows,Cols),A) or A'
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,27 +31,31 @@ GrB_Info GxB_Matrix_subassign       // C(Rows,Cols)<M> += A or A'
 
     GB_WHERE ("GxB_Matrix_subassign"
         " (C, M, accum, A, Rows, nRows, Cols, nCols, desc)") ;
-
+    GB_BURBLE_START ("GxB_subassign") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (A) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_transpose, xx1, xx2);
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_transpose, xx1, xx2) ;
 
     //--------------------------------------------------------------------------
     // C(Rows,Cols)<M> = accum (C(Rows,Cols), A) and variations
     //--------------------------------------------------------------------------
 
-    return (GB_subassign (
+    info = GB_subassign (
         C,          C_replace,      // C matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         false,                      // do not transpose the mask
         accum,                      // for accum (C(Rows,Cols),A)
         A,          A_transpose,    // A and its descriptor (T=A or A')
         Rows, nRows,                // row indices
         Cols, nCols,                // column indices
         false, NULL, GB_ignore_code,// no scalar expansion
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GxB_Matrix_subassign_scalar.c b/Source/GxB_Matrix_subassign_scalar.c
index c803f7f258..aa761f96bb 100644
--- a/Source/GxB_Matrix_subassign_scalar.c
+++ b/Source/GxB_Matrix_subassign_scalar.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_subassign_[SCALAR]: assign to submatrix, via scalar expansion
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -35,10 +35,13 @@ GrB_Info GxB_Matrix_subassign_ ## T /* C(Rows,Cols)<M> += x                 */ \
 {                                                                              \
     GB_WHERE ("GxB_Matrix_subassign_" GB_STR(T)                                \
         " (C, M, accum, x, Rows, nRows, Cols, nCols, desc)") ;                 \
+    GB_BURBLE_START ("GxB_subassign") ;                                        \
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;                                          \
     GB_RETURN_IF_FAULTY (M) ;                                                  \
-    return (GB_subassign_scalar (C, M, accum, ampersand x,                     \
-        GB_## T ## _code, Rows, nRows, Cols, nCols, desc, Context)) ;          \
+    GrB_Info info = GB_subassign_scalar (C, M, accum, ampersand x,             \
+        GB_## T ## _code, Rows, nRows, Cols, nCols, desc, Context) ;           \
+    GB_BURBLE_END ;                                                            \
+    return (info) ;                                                            \
 }
 
 GB_ASSIGN (bool     , BOOL   , &)
diff --git a/Source/GxB_Matrix_type.c b/Source/GxB_Matrix_type.c
index 23e38a4017..22596074ac 100644
--- a/Source/GxB_Matrix_type.c
+++ b/Source/GxB_Matrix_type.c
@@ -2,7 +2,7 @@
 // GxB_Matrix_type: return the type of a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -27,6 +27,6 @@ GrB_Info GxB_Matrix_type    // get the type of a matrix
     // get the type
     //--------------------------------------------------------------------------
 
-    return (GB_type (type, A, Context)) ;
+    return (GB_matvec_type (type, A, Context)) ;
 }
 
diff --git a/Source/GxB_Monoid_fprint.c b/Source/GxB_Monoid_fprint.c
index b495ca0803..8df6f44d85 100644
--- a/Source/GxB_Monoid_fprint.c
+++ b/Source/GxB_Monoid_fprint.c
@@ -2,7 +2,7 @@
 // GxB_Monoid_fprint: print and check a GrB_Monoid object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Monoid_identity.c b/Source/GxB_Monoid_identity.c
index c3d35e7dc0..edd2b397b2 100644
--- a/Source/GxB_Monoid_identity.c
+++ b/Source/GxB_Monoid_identity.c
@@ -2,7 +2,7 @@
 // GxB_Monoid_identity: return the identity of a monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Monoid_operator.c b/Source/GxB_Monoid_operator.c
index bb2086c7e7..3ae86009c0 100644
--- a/Source/GxB_Monoid_operator.c
+++ b/Source/GxB_Monoid_operator.c
@@ -2,7 +2,7 @@
 // GxB_Monoid_operator: return the op of a monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Monoid_terminal.c b/Source/GxB_Monoid_terminal.c
index e959e65819..22096bd554 100644
--- a/Source/GxB_Monoid_terminal.c
+++ b/Source/GxB_Monoid_terminal.c
@@ -2,7 +2,7 @@
 // GxB_Monoid_terminal: return the terminal of a monoid (if any)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Monoid_terminal_new.c b/Source/GxB_Monoid_terminal_new.c
index a38ef23978..88f05e9cc6 100644
--- a/Source/GxB_Monoid_terminal_new.c
+++ b/Source/GxB_Monoid_terminal_new.c
@@ -2,7 +2,7 @@
 // GxB_Monoid_terminal_new:  create a new monoid with a terminal value
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Row_subassign.c b/Source/GxB_Row_subassign.c
index 0c64560d4e..7dd5982714 100644
--- a/Source/GxB_Row_subassign.c
+++ b/Source/GxB_Row_subassign.c
@@ -2,7 +2,7 @@
 // GxB_Row_subassign: C(row,Cols)<M'> = accum (C(row,Cols),u')
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,6 +29,7 @@ GrB_Info GxB_Row_subassign          // C(row,Cols)<M'> += u'
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Row_subassign (C, M, accum, u, row, Cols, nCols, desc)") ;
+    GB_BURBLE_START ("GxB_subassign") ;
     GB_RETURN_IF_NULL_OR_FAULTY (C) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -36,7 +37,8 @@ GrB_Info GxB_Row_subassign          // C(row,Cols)<M'> += u'
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // C(row,Cols)<M'> = accum (C(row,Cols), u')
@@ -46,14 +48,17 @@ GrB_Info GxB_Row_subassign          // C(row,Cols)<M'> += u'
     GrB_Index Rows [1] ;
     Rows [0] = row ;
 
-    return (GB_subassign (
+    info = GB_subassign (
         C,                  C_replace,      // C matrix and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         true,                               // transpose the mask
-        accum,                              // for accum (C(Rows,col),u)
+        accum,                              // for accum (C(row,Cols),u')
         (GrB_Matrix) u,     true,           // u as a matrix; always transposed
         Rows, 1,                            // a single row index
         Cols, nCols,                        // column indices
         false, NULL, GB_ignore_code,        // no scalar expansion
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
diff --git a/Source/GxB_Scalar_clear.c b/Source/GxB_Scalar_clear.c
index d195fcbc96..1dfaa8fe70 100644
--- a/Source/GxB_Scalar_clear.c
+++ b/Source/GxB_Scalar_clear.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_clear: clears the content of a GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Scalar_dup.c b/Source/GxB_Scalar_dup.c
index ed1532b491..7ea106306f 100644
--- a/Source/GxB_Scalar_dup.c
+++ b/Source/GxB_Scalar_dup.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_dup: make a deep copy of a sparse GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Scalar_extractElement.c b/Source/GxB_Scalar_extractElement.c
index fc86926c82..074c90132f 100644
--- a/Source/GxB_Scalar_extractElement.c
+++ b/Source/GxB_Scalar_extractElement.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_extractElement: extract a single entry from a GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Scalar_fprint.c b/Source/GxB_Scalar_fprint.c
index cee55ba453..1240876bed 100644
--- a/Source/GxB_Scalar_fprint.c
+++ b/Source/GxB_Scalar_fprint.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_fprint: print and check a GxB_Scalar object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Scalar_free.c b/Source/GxB_Scalar_free.c
index 9f04ba043b..4748af2372 100644
--- a/Source/GxB_Scalar_free.c
+++ b/Source/GxB_Scalar_free.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_free: free a sparse GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Scalar_new.c b/Source/GxB_Scalar_new.c
index 38a2626b98..840019556b 100644
--- a/Source/GxB_Scalar_new.c
+++ b/Source/GxB_Scalar_new.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_new: create a new GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,7 +17,7 @@ GrB_Info GxB_Scalar_new     // create a new GxB_Scalar with no entries
     GxB_Scalar *s,          // handle of GxB_Scalar to create
     GrB_Type type           // type of GxB_Scalar to create
 )
-{
+{ 
 
     //--------------------------------------------------------------------------
     // check inputs
diff --git a/Source/GxB_Scalar_nvals.c b/Source/GxB_Scalar_nvals.c
index 019ac3a151..1c53ee4fff 100644
--- a/Source/GxB_Scalar_nvals.c
+++ b/Source/GxB_Scalar_nvals.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_nvals: number of entries in a sparse GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Scalar_setElement.c b/Source/GxB_Scalar_setElement.c
index 39a6202359..52432be364 100644
--- a/Source/GxB_Scalar_setElement.c
+++ b/Source/GxB_Scalar_setElement.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_setElement: set an entry in a GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Scalar_type.c b/Source/GxB_Scalar_type.c
index ef35138701..69614c0a01 100644
--- a/Source/GxB_Scalar_type.c
+++ b/Source/GxB_Scalar_type.c
@@ -2,7 +2,7 @@
 // GxB_Scalar_type: return the type of a GxB_Scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -27,6 +27,6 @@ GrB_Info GxB_Scalar_type    // get the type of a GxB_Scalar
     // get the type
     //--------------------------------------------------------------------------
 
-    return (GB_type (type, (GrB_Matrix) s, Context)) ;
+    return (GB_matvec_type (type, (GrB_Matrix) s, Context)) ;
 }
 
diff --git a/Source/GxB_SelectOp_fprint.c b/Source/GxB_SelectOp_fprint.c
index bf99d92c6a..7691ad6ad8 100644
--- a/Source/GxB_SelectOp_fprint.c
+++ b/Source/GxB_SelectOp_fprint.c
@@ -2,7 +2,7 @@
 // GxB_SelectOp_fprint: print and check a GrB_SelectOp object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_SelectOp_free.c b/Source/GxB_SelectOp_free.c
index 3d4d7598a8..896fdafa75 100644
--- a/Source/GxB_SelectOp_free.c
+++ b/Source/GxB_SelectOp_free.c
@@ -2,7 +2,7 @@
 // GxB_SelectOp_free: free a select operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -17,9 +17,9 @@ GrB_Info GxB_SelectOp_free          // free a user-created select operator
 
     if (selectop != NULL)
     {
-        // only free a run-time user-defined operator
+        // only free a user-defined operator
         GxB_SelectOp op = *selectop ;
-        if (op != NULL && op->opcode == GB_USER_SELECT_R_opcode)
+        if (op != NULL && op->opcode == GB_USER_SELECT_opcode)
         {
             if (op->magic == GB_MAGIC)
             { 
diff --git a/Source/GxB_SelectOp_new.c b/Source/GxB_SelectOp_new.c
index 2f14500bc3..c212ce39e4 100644
--- a/Source/GxB_SelectOp_new.c
+++ b/Source/GxB_SelectOp_new.c
@@ -2,7 +2,7 @@
 // GxB_SelectOp_new: create a new user-defined select operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_SelectOp_ttype.c b/Source/GxB_SelectOp_ttype.c
index 7e9d4412ee..a666bb1e92 100644
--- a/Source/GxB_SelectOp_ttype.c
+++ b/Source/GxB_SelectOp_ttype.c
@@ -2,7 +2,7 @@
 // GxB_SelectOp_ttype: return the type of thunk for z=f(x,thunk)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.ttt for license.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_SelectOp_xtype.c b/Source/GxB_SelectOp_xtype.c
index 63dd48a1c0..781bfe88e4 100644
--- a/Source/GxB_SelectOp_xtype.c
+++ b/Source/GxB_SelectOp_xtype.c
@@ -2,7 +2,7 @@
 // GxB_SelectOp_xtype: return the type of x for z=f(x,thunk)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Semiring_add.c b/Source/GxB_Semiring_add.c
index d5d6f04f0b..7387e24e88 100644
--- a/Source/GxB_Semiring_add.c
+++ b/Source/GxB_Semiring_add.c
@@ -2,7 +2,7 @@
 // GxB_Semiring_add: return the additive monoid of a semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Semiring_fprint.c b/Source/GxB_Semiring_fprint.c
index d36e8e1b2c..6a4834eefa 100644
--- a/Source/GxB_Semiring_fprint.c
+++ b/Source/GxB_Semiring_fprint.c
@@ -2,7 +2,7 @@
 // GxB_Semiring_fprint: print and check a GrB_Semiring object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Semiring_multiply.c b/Source/GxB_Semiring_multiply.c
index a7d2b6e12a..45dafce73e 100644
--- a/Source/GxB_Semiring_multiply.c
+++ b/Source/GxB_Semiring_multiply.c
@@ -2,7 +2,7 @@
 // GxB_Semiring_multiply: return the multiply operator of a semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Type_fprint.c b/Source/GxB_Type_fprint.c
index 9277a1c049..b55fd449a1 100644
--- a/Source/GxB_Type_fprint.c
+++ b/Source/GxB_Type_fprint.c
@@ -2,7 +2,7 @@
 // GxB_Type_fprint: print and check a GrB_Type object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Type_size.c b/Source/GxB_Type_size.c
index ddee722133..9673fe7474 100644
--- a/Source/GxB_Type_size.c
+++ b/Source/GxB_Type_size.c
@@ -2,7 +2,7 @@
 // GxB_Type_size: return the size of a type
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_UnaryOp_fprint.c b/Source/GxB_UnaryOp_fprint.c
index b3f7b18c60..44f970a5b6 100644
--- a/Source/GxB_UnaryOp_fprint.c
+++ b/Source/GxB_UnaryOp_fprint.c
@@ -2,7 +2,7 @@
 // GxB_UnaryOp_fprint: print and check a GrB_UnaryOp object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_UnaryOp_xtype.c b/Source/GxB_UnaryOp_xtype.c
index 53850da064..c3351c516c 100644
--- a/Source/GxB_UnaryOp_xtype.c
+++ b/Source/GxB_UnaryOp_xtype.c
@@ -2,7 +2,7 @@
 // GxB_UnaryOp_xtype: return the type of x for z=f(x)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_UnaryOp_ztype.c b/Source/GxB_UnaryOp_ztype.c
index bf46c44a33..683632c076 100644
--- a/Source/GxB_UnaryOp_ztype.c
+++ b/Source/GxB_UnaryOp_ztype.c
@@ -2,7 +2,7 @@
 // GxB_UnaryOp_ztype: return the type of z for z=f(x)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Vector_export.c b/Source/GxB_Vector_export.c
index 0317611a56..f7721df4c3 100644
--- a/Source/GxB_Vector_export.c
+++ b/Source/GxB_Vector_export.c
@@ -2,7 +2,7 @@
 // GxB_Vector_export: export a vector in CSR/CSC format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,6 +29,7 @@ GrB_Info GxB_Vector_export  // export and free a vector
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Vector_export (&v, &type, &n, &nvals, &vi, &vx, desc)") ;
+    GB_BURBLE_START ("GxB_Vector_export") ;
     GB_RETURN_IF_NULL (v) ;
     GB_RETURN_IF_NULL_OR_FAULTY (*v) ;
     ASSERT_VECTOR_OK (*v, "v to export", GB0) ;
@@ -74,6 +75,7 @@ GrB_Info GxB_Vector_export  // export and free a vector
     // which has already been removed above.
     GB_VECTOR_FREE (v) ;
     ASSERT (*v == NULL) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Vector_fprint.c b/Source/GxB_Vector_fprint.c
index 0269429928..97ba51318c 100644
--- a/Source/GxB_Vector_fprint.c
+++ b/Source/GxB_Vector_fprint.c
@@ -2,7 +2,7 @@
 // GxB_Vector_fprint: print and check a GrB_Vector object
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Vector_import.c b/Source/GxB_Vector_import.c
index c2537f5c1a..d767b9c0c9 100644
--- a/Source/GxB_Vector_import.c
+++ b/Source/GxB_Vector_import.c
@@ -2,7 +2,7 @@
 // GxB_Vector_import: import a vector in CSR/CSC format
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,6 +29,7 @@ GrB_Info GxB_Vector_import  // import a vector in CSC format
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Vector_import (&v, type, n, nvals, &vi, &vx, desc)") ;
+    GB_BURBLE_START ("GxB_Vector_import") ;
     GB_RETURN_IF_NULL (v) ;
     (*v) = NULL ;
     GB_RETURN_IF_NULL_OR_FAULTY (type) ;
@@ -95,6 +96,7 @@ GrB_Info GxB_Vector_import  // import a vector in CSC format
     ASSERT (*vi == NULL) ;
     ASSERT (*vx == NULL) ;
     ASSERT_VECTOR_OK (*v, "v imported", GB0) ;
+    GB_BURBLE_END ;
     return (GrB_SUCCESS) ;
 }
 
diff --git a/Source/GxB_Vector_resize.c b/Source/GxB_Vector_resize.c
index 4da54dc3e9..c6518a3d6d 100644
--- a/Source/GxB_Vector_resize.c
+++ b/Source/GxB_Vector_resize.c
@@ -2,7 +2,7 @@
 // GxB_Vector_resize: change the size of a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_Vector_select.c b/Source/GxB_Vector_select.c
index eaae603b64..1314a32b35 100644
--- a/Source/GxB_Vector_select.c
+++ b/Source/GxB_Vector_select.c
@@ -2,7 +2,7 @@
 // GxB_Vector_select: select entries from a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -26,25 +26,30 @@ GrB_Info GxB_Vector_select          // w<M> = accum (w, select(u,k))
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Vector_select (w, M, accum, op, u, Thunk, desc)") ;
+    GB_BURBLE_START ("GxB_select") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // select the entries; do not transpose; assemble pending entries
     //--------------------------------------------------------------------------
 
-    return (GB_select (
+    info = GB_select (
         (GrB_Matrix) w,     C_replace,      // w and its descriptor
-        (GrB_Matrix) M,     Mask_comp,      // mask and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         accum,                              // optional accum for Z=accum(C,T)
         op,                                 // operator to select the entries
         (GrB_Matrix) u,                     // first input: u
         Thunk,                              // optional input for select op
         false,                              // u, not transposed
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GxB_Vector_subassign.c b/Source/GxB_Vector_subassign.c
index aab2058f88..a24fd0b15b 100644
--- a/Source/GxB_Vector_subassign.c
+++ b/Source/GxB_Vector_subassign.c
@@ -2,7 +2,7 @@
 // GxB_Vector_subassign: w(Rows)<M> = accum (w(Rows),u)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -28,7 +28,7 @@ GrB_Info GxB_Vector_subassign       // w(Rows)<M> = accum (w(Rows),u)
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_Vector_subassign (w, M, accum, u, Rows, nRows, desc)") ;
-
+    GB_BURBLE_START ("GxB_subassign") ;
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;
     GB_RETURN_IF_FAULTY (M) ;
     GB_RETURN_IF_NULL_OR_FAULTY (u) ;
@@ -38,25 +38,25 @@ GrB_Info GxB_Vector_subassign       // w(Rows)<M> = accum (w(Rows),u)
     ASSERT (GB_VECTOR_OK (u)) ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, xx1, xx2, xx3) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        xx1, xx2, xx3) ;
 
     //--------------------------------------------------------------------------
     // w(Rows)<M> = accum (w(Rows), u) and variations
     //--------------------------------------------------------------------------
 
-    // construct the column index list Cols = [ 0 ] of length nCols = 1
-    GrB_Index Cols [1] ;
-    Cols [0] = 0 ;
-
-    return (GB_subassign (
+    info = GB_subassign (
         (GrB_Matrix) w,     C_replace,  // w vector and its descriptor
-        (GrB_Matrix) M,     Mask_comp,  // mask matrix and its descriptor
+        (GrB_Matrix) M, Mask_comp, Mask_struct, // mask and its descriptor
         false,                          // do not transpose the mask
-        accum,                          // for accum (C(Rows,Cols),A)
+        accum,                          // for accum (C(Rows,:),A)
         (GrB_Matrix) u,     false,      // u as a matrix; never transposed
         Rows, nRows,                    // row indices
-        Cols, 1,                        // one column index, nCols = 1
+        GrB_ALL, 1,                     // all column indices
         false, NULL, GB_ignore_code,    // no scalar expansion
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/GxB_Vector_subassign_scalar.c b/Source/GxB_Vector_subassign_scalar.c
index 52da55693b..3b80cd6f1e 100644
--- a/Source/GxB_Vector_subassign_scalar.c
+++ b/Source/GxB_Vector_subassign_scalar.c
@@ -2,7 +2,7 @@
 // GxB_Vector_subassign_[SCALAR]: assign scalar to vector, via scalar expansion
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,13 +29,16 @@ GrB_Info GxB_Vector_subassign_ ## T /* w(Rows)<M> = accum (w(Rows),x)       */ \
 {                                                                              \
     GB_WHERE ("GxB_Vector_subassign_" GB_STR(T)                                \
         " (w, M, accum, x, Rows, nRows, desc)") ;                              \
+    GB_BURBLE_START ("GxB_subassign") ;                                        \
     GB_RETURN_IF_NULL_OR_FAULTY (w) ;                                          \
     GB_RETURN_IF_FAULTY (M) ;                                                  \
     ASSERT (GB_VECTOR_OK (w)) ;                                                \
     ASSERT (GB_IMPLIES (M != NULL, GB_VECTOR_OK (M))) ;                        \
-    return (GB_subassign_scalar ((GrB_Matrix) w, (GrB_Matrix) M, accum,        \
-        ampersand x, GB_## T ## _code, Rows, nRows, GrB_ALL, 1, desc,          \
+    GrB_Info info = (GB_subassign_scalar ((GrB_Matrix) w, (GrB_Matrix) M,      \
+        accum, ampersand x, GB_## T ## _code, Rows, nRows, GrB_ALL, 1, desc,   \
         Context)) ;                                                            \
+    GB_BURBLE_END ;                                                            \
+    return (info) ;                                                            \
 }
 
 GB_ASSIGN (bool     , BOOL   , &)
diff --git a/Source/GxB_Vector_type.c b/Source/GxB_Vector_type.c
index 47dbf43b02..59c0a6dc4a 100644
--- a/Source/GxB_Vector_type.c
+++ b/Source/GxB_Vector_type.c
@@ -2,7 +2,7 @@
 // GxB_Vector_type: return the type of a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -27,6 +27,6 @@ GrB_Info GxB_Vector_type    // get the type of a vector
     // get the type
     //--------------------------------------------------------------------------
 
-    return (GB_type (type, (GrB_Matrix) v, Context)) ;
+    return (GB_matvec_type (type, (GrB_Matrix) v, Context)) ;
 }
 
diff --git a/Source/GxB_init.c b/Source/GxB_init.c
index 18bf9a18b2..73e6bc28f6 100644
--- a/Source/GxB_init.c
+++ b/Source/GxB_init.c
@@ -2,7 +2,7 @@
 // GxB_init: initialize GraphBLAS and declare malloc/calloc/realloc/free to use
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/GxB_kron.c b/Source/GxB_kron.c
index 6ea7c8bc73..aae8f20b5d 100644
--- a/Source/GxB_kron.c
+++ b/Source/GxB_kron.c
@@ -2,7 +2,7 @@
 // GxB_kron: Kronecker product
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -26,22 +26,27 @@ GrB_Info GxB_kron                   // C<M> = accum (C, kron(A,B))
     //--------------------------------------------------------------------------
 
     GB_WHERE ("GxB_kron (C, M, accum, op, A, B, desc)") ;
+    GB_BURBLE_START ("GxB_kron") ;
 
     // get the descriptor
-    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, A_tran, B_tran, xx) ;
+    GB_GET_DESCRIPTOR (info, desc, C_replace, Mask_comp, Mask_struct,
+        A_tran, B_tran, xx) ;
 
     //--------------------------------------------------------------------------
     // C = kron(A,B)
     //--------------------------------------------------------------------------
 
     // C<M> = accum (C,T) where T = kron(A,B), or with A' and/or B'
-    return (GB_kron (
+    info = GB_kron (
         C,          C_replace,      // C matrix and its descriptor
-        M,          Mask_comp,      // mask matrix and its descriptor
+        M, Mask_comp, Mask_struct,  // mask matrix and its descriptor
         accum,                      // for accum (C,T)
         op,                         // operator that defines T=kron(A,B)
         A,          A_tran,         // A matrix and its descriptor
         B,          B_tran,         // B matrix and its descriptor
-        Context)) ;
+        Context) ;
+
+    GB_BURBLE_END ;
+    return (info) ;
 }
 
diff --git a/Source/README.txt b/Source/README.txt
index 7e597553cc..3019bf4515 100644
--- a/Source/README.txt
+++ b/Source/README.txt
@@ -1,4 +1,4 @@
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 This folder, GraphBLAS/Source, contains all the primary source files for
diff --git a/Source/Template/GB_1type_factory.c b/Source/Template/GB_1type_factory.c
new file mode 100644
index 0000000000..0f2af956fc
--- /dev/null
+++ b/Source/Template/GB_1type_factory.c
@@ -0,0 +1,35 @@
+//------------------------------------------------------------------------------
+// GB_1type_factory.c: 1-type switch factory
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// This is a generic switch factory for creating 11 workers, each of which
+// operates on one built-in data type, to be #include'd in another file.
+
+// GB_1TYPE_WORKER(xname) is a macro defined in the #including file.  The last
+// statement of GB_1TYPE_WORKER should be a break or return since it does not
+// appear here.
+
+// User-defined types are not handled.
+
+{
+    switch (ccode)          // ccode must be defined by the #including file
+    {
+        case GB_BOOL_code   : GB_1TYPE_WORKER (_bool  )
+        case GB_INT8_code   : GB_1TYPE_WORKER (_int8  )
+        case GB_INT16_code  : GB_1TYPE_WORKER (_int16 )
+        case GB_INT32_code  : GB_1TYPE_WORKER (_int32 )
+        case GB_INT64_code  : GB_1TYPE_WORKER (_int64 )
+        case GB_UINT8_code  : GB_1TYPE_WORKER (_uint8 )
+        case GB_UINT16_code : GB_1TYPE_WORKER (_uint16)
+        case GB_UINT32_code : GB_1TYPE_WORKER (_uint32)
+        case GB_UINT64_code : GB_1TYPE_WORKER (_uint64)
+        case GB_FP32_code   : GB_1TYPE_WORKER (_fp32  )
+        case GB_FP64_code   : GB_1TYPE_WORKER (_fp64  )
+        default: ;                  // any other type code: no worker runs
+    }
+}
diff --git a/Source/Template/GB_2type_factory.c b/Source/Template/GB_2type_factory.c
index fbf77c76e6..97f7102bdb 100644
--- a/Source/Template/GB_2type_factory.c
+++ b/Source/Template/GB_2type_factory.c
@@ -2,7 +2,7 @@
 // GB_2type_factory.c: 2-type switch factory
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_AxB_Gustavson_mask.c b/Source/Template/GB_AxB_Gustavson_mask.c
deleted file mode 100644
index ff244289d2..0000000000
--- a/Source/Template/GB_AxB_Gustavson_mask.c
+++ /dev/null
@@ -1,315 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_Gustavson_mask:  compute C<M>=A*B using the Gustavson method, with M
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// This file is #include'd in GB_AxB_Gustavson.c, and Template/GB_AxB.c, the
-// latter of which expands into Generated/GB_AxB__* for all built-in semirings.
-
-// The pattern of C has not been computed, but nnz(M) has given an upper bound
-// on nnz(C) so this method will not run out of memory.  This is Gustavson's
-// method, extended to handle hypersparse matrices, arbitrary semirings, and a
-// mask matrix M.
-
-// The mask is present in this case (see GB_AxB_Gustavson_nomask otherwise).
-// This method takes Omega(nnz(M)) time when exploiting the mask, so a very
-// dense mask can be costly to exploit.  Thus, this method is not used, and
-// GB_AxB_Gustavson_nomask is used instead, if the total flop count is less
-// than nnz(M).
-
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (!GB_aliased (C, M)) ;
-    ASSERT (!GB_aliased (C, A)) ;
-    ASSERT (!GB_aliased (C, B)) ;
-    ASSERT (C->vdim == B->vdim) ;
-    ASSERT (C->vlen == A->vlen) ;
-    ASSERT (A->vdim == B->vlen) ;
-    ASSERT (C->vdim == M->vdim) ;
-    ASSERT (C->vlen == M->vlen) ;
-
-    //--------------------------------------------------------------------------
-    // get the Sauna
-    //--------------------------------------------------------------------------
-
-    // clear Sauna_Mark and ensure hiwater+1 does not cause integer overflow
-    int64_t *GB_RESTRICT Sauna_Mark = Sauna->Sauna_Mark ;
-    int64_t hiwater = GB_Sauna_reset (Sauna, 1, 1) ;
-
-    //--------------------------------------------------------------------------
-    // get M
-    //--------------------------------------------------------------------------
-
-    const int64_t *GB_RESTRICT Mp = M->p ;
-    const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
-    GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
-    size_t msize = M->type->size ;
-    #ifdef GB_HYPER_CASE
-    const int64_t *GB_RESTRICT Mh = M->h ;
-    const int64_t mnvec = M->nvec ;
-    int64_t mpleft = 0 ;
-    int64_t mpright = mnvec - 1 ;
-    #endif
-
-    //--------------------------------------------------------------------------
-    // get A and B
-    //--------------------------------------------------------------------------
-
-    const int64_t *GB_RESTRICT Ap = A->p ;
-    const int64_t *GB_RESTRICT Ai = A->i ;
-    const int64_t *GB_RESTRICT Bi = B->i ;
-
-    #ifdef GB_HYPER_CASE
-    const int64_t *GB_RESTRICT Ah = A->h ;
-    int64_t anvec = A->nvec ;
-    int64_t pleft = 0, pright = anvec-1 ;
-    #endif
-
-    //--------------------------------------------------------------------------
-    // start the construction of C
-    //--------------------------------------------------------------------------
-
-    int64_t *GB_RESTRICT Ci = C->i ;
-    #ifndef GB_HYPER_CASE
-    int64_t *GB_RESTRICT Cp = C->p ;
-    #endif
-
-    int64_t jlast, cnz, cnz_last ;
-    GB_jstartup (C, &jlast, &cnz, &cnz_last) ;
-
-    //--------------------------------------------------------------------------
-    // C<M>=A*B using the Gustavson method, pattern of C is a subset of M
-    //--------------------------------------------------------------------------
-
-    GBI_for_each_vector (B)
-    {
-
-        //----------------------------------------------------------------------
-        // get B(:,j)
-        //----------------------------------------------------------------------
-
-        GBI_jth_iteration (j, pB, pB_end) ;
-
-        //----------------------------------------------------------------------
-        // get M(:,j)
-        //----------------------------------------------------------------------
-
-        // find vector j in M
-        int64_t pM_start, pM_end ;
-        #ifdef GB_HYPER_CASE
-        GB_lookup (M_is_hyper, Mh, Mp, &mpleft, mpright, j, &pM_start, &pM_end);
-        #else
-        pM_start = Mp [j] ;
-        pM_end   = Mp [j+1] ;
-        #endif
-        ASSERT (pM_start <= pM_end) ;
-        ASSERT (pM_end >= -1) ;
-
-        // C(:,j) is empty if either M(:,j) or B(:,j) are empty
-        int64_t bjnz = pB_end - pB ;
-        if (pM_start == pM_end || bjnz == 0)
-        { 
-            #ifndef GB_HYPER_CASE
-            Cp [j+1] = cnz ;
-            #endif
-            continue ;
-        }
-
-        // M(:,j) has at least one entry; get the first and last index in M(:,j)
-        int64_t im_first = Mi [pM_start] ;
-        int64_t im_last  = Mi [pM_end-1] ;
-
-        #ifdef GB_HYPER_CASE
-        // trim Ah on right
-        if (A_is_hyper)
-        {
-            pleft = 0 ;
-            pright = anvec-1 ;
-            if (bjnz > 2)
-            { 
-                // trim Ah [0..pright] to remove any entries past last B(:,j)
-                int64_t klast = Bi [pB_end-1] ;
-                GB_bracket_right (klast, Ah, 0, &pright) ;
-            }
-        }
-        #endif
-
-        // M(:,j) is not yet scattered into Sauna_Mark
-        bool marked = false ;
-
-        //----------------------------------------------------------------------
-        // C(:,j)<M(:,j)> = A * B(:,j), both values and pattern
-        //----------------------------------------------------------------------
-
-        for ( ; pB < pB_end ; pB++)
-        {
-
-            //------------------------------------------------------------------
-            // get the pattern of B(k,j)
-            //------------------------------------------------------------------
-
-            int64_t k = Bi [pB] ;
-
-            //------------------------------------------------------------------
-            // get A(:,k)
-            //------------------------------------------------------------------
-
-            // find A(:,k), reusing pleft since Bi [...] is sorted
-            int64_t pA, pA_end ;
-            #ifdef GB_HYPER_CASE
-            GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k, &pA, &pA_end) ;
-            #else
-            pA     = Ap [k] ;
-            pA_end = Ap [k+1] ;
-            #endif
-
-            // skip if A(:,k) is empty
-            if (pA == pA_end) continue ;
-
-            // skip if the intersection of A(:,k) and M(:,j) is empty
-            if (Ai [pA_end-1] < im_first || Ai [pA] > im_last) continue ;
-
-            //------------------------------------------------------------------
-            // scatter M(:,j) into Sauna_Mark if not yet done
-            //------------------------------------------------------------------
-
-            if (!marked)
-            {
-                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
-                {
-                    // mij = (bool) M (i,j)
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (mij)
-                    { 
-                        // M(i,j) is true
-                        Sauna_Mark [Mi [pM]] = hiwater ;
-                    }
-                }
-                // M(:,j) has been scattered into Sauna_Mark
-                marked = true ;
-
-                // status of Sauna_Mark [0..cvlen-1]:
-                // Sauna_Mark [i] < hiwater: M(i,j)=0, or not present in M(:,j)
-                // Sauna_Mark [i] = hiwater: M(i,j) = 1
-            }
-
-            //------------------------------------------------------------------
-            // get the value of B(k,j)
-            //------------------------------------------------------------------
-
-            GB_GETB (bkj, Bx, pB) ;
-
-            //------------------------------------------------------------------
-            // Sauna += (A(:,k) * B(k,j)) .* M(:,j)
-            //------------------------------------------------------------------
-
-            for ( ; pA < pA_end ; pA++)
-            {
-                // Sauna_Work [i] += (A(i,k) * B(k,j)) .* M(i,j)
-                int64_t i = Ai [pA] ;
-                int64_t mark = Sauna_Mark [i] ;
-                if (mark < hiwater) continue ;
-                // M(i,j) == 1 so do the work
-                GB_GETA (aik, Ax, pA) ;
-
-                // Sauna_Work [i] += A(i,k) * B(k,j)
-                if (mark == hiwater)
-                { 
-                    // first time C(i,j) seen
-                    // Sauna_Work [i] = A(i,k) * B(k,j)
-                    GB_MULT (GB_SAUNA_WORK (i), aik, bkj) ;
-                    Sauna_Mark [i] = hiwater + 1 ;
-                }
-                else
-                { 
-                    // C(i,j) seen before, update it
-                    // Sauna_Work [i] += A(i,k) * B(k,j)
-                    GB_MULTADD (GB_SAUNA_WORK (i), aik, bkj) ;
-                }
-            }
-
-            //------------------------------------------------------------------
-            // status of Sauna_Mark [0..cvlen-1] and Sauna_Work [0..cvlen-1]
-            //------------------------------------------------------------------
-
-            // Sauna_Mark [i] < hiwater:   M(i,j)=0, or not present in M(:,j)
-            // Sauna_Mark [i] = hiwater:   M(i,j)=1, C(i,j) not present;
-            //                             Sauna_Work [i] uninitialized
-            // Sauna_Mark [i] = hiwater+1: M(i,j)=1, and C(i,j) is present;
-            //                             value is Sauna_Work [i]
-        }
-
-        //----------------------------------------------------------------------
-        // check if C(:,j) is empty
-        //----------------------------------------------------------------------
-
-        // if M(:,j) has not been scattered into Sauna_Mark, then C(:,j) must be
-        // empty.  C(:,j) can still be empty if marked is false, but in that
-        // case the Sauna_Mark must still be cleared.
-
-        #ifdef GB_HYPER_CASE
-        if (!marked) continue ;
-        #endif
-
-        //----------------------------------------------------------------------
-        // gather C(:,j), from pattern of M(:,j) and values in Sauna_Work
-        //----------------------------------------------------------------------
-
-        if (marked)
-        {
-            for (int64_t pM = pM_start ; pM < pM_end ; pM++)
-            {
-                int64_t i = Mi [pM] ;
-                if (Sauna_Mark [i] == hiwater+1)
-                { 
-                    // C(i,j) is a live entry, gather its row and value
-                    // Cx [cnz] = Sauna_Work [i] ;
-                    ASSERT (cnz < C->nzmax) ;
-                    GB_COPY_C (GB_CX (cnz), GB_SAUNA_WORK (i)) ;
-                    Ci [cnz++] = i ;
-                }
-            }
-
-            // clear the Sauna_Mark by incrementing hiwater by 2, and ensuring
-            // that the resulting hiwater+1 does not cause integer overflow
-            hiwater = GB_Sauna_reset (Sauna, 2, 1) ;
-        }
-
-        //----------------------------------------------------------------------
-        // log the end of C(:,j)
-        //----------------------------------------------------------------------
-
-        #ifdef GB_HYPER_CASE
-        // cannot fail since C->plen is the upper bound: number of non-empty
-        // columns of B
-        info = GB_jappend (C, j, &jlast, cnz, &cnz_last, NULL) ;
-        ASSERT (info == GrB_SUCCESS) ;
-        #else
-        Cp [j+1] = cnz ;
-        if (cnz > cnz_last) C->nvec_nonempty++ ;
-        cnz_last = cnz ;
-        #endif
-    }
-
-    //--------------------------------------------------------------------------
-    // finalize C
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_HYPER_CASE
-    GB_jwrapup (C, jlast, cnz) ;
-    ASSERT (info == GrB_SUCCESS) ;
-    #else
-    C->magic = GB_MAGIC ;
-    #endif
-}
-
diff --git a/Source/Template/GB_AxB_Gustavson_meta.c b/Source/Template/GB_AxB_Gustavson_meta.c
deleted file mode 100644
index c891b3ce82..0000000000
--- a/Source/Template/GB_AxB_Gustavson_meta.c
+++ /dev/null
@@ -1,49 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_Gustavson_meta: C=A*B and C<M>=A*B
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-{
-    // Ax and Bx are not used if the multiply operator is SECOND or FIRST,
-    // respectively.
-    #include "GB_unused.h"
-
-    const GB_ATYPE *GB_RESTRICT Ax = A_is_pattern ? NULL : A->x ;
-    const GB_BTYPE *GB_RESTRICT Bx = B_is_pattern ? NULL : B->x ;
-
-    bool A_is_hyper = GB_IS_HYPER (A) ;
-    bool C_is_hyper = GB_IS_HYPER (C) ;
-    bool M_is_hyper = GB_IS_HYPER (M) ;
-    if (A_is_hyper || GB_IS_HYPER (B) || C_is_hyper || M_is_hyper)
-    {
-        #define GB_HYPER_CASE
-        if (M != NULL)
-        { 
-            // C<M> = A*B where M is pattern of C
-            #include "GB_AxB_Gustavson_mask.c"
-        }
-        else
-        { 
-            // C = A*B with pattern of C as defined on input
-            #include "GB_AxB_Gustavson_nomask.c"
-        }
-        #undef GB_HYPER_CASE
-    }
-    else
-    {
-        if (M != NULL)
-        { 
-            // C<M> = A*B where M is pattern of C
-            #include "GB_AxB_Gustavson_mask.c"
-        }
-        else
-        { 
-            // C = A*B with pattern of C as defined on input
-            #include "GB_AxB_Gustavson_nomask.c"
-        }
-    }
-}
diff --git a/Source/Template/GB_AxB_Gustavson_nomask.c b/Source/Template/GB_AxB_Gustavson_nomask.c
deleted file mode 100644
index 24b88b457c..0000000000
--- a/Source/Template/GB_AxB_Gustavson_nomask.c
+++ /dev/null
@@ -1,188 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_Gustavson_nomask:  C=A*B using Gustavson method, precomputed pattern
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// This file is #include'd in GB_AxB_Gustavson.c, and Template/GB_AxB.c, the
-// latter of which expands into Generated/GB_AxB__* for all built-in semirings.
-
-// The pattern of C has already been computed in the symbolic phase of
-// GB_AxB_Gustavson.  This is Gustavson's method, extended to handle
-// hypersparse matrices and arbitrary semirings.
-
-{
-
-    //--------------------------------------------------------------------------
-    // check inputs
-    //--------------------------------------------------------------------------
-
-    ASSERT (!GB_aliased (C, M)) ;
-    ASSERT (!GB_aliased (C, A)) ;
-    ASSERT (!GB_aliased (C, B)) ;
-    ASSERT (C->vdim == B->vdim) ;
-    ASSERT (C->vlen == A->vlen) ;
-    ASSERT (A->vdim == B->vlen) ;
-    ASSERT (Sauna->Sauna_n >= C->vlen) ;
-
-    //--------------------------------------------------------------------------
-    // get A and B
-    //--------------------------------------------------------------------------
-
-    const int64_t *GB_RESTRICT Ap = A->p ;
-    const int64_t *GB_RESTRICT Ai = A->i ;
-    const int64_t *GB_RESTRICT Bi = B->i ;
-
-    #ifdef GB_HYPER_CASE
-    const int64_t *GB_RESTRICT Ah = A->h ;
-    int64_t anvec = A->nvec ;
-    #endif
-
-    //--------------------------------------------------------------------------
-    // get C (pattern already constructed)
-    //--------------------------------------------------------------------------
-
-    const int64_t *GB_RESTRICT Ci = C->i ;
-    const int64_t *GB_RESTRICT Cp = C->p ;
-    ASSERT (C->nvec <= B->nvec) ;
-
-    #ifdef GB_HYPER_CASE
-    const int64_t *GB_RESTRICT Ch = C->h ;
-    int64_t cnvec = C->nvec ;
-    int64_t kc = 0 ;
-    #endif
-
-    // C->p and C->h have already been computed in the symbolic phase
-    ASSERT (C->magic == GB_MAGIC) ;
-
-    //--------------------------------------------------------------------------
-    // C=A*B using the Gustavson's saxpy-based method; precomputed pattern of C
-    //--------------------------------------------------------------------------
-
-    GBI_for_each_vector (B)
-    {
-
-        //----------------------------------------------------------------------
-        // get B(:,j)
-        //----------------------------------------------------------------------
-
-        GBI_jth_iteration (j, pB, pB_end) ;
-        int64_t bjnz = pB_end - pB ;
-        // no work to do if B(:,j) is empty
-        if (bjnz == 0) continue ;
-
-        //----------------------------------------------------------------------
-        // get C(:,j)
-        //----------------------------------------------------------------------
-
-        int64_t pC_start, pC_end ;
-        #ifdef GB_HYPER_CASE
-        if (C_is_hyper)
-        {
-            // C will have a subset of the columns of B, so do a linear-time
-            // search for j in Ch.  The total time for this search is just
-            // O(cnvec), for the entire matrix multiply.  No need for a binary
-            // search using GB_lookup.
-            bool found = false ;
-            for ( ; kc < cnvec && Ch [kc] <= j ; kc++)
-            {
-                found = (Ch [kc] == j) ;
-                if (found)
-                { 
-                    pC_start = Cp [kc] ;
-                    pC_end   = Cp [kc+1] ;
-                    break ;
-                }
-            }
-            // skip if C (:,j) is empty
-            if (!found) continue ;
-        }
-        else
-        #endif
-        { 
-            pC_start = Cp [j] ;
-            pC_end   = Cp [j+1] ;
-        }
-
-        // skip if C(:,j) is empty
-        if (pC_end == pC_start) continue ;
-
-        //----------------------------------------------------------------------
-        // clear Sauna_Work
-        //----------------------------------------------------------------------
-
-        for (int64_t pC = pC_start ; pC < pC_end ; pC++)
-        { 
-            // Sauna_Work [Ci [pC]] = identity ;
-            GB_COPY_C (GB_SAUNA_WORK (Ci [pC]), GB_IDENTITY) ;
-        }
-
-        #ifdef GB_HYPER_CASE
-        // trim Ah on right
-        int64_t pleft = 0 ;
-        int64_t pright = anvec-1 ;
-        if (A_is_hyper && bjnz > 2)
-        { 
-            // trim Ah [0..pright] to remove any entries past the last B(:,j)
-            GB_bracket_right (Bi [pB_end-1], Ah, 0, &pright) ;
-        }
-        #endif
-
-        //----------------------------------------------------------------------
-        // C(:,j) = A * B(:,j)
-        //----------------------------------------------------------------------
-
-        for ( ; pB < pB_end ; pB++)
-        {
-
-            //------------------------------------------------------------------
-            // get B(k,j) and A(:,k)
-            //------------------------------------------------------------------
-
-            // get the pattern of B(k,j)
-            int64_t k = Bi [pB] ;
-
-            // find A(:,k), reusing pleft since Bi [...] is sorted
-            int64_t pA, pA_end ;
-            #ifdef GB_HYPER_CASE
-            GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k, &pA, &pA_end) ;
-            #else
-            pA     = Ap [k] ;
-            pA_end = Ap [k+1] ;
-            #endif
-
-            // skip if A(:,k) is empty
-            if (pA == pA_end) continue ;
-
-            // get the value of B(k,j)
-            // bkj = Bx [pB]
-            GB_GETB (bkj, Bx, pB) ;
-
-            //------------------------------------------------------------------
-            // Sauna_Work += A(:,k) * B(k,j)
-            //------------------------------------------------------------------
-
-            for ( ; pA < pA_end ; pA++)
-            { 
-                // Sauna_Work [i] += A(i,k) * B(k,j)
-                int64_t i = Ai [pA] ;
-                GB_GETA (aik, Ax, pA) ;
-                GB_MULTADD (GB_SAUNA_WORK (i), aik, bkj) ;
-            }
-        }
-
-        //----------------------------------------------------------------------
-        // gather C(:,j) from Sauna_Work
-        //----------------------------------------------------------------------
-
-        for (int64_t pC = pC_start ; pC < pC_end ; pC++)
-        { 
-            // Cx [pC] = Sauna_Work [Ci [pC]] ;
-            GB_COPY_C (GB_CX (pC), GB_SAUNA_WORK (Ci [pC])) ;
-        }
-    }
-}
-
diff --git a/Source/Template/GB_AxB_Gustavson_symbolic.c b/Source/Template/GB_AxB_Gustavson_symbolic.c
deleted file mode 100644
index 689ca33d56..0000000000
--- a/Source/Template/GB_AxB_Gustavson_symbolic.c
+++ /dev/null
@@ -1,277 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_Gustavson_symbolic: C=A*B symbolic analysis
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-{
-
-    //--------------------------------------------------------------------------
-    // get A and B
-    //--------------------------------------------------------------------------
-
-    // const int64_t *Bp = B->p ;
-    const int64_t *Bi = B->i ;
-    const int64_t *Ap = A->p ;
-    const int64_t *Ai = A->i ;
-
-    #ifdef GB_HYPER_CASE
-    const int64_t *Ah = A->h ;
-    int64_t anvec = A->nvec ;
-    #endif
-
-    //--------------------------------------------------------------------------
-    // start the construction of the pattern of C
-    //--------------------------------------------------------------------------
-
-    int64_t *GB_RESTRICT Ci = C->i ;
-    #ifndef GB_HYPER_CASE
-    int64_t *GB_RESTRICT Cp = C->p ;
-    #endif
-
-    int64_t jlast, cnz, cnz_last ;
-    GB_jstartup (C, &jlast, &cnz, &cnz_last) ;
-
-    //--------------------------------------------------------------------------
-    // symbolic pattern of C = A*B
-    //--------------------------------------------------------------------------
-
-    GBI_for_each_vector (B)
-    {
-
-        //----------------------------------------------------------------------
-        // get B(:,j)
-        //----------------------------------------------------------------------
-
-        GBI_jth_iteration (j, pB, pB_end) ;
-
-        //----------------------------------------------------------------------
-        // reallocate C if necessary
-        //----------------------------------------------------------------------
-
-        // Note that cvlen is an upper bound on nnz (C (:,j)), but it can
-        // be a very loose bound if C is hypersparse.
-        int64_t cmax = cnz + cvlen ;
-        if (cmax > C->nzmax)
-        { 
-            GB_OK (GB_ix_realloc (C, 4*(C->nzmax + cvlen), false, NULL)) ;
-            Ci = C->i ;
-        }
-
-        //----------------------------------------------------------------------
-        // C(:,j) = set union of all A(:,k) for each nonzero B(k,j) ;
-        //----------------------------------------------------------------------
-
-        int64_t bjnz = pB_end - pB ;
-
-        #ifdef GB_HYPER_CASE
-        int64_t pleft = 0 ;
-        int64_t pright = anvec-1 ;
-        #endif
-
-        if (bjnz == 0)
-        { 
-
-            //------------------------------------------------------------------
-            // B (:,j) is empty; nothing to do
-            //------------------------------------------------------------------
-
-            #ifdef GB_HYPER_CASE
-            continue ;
-            #endif
-
-        }
-        else if (bjnz == 1)
-        {
-
-            //------------------------------------------------------------------
-            // C (:,j) = A (:,k) for a single nonzero B(k,j)
-            //------------------------------------------------------------------
-
-            // C(:,j) = A(:,k)
-            int64_t k = Bi [pB] ;
-
-            // find A(:,k)
-            int64_t pA, pA_end ;
-            #ifdef GB_HYPER_CASE
-            GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k, &pA, &pA_end) ;
-            #else
-            pA     = Ap [k] ;
-            pA_end = Ap [k+1] ;
-            #endif
-
-            for ( ; pA < pA_end ; pA++, cnz++)
-            { 
-                int64_t i = Ai [pA] ;
-                // C(i,j) is nonzero
-                Ci [cnz] = i ;
-            }
-
-        }
-        else if (bjnz == 2)
-        {
-
-            //------------------------------------------------------------------
-            // 2-way merge of A (:,k1) and A (:,k2)
-            //------------------------------------------------------------------
-
-            int64_t k1 = Bi [pB] ;
-            int64_t k2 = Bi [pB+1] ;
-            ASSERT (k1 < k2) ;
-
-            int64_t p1, p1_end, p2, p2_end ;
-
-            // find A(:,k1) and A(:,k2)
-            #ifdef GB_HYPER_CASE
-            GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k1, &p1, &p1_end) ;
-            // Use pleft of k1 to trim the search for k2 since k1 < k2
-            GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k2, &p2, &p2_end) ;
-            #else
-            p1     = Ap [k1] ;
-            p1_end = Ap [k1+1] ;
-            p2     = Ap [k2] ;
-            p2_end = Ap [k2+1] ;
-            #endif
-
-            while (p1 < p1_end || p2 < p2_end)
-            {
-                int64_t i1 = (p1 < p1_end) ? Ai [p1] : cvlen ;
-                int64_t i2 = (p2 < p2_end) ? Ai [p2] : cvlen ;
-                int64_t i ;
-                if (i1 < i2)
-                { 
-                    i = i1 ;
-                    p1++ ;
-                }
-                else if (i1 > i2)
-                { 
-                    i = i2 ;
-                    p2++ ;
-                }
-                else // i1 == i2
-                { 
-                    i = i1 ;
-                    p1++ ;
-                    p2++ ;
-                }
-                // C(i,j) is nonzero
-                Ci [cnz++] = i ;
-            }
-
-        }
-        else
-        {
-
-            //------------------------------------------------------------------
-            // general case, nnz (B (:,j)) > 2
-            //------------------------------------------------------------------
-
-            // hiwater++
-            int64_t hiwater = GB_Sauna_reset (Sauna, 1, 0) ;
-
-            #ifdef GB_HYPER_CASE
-            // trim on right
-            if (A_is_hyper)
-            { 
-                // trim Ah [0..pright] to remove any entries past
-                // the last B(:,j)
-                GB_bracket_right (Bi [pB_end-1], Ah, 0, &pright) ;
-            }
-            #endif
-
-            for ( ; pB < pB_end ; pB++)
-            {
-                // if C(:,j) now completely full, no need to continue
-                if (cnz == cmax) break ;
-
-                // symbolic saxpy C(:,j) += A(:,k)*B(k,j)
-                int64_t k = Bi [pB] ;
-
-                // find A(:,k), reusing pleft since Bi [...] is sorted
-                int64_t pA, pA_end ;
-                #ifdef GB_HYPER_CASE
-                GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k, &pA, &pA_end);
-                #else
-                pA     = Ap [k] ;
-                pA_end = Ap [k+1] ;
-                #endif
-
-                for ( ; pA < pA_end ; pA++)
-                {
-                    int64_t i = Ai [pA] ;
-                    // C(i,j) is nonzero
-                    if (Sauna_Mark [i] < hiwater)
-                    { 
-                        // C(i,j) is nonzero, and this is the 1st time row i
-                        // has been added to the pattern in C(:,j).  Mark it so
-                        // row i is not added again.
-                        Sauna_Mark [i] = hiwater ;
-                        // add to the column pattern of A*B
-                        Ci [cnz++] = i ;
-                    }
-                }
-            }
-
-            // sort the pattern of C(:,j)
-            int64_t len = cnz - cnz_last ;
-            if (len == cvlen)
-            {
-                // no need to sort C(:,j) if dense; just recreate it
-                for (int64_t pC = cnz_last, i = 0 ; pC < cnz ; pC++, i++)
-                { 
-                    Ci [pC] = i ;
-                }
-            }
-            else
-            { 
-                // sort the nonzero indices in C(:,j)
-                GB_qsort_1a (Ci + cnz_last, len) ;
-            }
-        }
-
-        //----------------------------------------------------------------------
-        // log the end of vector C(:,j)
-        //----------------------------------------------------------------------
-
-        #ifdef GB_HYPER_CASE
-        // this cannot fail since C->plen is the upper bound: the number
-        // of non-empty columns of B.
-        info = GB_jappend (C, j, &jlast, cnz, &cnz_last, NULL) ;
-        ASSERT (info == GrB_SUCCESS) ;
-        // if it could fail:
-        // GB_OK (info) ;              // check result and return on error
-        #else
-        Cp [j+1] = cnz ;
-        if (cnz > cnz_last) C->nvec_nonempty++ ;
-        cnz_last = cnz ;
-        #endif
-
-        // it also cannot run out of space here, but can do so above
-        ASSERT (cnz <= C->nzmax) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // finalize C and clear the Sauna
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_HYPER_CASE
-    GB_jwrapup (C, jlast, cnz) ;
-    #else
-    C->magic = GB_MAGIC ;
-    #endif
-
-    // clear the Sauna_Mark array
-    GB_Sauna_reset (Sauna, 1, 0) ;
-
-    //--------------------------------------------------------------------------
-    // reduce the size of C->i to hold just the required space
-    //--------------------------------------------------------------------------
-
-    info = GB_ix_realloc (C, cnz, false, NULL) ;
-    ASSERT (info == GrB_SUCCESS) ;
-    // ASSERT_MATRIX_OK (C, "C symbolic Gustavson C=A*B", GB0) ;
-}
-
diff --git a/Source/Template/GB_AxB_colscale_meta.c b/Source/Template/GB_AxB_colscale_meta.c
index 1e646e53d5..34deb0259b 100644
--- a/Source/Template/GB_AxB_colscale_meta.c
+++ b/Source/Template/GB_AxB_colscale_meta.c
@@ -2,7 +2,7 @@
 // GB_AxB_colscale_meta: C=A*D where D is a square diagonal matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,7 +12,7 @@
 
 {
 
-    // Dx, j, and Ah are unused if the operator is FIRST
+    // Dx, j, and Ah are unused if the operator is FIRST or PAIR
     #include "GB_unused.h"
 
     //--------------------------------------------------------------------------
diff --git a/Source/Template/GB_AxB_compare_factory.c b/Source/Template/GB_AxB_compare_factory.c
index 7c605fbfa5..c462ddd713 100644
--- a/Source/Template/GB_AxB_compare_factory.c
+++ b/Source/Template/GB_AxB_compare_factory.c
@@ -2,7 +2,7 @@
 // GB_AxB_compare_factory.c: switch factory for C=A*B with comparator ops
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -117,6 +117,27 @@ ASSERT (zcode == GB_BOOL_code) ;
             }
             break ;
 
+        case GB_ANY_opcode    :
+
+            switch (xycode)
+            {
+                #ifndef GB_NO_BOOLEAN
+                case GB_BOOL_code   : GB_AxB_WORKER (_any, GB_MULT_NAME, _bool  )
+                #endif
+                case GB_INT8_code   : GB_AxB_WORKER (_any, GB_MULT_NAME, _int8  )
+                case GB_UINT8_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint8 )
+                case GB_INT16_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _int16 )
+                case GB_UINT16_code : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint16)
+                case GB_INT32_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _int32 )
+                case GB_UINT32_code : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint32)
+                case GB_INT64_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _int64 )
+                case GB_UINT64_code : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint64)
+                case GB_FP32_code   : GB_AxB_WORKER (_any, GB_MULT_NAME, _fp32  )
+                case GB_FP64_code   : GB_AxB_WORKER (_any, GB_MULT_NAME, _fp64  )
+                default: ;
+            }
+            break ;
+
         default: ;
     }
 }
diff --git a/Source/Template/GB_AxB_dot2_compmask.c b/Source/Template/GB_AxB_dot2_compmask.c
index 27479397f7..1a5a136c6c 100644
--- a/Source/Template/GB_AxB_dot2_compmask.c
+++ b/Source/Template/GB_AxB_dot2_compmask.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot2_compmask:  C<!M>=A'*B via dot products
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -96,7 +96,7 @@
                 GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;
                 if (found)
                 {
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    mij = GB_mcast (Mx, pM, msize) ;
                 }
                 if (!mij)
                 { 
diff --git a/Source/Template/GB_AxB_dot2_meta.c b/Source/Template/GB_AxB_dot2_meta.c
index 1918e7c595..8e8d15a3d2 100644
--- a/Source/Template/GB_AxB_dot2_meta.c
+++ b/Source/Template/GB_AxB_dot2_meta.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot2_meta: C=A'*B or C<!M>=A'*B via dot productes
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -40,7 +40,7 @@
 
     }
     else
-    {
+    { 
 
         //----------------------------------------------------------------------
         // get M
@@ -49,8 +49,7 @@
         const int64_t *GB_RESTRICT Mp = M->p ;
         const int64_t *GB_RESTRICT Mh = M->h ;
         const int64_t *GB_RESTRICT Mi = M->i ;
-        const GB_void *GB_RESTRICT Mx = M->x ;
-        GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code);
+        const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
         size_t msize = M->type->size ;
         const int64_t mnvec = M->nvec ;
         bool M_is_hyper = GB_IS_HYPER (M) ;
diff --git a/Source/Template/GB_AxB_dot2_nomask.c b/Source/Template/GB_AxB_dot2_nomask.c
index 5ee8ca601b..2a0835726a 100644
--- a/Source/Template/GB_AxB_dot2_nomask.c
+++ b/Source/Template/GB_AxB_dot2_nomask.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot2_nomask:  C=A'*B via dot products
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_AxB_dot3_template.c b/Source/Template/GB_AxB_dot3_template.c
index dfcb4db3d3..4e1557a5cb 100644
--- a/Source/Template/GB_AxB_dot3_template.c
+++ b/Source/Template/GB_AxB_dot3_template.c
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
-// GB_AxB_dot3_template: C<M>=A'*B via dot productes
+// GB_AxB_dot3_template: C<M>=A'*B via dot products
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,8 +31,7 @@
     const bool B_is_hyper = B->is_hyper ;
 
     const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
-    GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
     const size_t msize = M->type->size ;
 
     const int64_t *GB_RESTRICT Ah = A->h ;
@@ -146,9 +145,7 @@
 
                     // get the value of M(i,j)
                     int64_t i = Mi [pC] ;
-                    bool mij ;
-                    cast_M (&mij, Mx +(pC*msize), 0) ;
-                    if (mij)
+                    if (GB_mcast (Mx, pC, msize))   // note: Mx [pC], same as Cx
                     { 
 
                         //------------------------------------------------------
diff --git a/Source/Template/GB_AxB_dot4_template.c b/Source/Template/GB_AxB_dot4_template.c
new file mode 100644
index 0000000000..9ae4368c72
--- /dev/null
+++ b/Source/Template/GB_AxB_dot4_template.c
@@ -0,0 +1,387 @@
+//------------------------------------------------------------------------------
+// GB_AxB_dot4:  C+=A'*B via dot products, where C is dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// C+=A'*B where C is a dense matrix and computed in-place.  The monoid of the
+// semiring matches the accum operator, and the type of C matches the ztype of
+// accum.  That is, no typecasting can be done with C.
+
+// The PAIR operator as the multiplier provides important special cases.
+// See Template/GB_AxB_dot_cij.c for details.
+
+// cij += A(k,i) * B(k,j)
+#undef  GB_DOT_MERGE
+#define GB_DOT_MERGE                                                \
+{                                                                   \
+    if (!cij_updated)                                               \
+    {                                                               \
+        cij_updated = true ;                                        \
+        GB_GETC (cij, pC) ;                                         \
+    }                                                               \
+    GB_GETA (aki, Ax, pA) ;         /* aki = A(k,i) */              \
+    GB_GETB (bkj, Bx, pB) ;         /* bkj = B(k,j) */              \
+    GB_MULTADD (cij, aki, bkj) ;    /* cij += aki * bkj */          \
+    GB_DOT_TERMINAL (cij) ;         /* break if cij == terminal */  \
+    pA++ ;                                                          \
+    pB++ ;                                                          \
+}
+
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    GB_CTYPE *GB_RESTRICT Cx = C->x ;
+    const int64_t cvlen = C->vlen ;
+
+    const int64_t  *GB_RESTRICT Bp = B->p ;
+    const int64_t  *GB_RESTRICT Bh = B->h ;
+    const int64_t  *GB_RESTRICT Bi = B->i ;
+    const GB_BTYPE *GB_RESTRICT Bx = B_is_pattern ? NULL : B->x ;
+    const int64_t bvlen = B->vlen ;
+
+    const int64_t  *GB_RESTRICT Ap = A->p ;
+    const int64_t  *GB_RESTRICT Ah = A->h ;
+    const int64_t  *GB_RESTRICT Ai = A->i ;
+    const GB_ATYPE *GB_RESTRICT Ax = A_is_pattern ? NULL : A->x ;
+    ASSERT (A->vlen == B->vlen) ;
+
+    int ntasks = naslice * nbslice ;
+
+    //--------------------------------------------------------------------------
+    // C += A'*B
+    //--------------------------------------------------------------------------
+
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+
+        //----------------------------------------------------------------------
+        // get the entries in A and B to compute
+        //----------------------------------------------------------------------
+
+        int a_taskid = taskid / nbslice ;
+        int b_taskid = taskid % nbslice ;
+
+        int64_t akfirst = A_slice [a_taskid] ;
+        int64_t aklast  = A_slice [a_taskid+1] ;
+        if (akfirst >= aklast) continue ;
+
+        int64_t bkfirst = B_slice [b_taskid] ;
+        int64_t bklast  = B_slice [b_taskid+1] ;
+        if (bkfirst >= bklast) continue ;
+
+        //----------------------------------------------------------------------
+        // C+=A'*B via dot products
+        //----------------------------------------------------------------------
+
+        for (int64_t bk = bkfirst ; bk < bklast ; bk++)
+        {
+
+            //------------------------------------------------------------------
+            // get B(:,j)
+            //------------------------------------------------------------------
+
+            int64_t j = (Bh == NULL) ? bk : Bh [bk] ;
+            int64_t pB_start = Bp [bk] ;
+            int64_t pB_end   = Bp [bk+1] ;
+            int64_t pC_start = j * cvlen ;
+            int64_t bjnz = pB_end - pB_start ;
+            if (bjnz == 0) continue ;
+
+            if (bjnz == bvlen)
+            {
+
+                //--------------------------------------------------------------
+                // B(:,j) is dense
+                //--------------------------------------------------------------
+
+                for (int64_t ak = akfirst ; ak < aklast ; ak++)
+                {
+
+                    //----------------------------------------------------------
+                    // get A(:,i)
+                    //----------------------------------------------------------
+
+                    int64_t i = (Ah == NULL) ? ak : Ah [ak] ;
+                    int64_t pA     = Ap [ak] ;
+                    int64_t pA_end = Ap [ak+1] ;
+                    int64_t ainz = pA_end - pA ;
+                    if (ainz == 0) continue ;
+
+                    GB_CIJ_DECLARE (cij) ;          // declare the cij scalar
+                    int64_t pC = i + pC_start ;     // C(i,j) is at Cx [pC]
+                    int64_t pB = pB_start ;
+                    GB_GETC (cij, pC) ;             // cij = Cx [pC]
+
+                    //----------------------------------------------------------
+                    // special cases for the PAIR multiplier
+                    //----------------------------------------------------------
+
+                    // Since B(:,j) is dense, C(i,j) += A(:,i)'*B(:,j) is
+                    // trivial to compute with the PAIR multiplier.
+
+                    #if GB_IS_PAIR_MULTIPLIER
+
+                        #if GB_IS_ANY_MONOID
+                        // ANY monoid: take the first entry found
+                        cij = 1 ;
+                        #elif GB_IS_EQ_MONOID
+                        // A(:,i)'*B(:,j) is one, so this result must be
+                        // accumulated into cij, as cij += 1, where the
+                        // accumulator is the EQ operator.
+                        cij = (cij == 1) ;
+                        #elif (GB_CTYPE_BITS > 0)
+                        // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)),
+                        // for bool, 8-bit, 16-bit, or 32-bit integer
+                        uint64_t t = ((uint64_t) cij) + ainz ;
+                        cij = (GB_CTYPE) (t & GB_CTYPE_BITS) ;
+                        #else
+                        // PLUS monoid for float, double, or 64-bit integers 
+                        cij += (GB_CTYPE) ainz ;
+                        #endif
+
+                    #else
+
+                    //----------------------------------------------------------
+                    // general case
+                    //----------------------------------------------------------
+
+                    if (ainz == bvlen)
+                    {
+
+                        //------------------------------------------------------
+                        // both A(:,i) and B(:,j) are dense
+                        //------------------------------------------------------
+
+                        GB_PRAGMA_VECTORIZE_DOT
+                        for (int64_t k = 0 ; k < bvlen ; k++)
+                        { 
+                            GB_DOT_TERMINAL (cij) ;         // break if terminal
+                            // cij += A(k,i) * B(k,j)
+                            GB_GETA (aki, Ax, pA+k) ;       // aki = A(k,i)
+                            GB_GETB (bkj, Bx, pB+k) ;       // bkj = B(k,j)
+                            GB_MULTADD (cij, aki, bkj) ;    // cij += aki * bkj
+                        }
+
+                    }
+                    else
+                    {
+
+                        //------------------------------------------------------
+                        // A(:,i) is sparse and B(:,j) is dense
+                        //------------------------------------------------------
+
+                        GB_PRAGMA_VECTORIZE_DOT
+                        for (int64_t p = pA ; p < pA_end ; p++)
+                        { 
+                            GB_DOT_TERMINAL (cij) ;         // break if terminal
+                            int64_t k = Ai [p] ;
+                            // cij += A(k,i) * B(k,j)
+                            GB_GETA (aki, Ax, p   ) ;       // aki = A(k,i)
+                            GB_GETB (bkj, Bx, pB+k) ;       // bkj = B(k,j)
+                            GB_MULTADD (cij, aki, bkj) ;    // cij += aki * bkj
+                        }
+                    }
+
+                    #endif
+                    GB_PUTC (cij, pC) ;                 // Cx [pC] = cij
+                }
+
+            }
+            else
+            {
+
+                //--------------------------------------------------------------
+                // B(:,j) is sparse
+                //--------------------------------------------------------------
+
+                // get the first and last index in B(:,j)
+                int64_t ib_first = Bi [pB_start] ;
+                int64_t ib_last  = Bi [pB_end-1] ;
+
+                for (int64_t ak = akfirst ; ak < aklast ; ak++)
+                {
+
+                    //----------------------------------------------------------
+                    // get A(:,i)
+                    //----------------------------------------------------------
+
+                    int64_t i = (Ah == NULL) ? ak : Ah [ak] ;
+                    int64_t pA     = Ap [ak] ;
+                    int64_t pA_end = Ap [ak+1] ;
+                    int64_t ainz = pA_end - pA ;
+                    if (ainz == 0) continue ;
+                    // get the first and last index in A(:,i)
+                    if (Ai [pA_end-1] < ib_first || ib_last < Ai [pA]) continue;
+
+                    //----------------------------------------------------------
+                    // C(i,j) += A(:,i)'*B(:,j)
+                    //----------------------------------------------------------
+
+                    GB_CIJ_DECLARE (cij) ;          // declare the cij scalar
+                    int64_t pC = i + pC_start ;     // C(i,j) is at Cx [pC]
+                    int64_t pB = pB_start ;
+
+                    if (ainz == bvlen)
+                    {
+
+                        //------------------------------------------------------
+                        // A(:,i) is dense and B(:,j) is sparse
+                        //------------------------------------------------------
+
+                        GB_GETC (cij, pC) ;                 // cij = Cx [pC]
+
+                        #if GB_IS_PAIR_MULTIPLIER
+
+                            #if GB_IS_ANY_MONOID
+                            // ANY monoid: take the first entry found
+                            cij = 1 ;
+                            #elif GB_IS_EQ_MONOID
+                            // A(:,i)'*B(:,j) is one, so this result must be
+                            // accumulated into cij, as cij += 1, where the
+                            // accumulator is the EQ operator.
+                            cij = (cij == 1) ;
+                            #elif (GB_CTYPE_BITS > 0)
+                            // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(B(:,j)),
+                            // for bool, 8-bit, 16-bit, or 32-bit integer
+                            uint64_t t = ((uint64_t) cij) + bjnz ;
+                            cij = (GB_CTYPE) (t & GB_CTYPE_BITS) ;
+                            #else
+                            // PLUS monoid for float, double, or 64-bit integers
+                            cij += (GB_CTYPE) bjnz ;
+                            #endif
+
+                        #else
+
+                            GB_PRAGMA_VECTORIZE_DOT
+                            for (int64_t p = pB ; p < pB_end ; p++)
+                            { 
+                                GB_DOT_TERMINAL (cij) ;   // break if terminal
+                                int64_t k = Bi [p] ;
+                                // cij += A(k,i) * B(k,j)
+                                GB_GETA (aki, Ax, pA+k) ;     // aki = A(k,i)
+                                GB_GETB (bkj, Bx, p   ) ;     // bkj = B(k,j)
+                                GB_MULTADD (cij, aki, bkj) ;  // cij += aki*bkj
+                            }
+
+                        #endif
+
+                        GB_PUTC (cij, pC) ;                 // Cx [pC] = cij
+
+                    }
+                    else if (ainz > 8 * bjnz)
+                    {
+
+                        //------------------------------------------------------
+                        // B(:,j) is very sparse compared to A(:,i)
+                        //------------------------------------------------------
+
+                        bool cij_updated = false ;
+                        while (pA < pA_end && pB < pB_end)
+                        {
+                            int64_t ia = Ai [pA] ;
+                            int64_t ib = Bi [pB] ;
+                            if (ia < ib)
+                            { 
+                                // A(ia,i) appears before B(ib,j)
+                                // discard all entries A(ia:ib-1,i)
+                                int64_t pleft = pA + 1 ;
+                                int64_t pright = pA_end - 1 ;
+                                GB_TRIM_BINARY_SEARCH (ib, Ai, pleft, pright) ;
+                                ASSERT (pleft > pA) ;
+                                pA = pleft ;
+                            }
+                            else if (ib < ia)
+                            { 
+                                // B(ib,j) appears before A(ia,i)
+                                pB++ ;
+                            }
+                            else // ia == ib == k
+                            { 
+                                // A(k,i) and B(k,j) are next entries to merge
+                                GB_DOT_MERGE ;
+                            }
+                        }
+                        if (cij_updated) GB_PUTC (cij, pC) ;
+
+                    }
+                    else if (bjnz > 8 * ainz)
+                    {
+
+                        //------------------------------------------------------
+                        // A(:,i) is very sparse compared to B(:,j)
+                        //------------------------------------------------------
+
+                        bool cij_updated = false ;
+                        while (pA < pA_end && pB < pB_end)
+                        {
+                            int64_t ia = Ai [pA] ;
+                            int64_t ib = Bi [pB] ;
+                            if (ia < ib)
+                            { 
+                                // A(ia,i) appears before B(ib,j)
+                                pA++ ;
+                            }
+                            else if (ib < ia)
+                            { 
+                                // B(ib,j) appears before A(ia,i)
+                                // discard all entries B(ib:ia-1,j)
+                                int64_t pleft = pB + 1 ;
+                                int64_t pright = pB_end - 1 ;
+                                GB_TRIM_BINARY_SEARCH (ia, Bi, pleft, pright) ;
+                                ASSERT (pleft > pB) ;
+                                pB = pleft ;
+                            }
+                            else // ia == ib == k
+                            { 
+                                // A(k,i) and B(k,j) are next entries to merge
+                                GB_DOT_MERGE ;
+                            }
+                        }
+                        if (cij_updated) GB_PUTC (cij, pC) ;
+
+                    }
+                    else
+                    {
+
+                        //------------------------------------------------------
+                        // A(:,i) and B(:,j) have about the same sparsity
+                        //------------------------------------------------------
+
+                        bool cij_updated = false ;
+                        while (pA < pA_end && pB < pB_end)
+                        {
+                            int64_t ia = Ai [pA] ;
+                            int64_t ib = Bi [pB] ;
+                            if (ia < ib)
+                            { 
+                                // A(ia,i) appears before B(ib,j)
+                                pA++ ;
+                            }
+                            else if (ib < ia)
+                            { 
+                                // B(ib,j) appears before A(ia,i)
+                                pB++ ;
+                            }
+                            else // ia == ib == k
+                            { 
+                                // A(k,i) and B(k,j) are the entries to merge
+                                GB_DOT_MERGE ;
+                            }
+                        }
+                        if (cij_updated) GB_PUTC (cij, pC) ;
+                    }
+                }
+            }
+        }
+    }
+}
+
diff --git a/Source/Template/GB_AxB_dot_cij.c b/Source/Template/GB_AxB_dot_cij.c
index 9a8d4ba743..7f8bbb6943 100644
--- a/Source/Template/GB_AxB_dot_cij.c
+++ b/Source/Template/GB_AxB_dot_cij.c
@@ -2,7 +2,7 @@
 // GB_AxB_dot_cij: compute C(i,j) = A(:,i)'*B(:,j)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,9 +21,30 @@
 // Only one of the three are #defined: either GB_PHASE_1_OF_2, GB_PHASE_2_OF_2,
 // or GB_DOT3.
 
-#undef GB_DOT_MERGE
+// When used as the multiplicative operator, the PAIR operator provides some
+// useful special cases.  Its output is always one, for any matching pair of
+// entries A(k,i)'*B(k,j) for some k.  If the monoid is ANY, then C(i,j)=1 if
+// the intersection for the dot product is non-empty.  This intersection has to
+// be found, in general.  However, suppose B(:,j) is dense.  Then every entry
+// in the pattern of A(:,i)' will produce a 1 from the PAIR operator.  If the
+// monoid is ANY, then C(i,j)=1 if A(:,i)' is nonempty.  If the monoid is PLUS,
+// then C(i,j) is simply nnz(A(:,i)), assuming no overflow.  The XOR monoid
+// acts like a 1-bit summation, so the result of the XOR_PAIR_BOOL semiring
+// will be C(i,j) = mod (nnz(A(:,i)),2).
+
+// If both A(:,i) and B(:,j) are sparse, then the intersection must still be
+// found, so these optimizations can be used only if A(:,i) and/or B(:,j) are
+// fully populated.
+
+// For built-in, pre-generated semirings, the PAIR operator is only coupled
+// with either the ANY, PLUS, EQ, or XOR monoids, since the other monoids are
+// equivalent to the ANY monoid.  With no accumulator, EQ is the same as ANY;
+// they differ for the C+=A'*B operation (see *dot4*).
+
+#include "GB_unused.h"
 
 // cij += A(k,i) * B(k,j), for merge operation
+#undef  GB_DOT_MERGE
 #define GB_DOT_MERGE                                                \
 {                                                                   \
     GB_GETA (aki, Ax, pA) ;             /* aki = A(k,i) */          \
@@ -93,22 +114,40 @@
         cij_exists = true ;
 
         #if defined ( GB_PHASE_2_OF_2 ) || defined ( GB_DOT3 )
-        // cij = A(0,i) * B(0,j)
-        GB_GETA (aki, Ax, pA) ;             // aki = A(0,i)
-        GB_GETB (bkj, Bx, pB) ;             // bkj = B(0,j)
-        GB_MULT (cij, aki, bkj) ;           // cij = aki * bkj
+            #if GB_IS_PAIR_MULTIPLIER
+
+                #if (GB_IS_ANY_MONOID || GB_IS_EQ_MONOID)
+                // ANY or EQ monoid: take the first entry found
+                cij = 1 ;
+                #elif (GB_CTYPE_BITS > 0)
+                // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)),
+                // for bool, 8-bit, 16-bit, or 32-bit integer
+                cij = (GB_CTYPE) (((uint64_t) bvlen) & GB_CTYPE_BITS) ;
+                #else
+                // PLUS monoid for float, double, or 64-bit integers 
+                cij = (GB_CTYPE) bvlen ;
+                #endif
 
-        GB_DOT_SIMD
-        for (int64_t k = 1 ; k < bvlen ; k++)
-        { 
-            GB_DOT_TERMINAL (cij) ;             // break if cij == terminal
-            // cij += A(k,i) * B(k,j)
-            GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
-            GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
-            GB_MULTADD (cij, aki, bkj) ;        // cij += aki * bkj
-        }
+            #else
+
+                // cij = A(0,i) * B(0,j)
+                GB_GETA (aki, Ax, pA) ;             // aki = A(0,i)
+                GB_GETB (bkj, Bx, pB) ;             // bkj = B(0,j)
+                GB_MULT (cij, aki, bkj) ;           // cij = aki * bkj
+                GB_PRAGMA_VECTORIZE_DOT
+                for (int64_t k = 1 ; k < bvlen ; k++)
+                { 
+                    GB_DOT_TERMINAL (cij) ;             // break if cij terminal
+                    // cij += A(k,i) * B(k,j)
+                    GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
+                    GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
+                    GB_MULTADD (cij, aki, bkj) ;        // cij += aki * bkj
+                }
+
+            #endif
         #endif
 
+
     }
     else if (ainz == bvlen)
     {
@@ -120,22 +159,39 @@
         cij_exists = true ;
 
         #if defined ( GB_PHASE_2_OF_2 ) || defined ( GB_DOT3 )
-        int64_t k = Bi [pB] ;               // first row index of B(:,j)
-        // cij = A(k,i) * B(k,j)
-        GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
-        GB_GETB (bkj, Bx, pB  ) ;           // bkj = B(k,j)
-        GB_MULT (cij, aki, bkj) ;           // cij = aki * bkj
-
-        GB_DOT_SIMD
-        for (int64_t p = pB+1 ; p < pB_end ; p++)
-        { 
-            GB_DOT_TERMINAL (cij) ;             // break if cij == terminal
-            int64_t k = Bi [p] ;                // next row index of B(:,j)
-            // cij += A(k,i) * B(k,j)
-            GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
-            GB_GETB (bkj, Bx, p   ) ;           // bkj = B(k,j)
-            GB_MULTADD (cij, aki, bkj) ;        // cij += aki * bkj
-        }
+            #if GB_IS_PAIR_MULTIPLIER
+
+                #if (GB_IS_ANY_MONOID || GB_IS_EQ_MONOID)
+                // ANY or EQ monoid: take the first entry found
+                cij = 1 ;
+                #elif (GB_CTYPE_BITS > 0)
+                // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(B(:,j)),
+                // for bool, 8-bit, 16-bit, or 32-bit integer
+                cij = (GB_CTYPE) (((uint64_t) bjnz) & GB_CTYPE_BITS) ;
+                #else
+                // PLUS monoid for float, double, or 64-bit integers 
+                cij = (GB_CTYPE) bjnz ;
+                #endif
+
+            #else
+
+                int64_t k = Bi [pB] ;               // first row index of B(:,j)
+                // cij = A(k,i) * B(k,j)
+                GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
+                GB_GETB (bkj, Bx, pB  ) ;           // bkj = B(k,j)
+                GB_MULT (cij, aki, bkj) ;           // cij = aki * bkj
+                GB_PRAGMA_VECTORIZE_DOT
+                for (int64_t p = pB+1 ; p < pB_end ; p++)
+                { 
+                    GB_DOT_TERMINAL (cij) ;             // break if cij terminal
+                    int64_t k = Bi [p] ;                // next index of B(:,j)
+                    // cij += A(k,i) * B(k,j)
+                    GB_GETA (aki, Ax, pA+k) ;           // aki = A(k,i)
+                    GB_GETB (bkj, Bx, p   ) ;           // bkj = B(k,j)
+                    GB_MULTADD (cij, aki, bkj) ;        // cij += aki * bkj
+                }
+
+            #endif
         #endif
 
     }
@@ -149,22 +205,39 @@
         cij_exists = true ;
 
         #if defined ( GB_PHASE_2_OF_2 ) || defined ( GB_DOT3 )
-        int64_t k = Ai [pA] ;               // first row index of A(:,i)
-        // cij = A(k,i) * B(k,j)
-        GB_GETA (aki, Ax, pA  ) ;           // aki = A(k,i)
-        GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
-        GB_MULT (cij, aki, bkj) ;           // cij = aki * bkj
-
-        GB_DOT_SIMD
-        for (int64_t p = pA+1 ; p < pA_end ; p++)
-        { 
-            GB_DOT_TERMINAL (cij) ;             // break if cij == terminal
-            int64_t k = Ai [p] ;                // next row index of A(:,i)
-            // cij += A(k,i) * B(k,j)
-            GB_GETA (aki, Ax, p   ) ;           // aki = A(k,i)
-            GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
-            GB_MULTADD (cij, aki, bkj) ;        // cij += aki * bkj
-        }
+            #if GB_IS_PAIR_MULTIPLIER
+
+                #if (GB_IS_ANY_MONOID || GB_IS_EQ_MONOID)
+                // ANY or EQ monoid: take the first entry found
+                cij = 1 ;
+                #elif (GB_CTYPE_BITS > 0)
+                // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)),
+                // for bool, 8-bit, 16-bit, or 32-bit integer
+                cij = (GB_CTYPE) (((uint64_t) ainz) & GB_CTYPE_BITS) ;
+                #else
+                // PLUS monoid for float, double, or 64-bit integers 
+                cij = (GB_CTYPE) ainz ;
+                #endif
+
+            #else
+
+                int64_t k = Ai [pA] ;               // first row index of A(:,i)
+                // cij = A(k,i) * B(k,j)
+                GB_GETA (aki, Ax, pA  ) ;           // aki = A(k,i)
+                GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
+                GB_MULT (cij, aki, bkj) ;           // cij = aki * bkj
+                GB_PRAGMA_VECTORIZE_DOT
+                for (int64_t p = pA+1 ; p < pA_end ; p++)
+                { 
+                    GB_DOT_TERMINAL (cij) ;             // break if cij terminal
+                    int64_t k = Ai [p] ;                // next index of A(:,i)
+                    // cij += A(k,i) * B(k,j)
+                    GB_GETA (aki, Ax, p   ) ;           // aki = A(k,i)
+                    GB_GETB (bkj, Bx, pB+k) ;           // bkj = B(k,j)
+                    GB_MULTADD (cij, aki, bkj) ;        // cij += aki * bkj
+                }
+
+            #endif
         #endif
 
     }
@@ -185,7 +258,7 @@
                 // discard all entries A(ia:ib-1,i)
                 int64_t pleft = pA + 1 ;
                 int64_t pright = pA_end - 1 ;
-                GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ;
+                GB_TRIM_BINARY_SEARCH (ib, Ai, pleft, pright) ;
                 ASSERT (pleft > pA) ;
                 pA = pleft ;
             }
@@ -201,7 +274,7 @@
                 cij_exists = true ;
                 break ;
                 #else
-                GB_DOT_MERGE ;
+                GB_DOT_MERGE
                 GB_DOT_TERMINAL (cij) ;         // break if cij == terminal
                 pA++ ;
                 pB++ ;
@@ -232,7 +305,7 @@
                 // discard all entries B(ib:ia-1,j)
                 int64_t pleft = pB + 1 ;
                 int64_t pright = pB_end - 1 ;
-                GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ;
+                GB_TRIM_BINARY_SEARCH (ia, Bi, pleft, pright) ;
                 ASSERT (pleft > pB) ;
                 pB = pleft ;
             }
@@ -243,7 +316,7 @@
                 cij_exists = true ;
                 break ;
                 #else
-                GB_DOT_MERGE ;
+                GB_DOT_MERGE
                 GB_DOT_TERMINAL (cij) ;         // break if cij == terminal
                 pA++ ;
                 pB++ ;
@@ -280,7 +353,7 @@
                 cij_exists = true ;
                 break ;
                 #else
-                GB_DOT_MERGE ;
+                GB_DOT_MERGE
                 GB_DOT_TERMINAL (cij) ;         // break if cij == terminal
                 pA++ ;
                 pB++ ;
diff --git a/Source/Template/GB_AxB_factory.c b/Source/Template/GB_AxB_factory.c
index c3fae2e5d6..0788fd507c 100644
--- a/Source/Template/GB_AxB_factory.c
+++ b/Source/Template/GB_AxB_factory.c
@@ -2,7 +2,7 @@
 // GB_AxB_factory: switch factory for C=A*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -21,6 +21,10 @@
 //          operator is TxT->bool (for the comparison operators, LT, GT, etc),
 //          and where the monoid is bool x bool -> bool.
 
+// If the multiplicative operator is ANY, then it is replaced here by SECOND,
+// since that is faster for the saxpy-based methods (y is the value of B(k,j),
+// which is loaded less frequently from memory than A(i,k)).
+
 {
     //--------------------------------------------------------------------------
     // launch the switch factory
@@ -41,6 +45,7 @@
 
         //----------------------------------------------------------------------
         case GB_SECOND_opcode  :    // z = y
+        case GB_ANY_opcode     :    // z = y
         //----------------------------------------------------------------------
 
             // 44 semirings: (min,max,plus,times) for non-boolean, and
@@ -49,6 +54,21 @@
             #include "GB_AxB_type_factory.c"
             break ;
 
+        //----------------------------------------------------------------------
+        case GB_PAIR_opcode   :    // z = 1
+        //----------------------------------------------------------------------
+
+            // land_pair, lor_pair, max_pair, min_pair, times_pair
+            // all become any_pair.
+
+            // 44 semirings: (min,max,plus,times) for non-boolean, and
+            // (or,and,xor,eq) for boolean
+            #define GB_MULT_IS_PAIR_OPERATOR
+            #define GB_MULT_NAME _pair
+            #include "GB_AxB_type_factory.c"
+            #undef  GB_MULT_IS_PAIR_OPERATOR
+            break ;
+
         //----------------------------------------------------------------------
         case GB_MIN_opcode     :    // z = min(x,y)
         //----------------------------------------------------------------------
diff --git a/Source/Template/GB_AxB_heap_mask.c b/Source/Template/GB_AxB_heap_mask.c
deleted file mode 100644
index 5ed7419bb9..0000000000
--- a/Source/Template/GB_AxB_heap_mask.c
+++ /dev/null
@@ -1,507 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_heap_mask:  compute C<M>=A*B using the heap method, with M present
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-// This file is #include'd in GB_AxB_heap_meta.c.
-
-// if GB_MASK_CASE is defined, then the mask matrix M is present.  Otherwise it
-// is not present.  However, this method takes Omega(nnz(M)) time when
-// exploiting the mask, so a very dense mask can be costly to exploit.  Thus,
-// the mask is not passed to the heap method if the total flop count is less
-// than nnz(M).
-
-#ifndef GB_HEAP_FREE_WORK
-#define GB_HEAP_FREE_WORK
-#endif
-
-{
-
-    //--------------------------------------------------------------------------
-    // get M
-    //--------------------------------------------------------------------------
-
-    #ifdef GB_MASK_CASE
-    const int64_t *GB_RESTRICT Mp = M->p ;
-    const int64_t *GB_RESTRICT Mh = M->h ;
-    const int64_t *GB_RESTRICT Mi = M->i ;
-    const GB_void *GB_RESTRICT Mx = M->x ;
-    GB_cast_function cast_M = GB_cast_factory (GB_BOOL_code, M->type->code);
-    size_t msize = M->type->size ;
-    const int64_t mnvec = M->nvec ;
-    int64_t mpleft = 0 ;
-    int64_t mpright = mnvec - 1 ;
-    bool M_is_hyper = GB_IS_HYPER (M) ;
-    #endif
-
-    //--------------------------------------------------------------------------
-    // get A and B
-    //--------------------------------------------------------------------------
-
-    const int64_t *GB_RESTRICT Ah = A->h ;
-    const int64_t *GB_RESTRICT Ap = A->p ;
-    const int64_t *GB_RESTRICT Ai = A->i ;
-    const int64_t *GB_RESTRICT Bi = B->i ;
-    int64_t anvec = A->nvec ;
-
-    // if A is hypersparse but all vectors are present, then
-    // treat A as if it were non-hypersparse
-    bool A_is_hyper = A->is_hyper && (anvec < A->vdim) ;
-
-    //--------------------------------------------------------------------------
-    // start the construction of C
-    //--------------------------------------------------------------------------
-
-    int64_t *GB_RESTRICT Ci = C->i ;
-
-    int64_t jlast, cnz, cnz_last ;
-    GB_jstartup (C, &jlast, &cnz, &cnz_last) ;
-
-    //--------------------------------------------------------------------------
-    // C<M> = A*B
-    //--------------------------------------------------------------------------
-
-    GBI_for_each_vector (B)
-    {
-
-        //----------------------------------------------------------------------
-        // get B(:,j)
-        //----------------------------------------------------------------------
-
-        GBI_jth_iteration (j, pB_start, pB_end) ;
-        int64_t bjnz = pB_end - pB_start ;
-        // no work to do if B(:,j) is empty
-        if (bjnz == 0) continue ;
-
-        //----------------------------------------------------------------------
-        // get M(:,j)
-        //----------------------------------------------------------------------
-
-        #ifdef GB_MASK_CASE
-        // find vector j in M
-        int64_t pM, pM_end ;
-        GB_lookup (M_is_hyper, Mh, Mp, &mpleft, mpright, j, &pM, &pM_end) ;
-        // no work to do if M(:,j) is empty
-        if (pM == pM_end) continue ;
-
-        // M(:,j) has at least one entry; get the first and last index in M(:,j)
-        int64_t im_first = Mi [pM] ;
-        int64_t im_last  = Mi [pM_end-1] ;
-        #endif
-
-        //----------------------------------------------------------------------
-        // trim Ah on right
-        //----------------------------------------------------------------------
-
-        // Ah [0..A->nvec-1] holds the set of non-empty vectors of A, but only
-        // vectors k corresponding to nonzero entries B(k,j) are accessed for
-        // this vector B(:,j).  If nnz (B(:,j)) > 2, prune the search space on
-        // the right, so the remaining calls to GB_lookup will only need to
-        // search Ah [pleft...pright-1].  pright does not change.  pleft is
-        // advanced as the Heap is built, since the indices in B(:,j) are
-        // sorted in ascending order.
-
-        int64_t pleft = 0 ;
-        int64_t pright = anvec-1 ;
-        if (A_is_hyper && bjnz > 2)
-        { 
-            // trim Ah [0..pright] to remove any entries past the last B(:,j)
-            GB_bracket_right (Bi [pB_end-1], Ah, 0, &pright) ;
-        }
-
-        //----------------------------------------------------------------------
-        // build the Heap
-        //----------------------------------------------------------------------
-
-        // Construct a Heap containing each vector A(:,k) for which B(k,j) is
-        // nonzero.
-
-        // The key of an GB_Element in the Heap is the index i at the head of
-        // the A(:,k) list.  The name each Elemens is the corresponding entry
-        // in B(:,j) with index k.  The name is kk if B(k,j) is the (kk)th
-        // nonzero in B(:,j), if kk is in the range 0 to bjnz-1.
-
-        // each array is of size bjnz_max
-        // int64_t pA_pair [0..bjnz-1] ;
-        // GB_Element Heap [1..bjnz] ;     note that Heap [0] is not valid
-        // int64_t List [0..bjnz-1] ;
-        int64_t nheap = 0 ;
-        ASSERT (bjnz <= bjnz_max) ;
-
-        for (int64_t pB = pB_start ; pB < pB_end ; pB++)
-        { 
-            // B(k,j) is nonzero
-            int64_t k = Bi [pB] ;
-
-            // find A(:,k), reusing pleft since Bi [...] is sorted
-            int64_t pA, pA_end ;
-            GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k, &pA, &pA_end) ;
-
-            // skip if A(:,k) empty
-            if (pA == pA_end) continue ;
-
-            #ifdef GB_MASK_CASE
-            // A(:,k) is non-empty; get the first and last index of A(:,k)
-            int64_t alo = Ai [pA] ;
-            int64_t ahi = Ai [pA_end-1] ;
-
-            // skip if the intersection of A(:,k) and M(:,j) is empty
-            if (ahi < im_first || alo > im_last) continue ;
-
-            // skip past all rows in A(:,k) that are < im_first
-            for ( ; pA < pA_end && Ai [pA] < im_first ; pA++) ;
-
-            // skip if remainder of A(:,k) is empty
-            if (pA == pA_end) continue ;
-            #endif
-
-            // A(:,k) not empty; add the first index in A(:,k) to the heap
-            ++nheap ;
-
-            // k is renamed kk.  B(k,j) is the kk-th nonzero in B(:,j)
-            int64_t kk = pB - pB_start ;
-            ASSERT (kk >= 0 && kk < bjnz) ;
-            Heap [nheap].key  = Ai [pA] ;
-            Heap [nheap].name = kk ;
-
-            // keep track of the start and end of A(:,k)
-            pA_pair [kk].start = pA ;
-            pA_pair [kk].end   = pA_end ;
-        }
-
-        GB_heap_build (Heap, nheap) ;
-
-        // keep track of the number live nodes in the Heap
-        int64_t nlive = nheap ;
-
-        //----------------------------------------------------------------------
-        // C (:,j) = A (:,K) * B(K,j), for all indices K = find (B (:,j))
-        //----------------------------------------------------------------------
-
-        #ifdef GB_MASK_CASE
-        while (nheap > 0)   // iterate until all A(:,k) are done
-        #else
-        while (nheap > 1)   // iterate until only one A(:,k) is left
-        #endif
-        {
-
-            //------------------------------------------------------------------
-            // C(i,j) = A (i,List)' * B (List,j)
-            //------------------------------------------------------------------
-
-            // The method scans the Heap to find all nodes with minimum key.
-            // This key is the row index i.  Each of these nodes represents a
-            // vector A(:,k) for which the topmost row index is A(i,k). The
-            // nodes are placed in a List, in topological order.  Next, a dot
-            // product is computed, cij = A (i,List)' * B (List,j), for all
-            // nodes in the List.  As each node is processed, its key is
-            // updated to the next row index in the vector, and the node is
-            // reheapified.  The reheapify is done in reverse topological order
-            // so that the min-heap property is preserved.
-
-            //------------------------------------------------------------------
-            // get the List of all nodes with minimum key
-            //------------------------------------------------------------------
-
-            int64_t nlist ;
-            int64_t i = GB_heap_getminlist (Heap, nheap, List, &nlist) ;
-            ASSERT (i >= 0 && i < cvlen) ;
-
-            //------------------------------------------------------------------
-            // get the mask M(i,j)
-            //------------------------------------------------------------------
-
-            #ifdef GB_MASK_CASE
-            // get M(i,j) and advance the mask
-            for ( ; pM < pM_end && Mi [pM] < i ; pM++) ;
-            if (pM >= pM_end)
-            { 
-                // M(:,j) is exhausted; C(:,j) is done
-                nheap = 0 ;
-                break ;
-            }
-            bool mij = false ;
-            if (i == Mi [pM])
-            { 
-                cast_M (&mij, Mx +(pM*msize), 0) ;
-                pM++ ;
-            }
-            int64_t im_next = (pM < pM_end) ? Mi [pM] : cvlen ;
-            #endif
-
-            //------------------------------------------------------------------
-            // ensure enough space exists in C
-            //------------------------------------------------------------------
-
-            #ifdef GB_MASK_CASE
-            // C->nzmax == nnz (M) + 1, so cnz < C->nzmax will always hold
-            ASSERT (cnz < C->nzmax) ;
-            #else
-            {
-                // ensure enough space exists in C
-                if (cnz == C->nzmax)
-                {
-                    GrB_Info info = GB_ix_realloc (C, 2*(C->nzmax), true, NULL);
-                    if (info != GrB_SUCCESS)
-                    { 
-                        // out of memory
-                        ASSERT (!(C->enqueued)) ;
-                        GB_free (Chandle) ;
-                        GB_HEAP_FREE_WORK ;
-                        return (info) ;
-                    }
-                    Ci = C->i ;
-                    Cx = C->x ;
-                    // reacquire the pointer cij since C->x has moved
-                    GB_CIJ_REACQUIRE (cij, cnz) ;
-                }
-            }
-            #endif
-
-            //------------------------------------------------------------------
-            // cij = A (i,List)' * B (List,j), in topological order
-            //------------------------------------------------------------------
-
-            for (int64_t klist = nlist-1 ; klist >= 0 ; klist--)
-            {
-
-                //--------------------------------------------------------------
-                // get node p from the List, which defines A(:,k)
-                //--------------------------------------------------------------
-
-                // the index k is implicit; it has been renamed as kk if
-                // B(k,j) is the kk-th nonzero in B(:,j).
-                // get k from the list and the position of A(i,k)
-                int64_t p = List [klist] ;      ASSERT (p >= 1 && p <= nheap) ;
-                int64_t kk = Heap [p].name ;    ASSERT (kk >= 0 && kk < bjnz) ;
-                int64_t pA     = pA_pair [kk].start ;
-                int64_t pA_end = pA_pair [kk].end ;
-                ASSERT (Ai [pA] == i) ;
-
-                //--------------------------------------------------------------
-                // C(i,j) += A(i,k) * B(k,j)
-                //--------------------------------------------------------------
-
-                // This is a dot product of A(i,:)' and B(:,j), but unlike the
-                // dot product method, this loop cannot terminate early for
-                // operators such as logical OR, or FIRST.  Each vector A(:,k)
-                // must be advanced in the Heap.  The numerical work, below,
-                // could be skipped, but this is trivial for built-in
-                // operators.  Early termination cannot be exploited for
-                // user-defined semirings since their properties are unknown.
-
-                #ifdef GB_MASK_CASE
-                if (mij)
-                #endif
-                {
-                    GB_GETA (aik, Ax, pA) ;
-                    GB_GETB (bkj, Bx, pB_start + kk) ;
-                    if (klist == nlist-1)
-                    { 
-                        // first entry: cij = A(i,k) * B(k,j)
-                        GB_MULT (cij, aik, bkj) ;
-                    }
-                    else
-                    { 
-                        // cij += A(i,k) * B(k,j)
-                        GB_MULTADD (cij, aik, bkj) ;
-                    }
-                }
-
-                //--------------------------------------------------------------
-                // move to the next entry in A(:,k)
-                //--------------------------------------------------------------
-
-                #ifdef GB_MASK_CASE
-                // skip past all rows in A(:,k) that are < im_next
-                if (im_next == cvlen)
-                { 
-                    // M(:,j) is exhausted and thus A(:,j) is done too
-                    pA = pA_end ;
-                }
-                else
-                { 
-                    for ( ; pA < pA_end && Ai [pA] < im_next ; pA++) ;
-                }
-                #else
-                // advance to the next row in A(:,k)
-                pA++ ;
-                #endif
-
-                //--------------------------------------------------------------
-                // put A(:,k) back in the Heap
-                //--------------------------------------------------------------
-
-                pA_pair [kk].start = pA ;
-                bool do_heapify = true ;
-                if (pA < pA_end)
-                { 
-                    // advance p to the next entry in A(:,k) or M(:,j).
-                    // kk < bjnz refers to A(:,k), and kk=bjnz is M(:,j).
-                    Heap [p].key = Ai [pA] ;
-                    ASSERT (Heap [p].key > i && Heap [p].key < cvlen) ;
-                }
-                else
-                {
-                    // A(:,k) is exhausted.  Either delete it from the Heap
-                    // if safe to do so, or give it a maximal key.
-                    ASSERT (nheap > 0) ;
-                    if (Heap [nheap].key > i)
-                    { 
-                        // safe to delete p from the Heap
-                        GB_heap_delete (p, Heap, &nheap) ;
-                        do_heapify = false ;
-                    }
-                    else
-                    { 
-                        // Heap [nheap].key == i, so the last node in the
-                        // Heap is an entry in the List [0..nlist-1] that
-                        // has not yet been processed in this for-loop.  It
-                        // is not safe to delete.  Give node p a maximal
-                        // key so and heapify it is no longer considered.
-                        Heap [p].key = cvlen ;
-                    }
-                    // one less live node in the Heap
-                    --nlive ;
-                }
-                if (do_heapify)
-                { 
-                    GB_heapify (p, Heap, nheap) ;
-                }
-            }
-
-            //------------------------------------------------------------------
-            // prune the Heap if mostly dead
-            //------------------------------------------------------------------
-
-            ASSERT (GB_IMPLIES (nheap > 0, nlive <= nheap && nlive >= 0)) ;
-
-            if (nlive == 0)
-            { 
-                // nothing is left
-                nheap = 0 ;
-            }
-
-            if (nheap > 0 && 2*nlive < nheap)
-            {
-                // less than half of the Heap is alive.  Prune the dead.
-                // This step also ensures that Heap [1].key is never == cvlen,
-                // since that would mean nlive == 0 and nheap > 0.
-                int64_t nheap_pruned = 0 ;
-                for (int64_t p = 1 ; p <= nheap ; p++)
-                {
-                    if (Heap [p].key < cvlen)
-                    { 
-                        // keep this element in the Heap
-                        Heap [++nheap_pruned] = Heap [p] ;
-                    }
-                }
-                ASSERT (nheap_pruned == nlive) ;
-                nheap = nheap_pruned ;
-                nlive = nheap ;
-
-                // rebuild the Heap
-                GB_heap_build (Heap, nheap) ;
-            }
-
-            //------------------------------------------------------------------
-            // insert C(i,j) into C
-            //------------------------------------------------------------------
-
-            #ifdef GB_MASK_CASE
-            if (mij)
-            #endif
-            { 
-                Ci [cnz] = i ;
-                // Cx [cnz] = cij ;
-                GB_CIJ_SAVE (cij, cnz) ;
-                cnz++ ;
-            }
-        }
-
-        //----------------------------------------------------------------------
-        // handle the last A(:,k)
-        //----------------------------------------------------------------------
-
-        // This phase is done only if the mask is not present.  If the mask is
-        // present, the while loop above terminates only when the Heap is
-        // empty.
-
-        #ifndef GB_MASK_CASE
-        if (nheap == 1)
-        {
-            // get the last A(:,k)
-            #ifdef GB_DEBUG
-            int64_t ilast = Heap [1].key ;
-            #endif
-            ASSERT (ilast >= 0 && ilast < cvlen) ;
-            int64_t kk = Heap [1].name ;
-            ASSERT (kk >= 0 && kk < bjnz) ;
-
-            int64_t pA     = pA_pair [kk].start ;
-            int64_t pA_end = pA_pair [kk].end ;
-            ASSERT (ilast == Ai [pA]) ;
-
-            // number of entries left in this last A(:,k)
-            int64_t aknz = pA_end - pA ;
-
-            // ensure enough space exists in C
-            if (cnz + aknz > C->nzmax)
-            {
-                GrB_Info info = GB_ix_realloc (C, 2*(cnz + aknz), true, NULL) ;
-                if (info != GrB_SUCCESS)
-                { 
-                    // out of memory
-                    ASSERT (!(C->enqueued)) ;
-                    GB_free (Chandle) ;
-                    GB_HEAP_FREE_WORK ;
-                    return (info) ;
-                }
-                Ci = C->i ;
-                Cx = C->x ;
-                // reacquire cij since C->x has moved
-                GB_CIJ_REACQUIRE (cij, cnz) ;
-            }
-
-            // bkj = Bx [ ] ;
-            GB_GETB (bkj, Bx, pB_start + kk) ;
-
-            // C(ilast:end,j) = A (ilast:end,k) * B (k,j)
-            for ( ; pA < pA_end ; pA++)
-            { 
-                // get A(i,k) and B(k,j) and do the numerical work
-                int64_t i = Ai [pA] ;
-
-                // cij = A(i,k) * B(k,j)
-                GB_GETA (aik, Ax, pA) ;
-                GB_MULT (cij, aik, bkj) ;
-
-                Ci [cnz] = i ;
-                // Cx [cnz] = cij ;
-                GB_CIJ_SAVE (cij, cnz) ;
-                cnz++ ;
-            }
-        }
-        #endif
-
-        //----------------------------------------------------------------------
-        // log the end of vector C(:,j)
-        //----------------------------------------------------------------------
-
-        // this cannot fail since C->plen is the upper bound: the number
-        // of non-empty vectors of B.
-        info = GB_jappend (C, j, &jlast, cnz, &cnz_last, NULL) ;
-        ASSERT (info == GrB_SUCCESS) ;
-    }
-
-    //--------------------------------------------------------------------------
-    // finish construction of C
-    //--------------------------------------------------------------------------
-
-    GB_jwrapup (C, jlast, cnz) ;    // finalize Cp and Ch
-}
-
diff --git a/Source/Template/GB_AxB_heap_meta.c b/Source/Template/GB_AxB_heap_meta.c
deleted file mode 100644
index 7a30ecb4f4..0000000000
--- a/Source/Template/GB_AxB_heap_meta.c
+++ /dev/null
@@ -1,28 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_AxB_heap_meta: compute C<M>=A*B or C=A*B using a heap-based method
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-{
-
-    const GB_ATYPE *GB_RESTRICT Ax = A_is_pattern ? NULL : A->x ;
-    const GB_BTYPE *GB_RESTRICT Bx = B_is_pattern ? NULL : B->x ;
-
-    if (M != NULL)
-    { 
-        // C<M> = A*B via a heap
-        #define GB_MASK_CASE
-        #include "GB_AxB_heap_mask.c"
-        #undef GB_MASK_CASE
-    }
-    else
-    { 
-        // C = A*B via the heap
-        #include "GB_AxB_heap_mask.c"
-    }
-}
-
diff --git a/Source/Template/GB_AxB_rowscale_meta.c b/Source/Template/GB_AxB_rowscale_meta.c
index 36b3a340b0..37e7ac491a 100644
--- a/Source/Template/GB_AxB_rowscale_meta.c
+++ b/Source/Template/GB_AxB_rowscale_meta.c
@@ -2,7 +2,7 @@
 // GB_AxB_rowscale_meta: C=D*B where D is a square diagonal matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -10,7 +10,7 @@
 // All entries in C=D*B are computed fully in parallel. 
 
 {
-    // Bx is unused if the operator is FIRST
+    // Bx is unused if the operator is FIRST or PAIR
     #include "GB_unused.h"
 
     //--------------------------------------------------------------------------
diff --git a/Source/Template/GB_AxB_saxpy3_template.c b/Source/Template/GB_AxB_saxpy3_template.c
new file mode 100644
index 0000000000..1ca833c7da
--- /dev/null
+++ b/Source/Template/GB_AxB_saxpy3_template.c
@@ -0,0 +1,1383 @@
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_template: C=A*B, C<M>=A*B, or C<!M>=A*B via saxpy3 method
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// GB_AxB_saxpy3_template.c computes C=A*B for any semiring and matrix types.
+// It is #include'd in GB_AxB_saxpy3 to construct the generic method (for
+// arbitrary user-defined operators and/or typecasting), and in the hard-coded
+// GB_Asaxpy3B* workers in the Generated/ folder.
+
+#include "GB_unused.h"
+
+//------------------------------------------------------------------------------
+// template code for C=A*B via the saxpy3 method
+//------------------------------------------------------------------------------
+
+{
+
+    //--------------------------------------------------------------------------
+    // get the chunk size
+    //--------------------------------------------------------------------------
+
+    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
+
+    //--------------------------------------------------------------------------
+    // get M, A, B, and C
+    //--------------------------------------------------------------------------
+
+    int64_t *GB_RESTRICT Cp = C->p ;
+    // const int64_t *GB_RESTRICT Ch = C->h ;
+    const int64_t cvlen = C->vlen ;
+    const int64_t cnvec = C->nvec ;
+
+    const int64_t *GB_RESTRICT Bp = B->p ;
+    const int64_t *GB_RESTRICT Bh = B->h ;
+    const int64_t *GB_RESTRICT Bi = B->i ;
+    const GB_BTYPE *GB_RESTRICT Bx = B_is_pattern ? NULL : B->x ;
+    // const int64_t bvlen = B->vlen ;
+    // const int64_t bnvec = B->nvec ;
+    // const bool B_is_hyper = B->is_hyper ;
+
+    const int64_t *GB_RESTRICT Ap = A->p ;
+    const int64_t *GB_RESTRICT Ah = A->h ;
+    const int64_t *GB_RESTRICT Ai = A->i ;
+    const int64_t anvec = A->nvec ;
+    const bool A_is_hyper = GB_IS_HYPER (A) ;
+    const GB_ATYPE *GB_RESTRICT Ax = A_is_pattern ? NULL : A->x ;
+
+    const int64_t *GB_RESTRICT Mp = NULL ;
+    const int64_t *GB_RESTRICT Mh = NULL ;
+    const int64_t *GB_RESTRICT Mi = NULL ;
+    const GB_void *GB_RESTRICT Mx = NULL ;
+    size_t msize = 0 ;
+    int64_t mnvec = 0 ;
+    bool M_is_hyper = false ;
+    if (M != NULL)
+    { 
+        Mp = M->p ;
+        Mh = M->h ;
+        Mi = M->i ;
+        Mx = (Mask_struct ? NULL : (M->x)) ;
+        msize = M->type->size ;
+        mnvec = M->nvec ;
+        M_is_hyper = M->is_hyper ;
+    }
+
+    // 3 cases:
+    //      M not present and Mask_comp false: compute C=A*B
+    //      M present     and Mask_comp false: compute C<M>=A*B
+    //      M present     and Mask_comp true : compute C<!M>=A*B
+    // If M is NULL on input, then Mask_comp is also false on input.
+
+    bool mask_is_M = (M != NULL && !Mask_comp) ;
+
+    //==========================================================================
+    // phase2: numeric work for fine tasks
+    //==========================================================================
+
+    // Coarse tasks: nothing to do in phase2.
+    // Fine tasks: compute nnz (C(:,j)), and values in Hx via atomics.
+
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < nfine ; taskid++)
+    {
+
+        //----------------------------------------------------------------------
+        // get the task descriptor
+        //----------------------------------------------------------------------
+
+        int64_t kk = TaskList [taskid].vector ;
+        int64_t hash_size = TaskList [taskid].hsize ;
+        bool use_Gustavson = (hash_size == cvlen) ;
+        int64_t pB     = TaskList [taskid].start ;
+        int64_t pB_end = TaskList [taskid].end + 1 ;
+        #if !GB_IS_ANY_PAIR_SEMIRING
+        GB_CTYPE *GB_RESTRICT Hx = (GB_CTYPE *) TaskList [taskid].Hx ;
+        #endif
+        int64_t pleft = 0, pright = anvec-1 ;
+
+        if (use_Gustavson)
+        {
+
+            //------------------------------------------------------------------
+            // phase2: fine Gustavson task
+            //------------------------------------------------------------------
+
+            // Hf [i] == 0: unlocked, i has not been seen in C(:,j).
+            //      Hx [i] is not initialized.
+            //      M(i,j) is 0, or M is not present.
+            //      if M: Hf [i] stays equal to 0 (or 3 if locked)
+            //      if !M, or no M: C(i,j) is a new entry seen for 1st time
+
+            // Hf [i] == 1: unlocked, i has not been seen in C(:,j).
+            //      Hx [i] is not initialized.  M is present.
+            //      M(i,j) is 1. (either M or !M case)
+            //      if M: C(i,j) is a new entry seen for the first time.
+            //      if !M: Hf [i] stays equal to 1 (or 3 if locked)
+
+            // Hf [i] == 2: unlocked, i has been seen in C(:,j).
+            //      Hx [i] is initialized.  This case is independent of M.
+
+            // Hf [i] == 3: locked.  Hx [i] cannot be accessed.
+
+            uint8_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+
+            if (M == NULL)
+            {
+
+                //--------------------------------------------------------------
+                // phase2: fine Gustavson task, C=A*B
+                //--------------------------------------------------------------
+
+                // Hf [i] is initially 0.
+
+                // 0 -> 3 : to lock, if i seen for first time
+                // 2 -> 3 : to lock, if i seen already
+                // 3 -> 2 : to unlock; now i has been seen
+
+                for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                {
+                    int64_t k = Bi [pB] ;       // get B(k,j)
+                    GB_GET_A_k ;                // get A(:,k)
+                    if (aknz == 0) continue ;
+                    GB_GET_B_kj ;               // bkj = B(k,j)
+                    // scan A(:,k)
+                    for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                    {
+                        int64_t i = Ai [pA] ;    // get A(i,k)
+                        GB_MULT_A_ik_B_kj ;      // t = A(i,k) * B(k,j)
+                        uint8_t f ;
+
+                        #if GB_IS_ANY_MONOID
+
+                        GB_ATOMIC_READ
+                        f = Hf [i] ;            // grab the entry
+                        if (f == 2) continue ;  // check if already updated
+                        GB_ATOMIC_WRITE
+                        Hf [i] = 2 ;                // flag the entry
+                        GB_ATOMIC_WRITE_HX (i, t) ;    // Hx [i] = t
+
+                        #else
+
+                        #if GB_HAS_ATOMIC
+                        GB_ATOMIC_READ
+                        f = Hf [i] ;            // grab the entry
+                        if (f == 2)             // if true, update C(i,j)
+                        { 
+                            GB_ATOMIC_UPDATE_HX (i, t) ;   // Hx [i] += t
+                            continue ;          // C(i,j) has been updated
+                        }
+                        #endif
+                        do  // lock the entry
+                        {
+                            GB_ATOMIC_CAPTURE
+                            {
+                                f = Hf [i] ; Hf [i] = 3 ;
+                            }
+                        } while (f == 3) ; // lock owner gets f=0 or 2
+                        if (f == 0)
+                        { 
+                            // C(i,j) is a new entry
+                            GB_ATOMIC_WRITE_HX (i, t) ;    // Hx [i] = t
+                        }
+                        else // f == 2
+                        { 
+                            // C(i,j) already appears in C(:,j)
+                            GB_ATOMIC_UPDATE_HX (i, t) ;   // Hx [i] += t
+                        }
+                        GB_ATOMIC_WRITE
+                        Hf [i] = 2 ;                // unlock the entry
+
+                        #endif
+                    }
+                }
+
+            }
+            else if (mask_is_M)
+            {
+
+                //--------------------------------------------------------------
+                // phase2: fine Gustavson task, C<M>=A*B
+                //--------------------------------------------------------------
+
+                // Hf [i] is 0 if M(i,j) not present or M(i,j)=0.
+                // 0 -> 1 : has already been done in phase0 if M(i,j)=1
+
+                // 0 -> 0 : to ignore, if M(i,j)=0
+                // 1 -> 3 : to lock, if i seen for first time
+                // 2 -> 3 : to lock, if i seen already
+                // 3 -> 2 : to unlock; now i has been seen
+
+                GB_GET_M_j ;                // get M(:,j)
+                GB_GET_M_j_RANGE (16) ;     // get first and last in M(:,j)
+                for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                { 
+                    int64_t k = Bi [pB] ;       // get B(k,j)
+                    GB_GET_A_k ;                // get A(:,k)
+                    GB_SKIP_IF_A_k_DISJOINT_WITH_M_j ;
+                    GB_GET_B_kj ;               // bkj = B(k,j)
+
+                    #if GB_IS_ANY_MONOID
+
+                    #define GB_IKJ                                             \
+                        uint8_t f ;                                            \
+                        GB_ATOMIC_READ                                         \
+                        f = Hf [i] ;            /* grab the entry */           \
+                        if (f == 0 || f == 2) continue ;                       \
+                        GB_ATOMIC_WRITE                                        \
+                        Hf [i] = 2 ;            /* flag the entry */           \
+                        GB_MULT_A_ik_B_kj ;     /* t = A(i,k) * B(k,j) */      \
+                        GB_ATOMIC_WRITE_HX (i, t) ;    /* Hx [i] = t */
+
+                    #else
+
+                    #define GB_IKJ                                             \
+                    {                                                          \
+                        GB_MULT_A_ik_B_kj ;     /* t = A(i,k) * B(k,j) */      \
+                        uint8_t f ;                                            \
+                        GB_ATOMIC_READ                                         \
+                        f = Hf [i] ;            /* grab the entry */           \
+                        if (GB_HAS_ATOMIC && (f == 2))                         \
+                        {                                                      \
+                            /* C(i,j) already seen; update it */               \
+                            GB_ATOMIC_UPDATE_HX (i, t) ; /* Hx [i] += t */     \
+                            continue ;       /* C(i,j) has been updated */     \
+                        }                                                      \
+                        if (f == 0) continue ; /* M(i,j)=0; ignore C(i,j)*/    \
+                        do  /* lock the entry */                               \
+                        {                                                      \
+                            GB_ATOMIC_CAPTURE                                  \
+                            {                                                  \
+                                f = Hf [i] ; Hf [i] = 3 ;                      \
+                            }                                                  \
+                        } while (f == 3) ; /* lock owner gets f=1 or 2 */      \
+                        if (f == 1)                                            \
+                        {                                                      \
+                            /* C(i,j) is a new entry */                        \
+                            GB_ATOMIC_WRITE_HX (i, t) ; /* Hx [i] = t */       \
+                        }                                                      \
+                        else /* f == 2 */                                      \
+                        {                                                      \
+                            /* C(i,j) already appears in C(:,j) */             \
+                            GB_ATOMIC_UPDATE_HX (i, t) ; /* Hx [i] += t */     \
+                        }                                                      \
+                        GB_ATOMIC_WRITE                                        \
+                        Hf [i] = 2 ;                /* unlock the entry */     \
+                    }
+                    #endif
+
+                    #define GB_IKJ_VECTORIZE
+                    #define GB_IKJ_IVDEP
+                    GB_SCAN_M_j_OR_A_k ;
+                    #undef GB_IKJ_VECTORIZE
+                    #undef GB_IKJ_IVDEP
+                    #undef GB_IKJ
+                }
+
+            }
+            else
+            {
+
+                //--------------------------------------------------------------
+                // phase2: fine Gustavson task, C<!M>=A*B
+                //--------------------------------------------------------------
+
+                // Hf [i] is 0 if M(i,j) not present or M(i,j)=0.
+                // 0 -> 1 : has already been done in phase0 if M(i,j)=1
+
+                // 1 -> 1 : to ignore, if M(i,j)=1
+                // 0 -> 3 : to lock, if i seen for first time
+                // 2 -> 3 : to lock, if i seen already
+                // 3 -> 2 : to unlock; now i has been seen
+
+                for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                {
+                    int64_t k = Bi [pB] ;       // get B(k,j)
+                    GB_GET_A_k ;                // get A(:,k)
+                    if (aknz == 0) continue ;
+                    GB_GET_B_kj ;               // bkj = B(k,j)
+                    // scan A(:,k)
+                    for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                    {
+                        int64_t i = Ai [pA] ;   // get A(i,k)
+                        GB_MULT_A_ik_B_kj ;     // t = A(i,k) * B(k,j)
+                        uint8_t f ;
+
+                        #if GB_IS_ANY_MONOID
+
+                        GB_ATOMIC_READ
+                        f = Hf [i] ;            // grab the entry
+                        if (f == 1 || f == 2) continue ;
+                        GB_ATOMIC_WRITE
+                        Hf [i] = 2 ;                // flag the entry
+                        GB_ATOMIC_WRITE_HX (i, t) ;    // Hx [i] = t
+
+                        #else
+
+                        GB_ATOMIC_READ
+                        f = Hf [i] ;            // grab the entry
+                        #if GB_HAS_ATOMIC
+                        if (f == 2)             // if true, update C(i,j)
+                        { 
+                            GB_ATOMIC_UPDATE_HX (i, t) ;   // Hx [i] += t
+                            continue ;          // C(i,j) has been updated
+                        }
+                        #endif
+                        if (f == 1) continue ; // M(i,j)=1; ignore C(i,j)
+                        do  // lock the entry
+                        {
+                            GB_ATOMIC_CAPTURE
+                            {
+                                f = Hf [i] ; Hf [i] = 3 ;
+                            }
+                        } while (f == 3) ; // lock owner gets f=0 or 2
+                        if (f == 0)
+                        { 
+                            // C(i,j) is a new entry
+                            GB_ATOMIC_WRITE_HX (i, t) ;    // Hx [i] = t
+                        }
+                        else // f == 2
+                        { 
+                            // C(i,j) already seen
+                            GB_ATOMIC_UPDATE_HX (i, t) ;   // Hx [i] += t
+                        }
+                        GB_ATOMIC_WRITE
+                        Hf [i] = 2 ;                // unlock the entry
+                        #endif
+                    }
+                }
+            }
+
+        }
+        else
+        {
+
+            //------------------------------------------------------------------
+            // phase2: fine hash task
+            //------------------------------------------------------------------
+
+            // Each hash entry Hf [hash] splits into two parts, (h,f).  f
+            // is in the 2 least significant bits.  h is 62 bits, and is
+            // the 1-based index i of the C(i,j) entry stored at that
+            // location in the hash table.
+
+            // If M is present (M or !M), and M(i,j)=1, then (i+1,1)
+            // has been inserted into the hash table, in phase0.
+
+            // Given Hf [hash] split into (h,f)
+
+            // h == 0, f == 0: unlocked and unoccupied.
+            //                  note that if f=0, h must be zero too.
+
+            // h == i+1, f == 1: unlocked, occupied by M(i,j)=1.
+            //                  C(i,j) has not been seen, or is ignored.
+            //                  Hx is not initialized.  M is present.
+            //                  if !M: this entry will be ignored in C.
+
+            // h == i+1, f == 2: unlocked, occupied by C(i,j).
+            //                  Hx is initialized.  M is no longer
+            //                  relevant.
+
+            // h == (anything), f == 3: locked.
+
+            int64_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+            int64_t hash_bits = (hash_size-1) ;
+
+            if (M == NULL)
+            {
+
+                //--------------------------------------------------------------
+                // phase2: fine hash task, C=A*B
+                //--------------------------------------------------------------
+
+                // Given Hf [hash] split into (h,f)
+
+                // h == 0  , f == 0 : unlocked and unoccupied.
+                // h == i+1, f == 2 : unlocked, occupied by C(i,j).
+                //                    Hx is initialized.
+                // h == ..., f == 3 : locked.
+
+                // 0 -> 3 : to lock, if i seen for first time
+                // 2 -> 3 : to lock, if i seen already
+                // 3 -> 2 : to unlock; now i has been seen
+
+                for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                {
+                    int64_t k = Bi [pB] ;       // get B(k,j)
+                    GB_GET_A_k ;                // get A(:,k)
+                    if (aknz == 0) continue ;
+                    GB_GET_B_kj ;               // bkj = B(k,j)
+                    // scan A(:,k)
+                    for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                    {
+                        int64_t i = Ai [pA] ;       // get A(i,k)
+                        GB_MULT_A_ik_B_kj ;         // t = A(i,k) * B(k,j)
+                        int64_t i1 = i + 1 ;        // i1 = one-based index
+                        int64_t i_unlocked = (i1 << 2) + 2 ;    // (i+1,2)
+                        for (GB_HASH (i))           // find i in hash table
+                        {
+                            int64_t hf ;
+                            GB_ATOMIC_READ
+                            hf = Hf [hash] ;        // grab the entry
+                            #if GB_HAS_ATOMIC
+                            if (hf == i_unlocked)  // if true, update C(i,j)
+                            { 
+                                GB_ATOMIC_UPDATE_HX (hash, t) ;// Hx [.]+=t
+                                break ;         // C(i,j) has been updated
+                            }
+                            #endif
+                            int64_t h = (hf >> 2) ;
+                            if (h == 0 || h == i1)
+                            {
+                                // h=0: unoccupied, h=i1: occupied by i
+                                do  // lock the entry
+                                {
+                                    GB_ATOMIC_CAPTURE
+                                    {
+                                        hf = Hf [hash] ; Hf [hash] |= 3 ;
+                                    }
+                                } while ((hf & 3) == 3) ; // owner: f=0 or 2
+                                if (hf == 0) // f == 0
+                                { 
+                                    // C(i,j) is a new entry in C(:,j)
+                                    // Hx [hash] = t
+                                    GB_ATOMIC_WRITE_HX (hash, t) ;
+                                    GB_ATOMIC_WRITE
+                                    Hf [hash] = i_unlocked ; // unlock entry
+                                    break ;
+                                }
+                                if (hf == i_unlocked) // f == 2
+                                { 
+                                    // C(i,j) already appears in C(:,j)
+                                    // Hx [hash] += t
+                                    GB_ATOMIC_UPDATE_HX (hash, t) ;
+                                    GB_ATOMIC_WRITE
+                                    Hf [hash] = i_unlocked ; // unlock entry
+                                    break ;
+                                }
+                                // hash table occupied, but not with i
+                                GB_ATOMIC_WRITE
+                                Hf [hash] = hf ;  // unlock with prior value
+                            }
+                        }
+                    }
+                }
+
+            }
+            else if (mask_is_M)
+            {
+
+                //--------------------------------------------------------------
+                // phase2: fine hash task, C<M>=A*B
+                //--------------------------------------------------------------
+
+                // Given Hf [hash] split into (h,f)
+
+                // h == 0  , f == 0 : unlocked, unoccupied. C(i,j) ignored
+                // h == i+1, f == 1 : unlocked, occupied by M(i,j)=1.
+                //                    C(i,j) has not been seen.
+                //                    Hx is not initialized.
+                // h == i+1, f == 2 : unlocked, occupied by C(i,j), M(i,j)=1
+                //                    Hx is initialized.
+                // h == ..., f == 3 : locked.
+
+                // 0 -> 0 : to ignore, if M(i,j)=0
+                // 1 -> 3 : to lock, if i seen for first time
+                // 2 -> 3 : to lock, if i seen already
+                // 3 -> 2 : to unlock; now i has been seen
+
+                GB_GET_M_j ;                // get M(:,j)
+                GB_GET_M_j_RANGE (16) ;     // get first and last in M(:,j)
+                for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                { 
+                    int64_t k = Bi [pB] ;       // get B(k,j)
+                    GB_GET_A_k ;                // get A(:,k)
+                    GB_SKIP_IF_A_k_DISJOINT_WITH_M_j ;
+                    GB_GET_B_kj ;               // bkj = B(k,j)
+                    #define GB_IKJ_VECTORIZE
+                    #define GB_IKJ_IVDEP
+                    #define GB_IKJ                                             \
+                    {                                                          \
+                        GB_MULT_A_ik_B_kj ;      /* t = A(i,k) * B(k,j) */     \
+                        int64_t i1 = i + 1 ;     /* i1 = one-based index */    \
+                        int64_t i_unlocked = (i1 << 2) + 2 ;  /* (i+1,2) */    \
+                        for (GB_HASH (i))        /* find i in hash table */    \
+                        {                                                      \
+                            int64_t hf ;                                       \
+                            GB_ATOMIC_READ                                     \
+                            hf = Hf [hash] ;        /* grab the entry */       \
+                            if (GB_HAS_ATOMIC && (hf == i_unlocked))           \
+                            {                                                  \
+                                /* Hx [hash] += t */                           \
+                                GB_ATOMIC_UPDATE_HX (hash, t) ;                \
+                                break ;     /* C(i,j) has been updated */      \
+                            }                                                  \
+                            if (hf == 0) break ; /* M(i,j)=0; ignore Cij */    \
+                            if ((hf >> 2) == i1) /* if true, i found */        \
+                            {                                                  \
+                                do /* lock the entry */                        \
+                                {                                              \
+                                    GB_ATOMIC_CAPTURE                          \
+                                    {                                          \
+                                        hf = Hf [hash] ; Hf [hash] |= 3 ;      \
+                                    }                                          \
+                                } while ((hf & 3) == 3) ; /* own: f=1,2 */     \
+                                if ((hf & 3) == 1) /* f == 1 */                \
+                                {                                              \
+                                    /* C(i,j) is a new entry in C(:,j) */      \
+                                    /* Hx [hash] = t */                        \
+                                    GB_ATOMIC_WRITE_HX (hash, t) ;             \
+                                }                                              \
+                                else /* f == 2 */                              \
+                                {                                              \
+                                    /* C(i,j) already appears in C(:,j) */     \
+                                    /* Hx [hash] += t */                       \
+                                    GB_ATOMIC_UPDATE_HX (hash, t) ;            \
+                                }                                              \
+                                GB_ATOMIC_WRITE                                \
+                                Hf [hash] = i_unlocked ; /* unlock entry */    \
+                                break ;                                        \
+                            }                                                  \
+                        }                                                      \
+                    }
+                    GB_SCAN_M_j_OR_A_k ;
+                    #undef GB_IKJ_VECTORIZE
+                    #undef GB_IKJ_IVDEP
+                    #undef GB_IKJ
+                }
+
+            }
+            else
+            { 
+
+                //--------------------------------------------------------------
+                // phase2: fine hash task, C<!M>=A*B
+                //--------------------------------------------------------------
+
+                // Given Hf [hash] split into (h,f)
+
+                // h == 0  , f == 0 : unlocked and unoccupied.
+                // h == i+1, f == 1 : unlocked, occupied by M(i,j)=1.
+                //                    C(i,j) is ignored.
+                // h == i+1, f == 2 : unlocked, occupied by C(i,j).
+                //                    Hx is initialized.
+
+                // h == (anything), f == 3: locked.
+
+                // 1 -> 1 : to ignore, if M(i,j)=1
+                // 0 -> 3 : to lock, if i seen for first time
+                // 2 -> 3 : to lock, if i seen already
+                // 3 -> 2 : to unlock; now i has been seen
+
+                for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                {
+                    int64_t k = Bi [pB] ;       // get B(k,j)
+                    GB_GET_A_k ;                // get A(:,k)
+                    if (aknz == 0) continue ;
+                    GB_GET_B_kj ;               // bkj = B(k,j)
+                    // scan A(:,k)
+                    for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                    {
+                        int64_t i = Ai [pA] ;       // get A(i,k)
+                        GB_MULT_A_ik_B_kj ;         // t = A(i,k) * B(k,j)
+                        int64_t i1 = i + 1 ;        // i1 = one-based index
+                        int64_t i_unlocked = (i1 << 2) + 2 ;    // (i+1,2)
+                        int64_t i_masked   = (i1 << 2) + 1 ;    // (i+1,1)
+                        for (GB_HASH (i))           // find i in hash table
+                        {
+                            int64_t hf ;
+                            GB_ATOMIC_READ
+                            hf = Hf [hash] ;        // grab the entry
+                            #if GB_HAS_ATOMIC
+                            if (hf == i_unlocked)  // if true, update C(i,j)
+                            { 
+                                GB_ATOMIC_UPDATE_HX (hash, t) ;// Hx [.]+=t
+                                break ;         // C(i,j) has been updated
+                            }
+                            #endif
+                            if (hf == i_masked) break ; // M(i,j)=1; ignore
+                            int64_t h = (hf >> 2) ;
+                            if (h == 0 || h == i1)
+                            {
+                                // h=0: unoccupied, h=i1: occupied by i
+                                do // lock the entry
+                                {
+                                    GB_ATOMIC_CAPTURE
+                                    {
+                                        hf = Hf [hash] ; Hf [hash] |= 3 ;
+                                    }
+                                } while ((hf & 3) == 3) ; // owner: f=0,1,2
+                                if (hf == 0)            // f == 0
+                                { 
+                                    // C(i,j) is a new entry in C(:,j)
+                                    // Hx [hash] = t
+                                    GB_ATOMIC_WRITE_HX (hash, t) ;
+                                    GB_ATOMIC_WRITE
+                                    Hf [hash] = i_unlocked ; // unlock entry
+                                    break ;
+                                }
+                                if (hf == i_unlocked)   // f == 2
+                                { 
+                                    // C(i,j) already appears in C(:,j)
+                                    // Hx [hash] += t
+                                    GB_ATOMIC_UPDATE_HX (hash, t) ;
+                                    GB_ATOMIC_WRITE
+                                    Hf [hash] = i_unlocked ; // unlock entry
+                                    break ;
+                                }
+                                // hash table occupied, but not with i,
+                                // or with i but M(i,j)=1 so C(i,j) ignored
+                                GB_ATOMIC_WRITE
+                                Hf [hash] = hf ;  // unlock with prior value
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    //==========================================================================
+    // phase3/phase4: count nnz(C(:,j)) for fine tasks, cumsum of Cp
+    //==========================================================================
+
+    int64_t cjnz_max = GB_AxB_saxpy3_cumsum (C, TaskList,
+        nfine, chunk, nthreads) ;
+
+    //==========================================================================
+    // phase5: numeric phase for coarse tasks, gather for fine tasks
+    //==========================================================================
+
+    // allocate Ci and Cx
+    int64_t cnz = Cp [cnvec] ;
+    GrB_Info info = GB_ix_alloc (C, cnz, true, Context) ;
+    if (info != GrB_SUCCESS)
+    { 
+        // out of memory
+        return (GrB_OUT_OF_MEMORY) ;
+    }
+
+    int64_t  *GB_RESTRICT Ci = C->i ;
+    GB_CTYPE *GB_RESTRICT Cx = C->x ;
+
+    #if GB_IS_ANY_PAIR_SEMIRING
+
+        // ANY_PAIR semiring: result is purely symbolic
+        int64_t pC ;
+        #pragma omp parallel for num_threads(nthreads) schedule(static)
+        for (pC = 0 ; pC < cnz ; pC++)
+        { 
+            Cx [pC] = 1 ;
+        }
+
+        // Just a precaution; these variables are not used below.  Any attempt
+        // to access them will lead to a compile error.
+        #define Cx is not used
+        #define Hx is not used
+
+        // these have been renamed to ANY_PAIR:
+        // EQ_PAIR
+        // LAND_PAIR
+        // LOR_PAIR
+        // MAX_PAIR
+        // MIN_PAIR
+        // TIMES_PAIR
+
+    #endif
+
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+
+        //----------------------------------------------------------------------
+        // get the task descriptor
+        //----------------------------------------------------------------------
+
+        #if !GB_IS_ANY_PAIR_SEMIRING
+        GB_CTYPE *GB_RESTRICT Hx = (GB_CTYPE *) TaskList [taskid].Hx ;
+        #endif
+        int64_t hash_size = TaskList [taskid].hsize ;
+        bool use_Gustavson = (hash_size == cvlen) ;
+
+        if (taskid < nfine)
+        {
+
+            //------------------------------------------------------------------
+            // fine task: gather pattern and values
+            //------------------------------------------------------------------
+
+            int64_t kk = TaskList [taskid].vector ;
+            int team_size = TaskList [taskid].team_size ;
+            int master    = TaskList [taskid].master ;
+            int my_teamid = taskid - master ;
+            int64_t pC = Cp [kk] ;
+
+            if (use_Gustavson)
+            {
+
+                //--------------------------------------------------------------
+                // phase5: fine Gustavson task, C=A*B, C<M>=A*B, or C<!M>=A*B
+                //--------------------------------------------------------------
+
+                // Hf [i] == 2 if C(i,j) is an entry in C(:,j)
+                uint8_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+
+                int64_t cjnz = Cp [kk+1] - pC ;
+                int64_t istart, iend ;
+                GB_PARTITION (istart, iend, cvlen, my_teamid, team_size) ;
+                if (cjnz == cvlen)
+                {
+                    // C(:,j) is dense
+                    for (int64_t i = istart ; i < iend ; i++)
+                    { 
+                        Ci [pC + i] = i ;
+                    }
+                    #if !GB_IS_ANY_PAIR_SEMIRING
+                    // copy Hx [istart:iend-1] into Cx [pC+istart:pC+iend-1]
+                    GB_CIJ_MEMCPY (pC + istart, istart, iend - istart) ;
+                    #endif
+                }
+                else
+                {
+                    // C(:,j) is sparse
+                    pC += TaskList [taskid].my_cjnz ;
+                    for (int64_t i = istart ; i < iend ; i++)
+                    {
+                        if (Hf [i] == 2)
+                        { 
+                            #if !GB_IS_ANY_PAIR_SEMIRING
+                            GB_CIJ_GATHER (pC, i) ; // Cx [pC] = Hx [i]
+                            #endif
+                            Ci [pC++] = i ;
+                        }
+                    }
+                }
+
+            }
+            else
+            {
+
+                //--------------------------------------------------------------
+                // phase5: fine hash task, C=A*B, C<M>=A*B, C<!M>=A*B
+                //--------------------------------------------------------------
+
+                // (Hf [hash] & 3) == 2 if C(i,j) is an entry in C(:,j),
+                // and the index i of the entry is (Hf [hash] >> 2) - 1.
+
+                int64_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+                int64_t mystart, myend ;
+                GB_PARTITION (mystart, myend, hash_size, my_teamid, team_size) ;
+                pC += TaskList [taskid].my_cjnz ;
+                for (int64_t hash = mystart ; hash < myend ; hash++)
+                {
+                    int64_t hf = Hf [hash] ;
+                    if ((hf & 3) == 2)
+                    { 
+                        int64_t i = (hf >> 2) - 1 ; // found C(i,j) in hash
+                        Ci [pC++] = i ;
+                    }
+                }
+            }
+
+        }
+        else
+        {
+
+            //------------------------------------------------------------------
+            // numeric coarse task: compute C(:,kfirst:klast)
+            //------------------------------------------------------------------
+
+            int64_t *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+            int64_t kfirst = TaskList [taskid].start ;
+            int64_t klast = TaskList [taskid].end ;
+            int64_t nk = klast - kfirst + 1 ;
+            int64_t mark = 2*nk + 1 ;
+
+            if (use_Gustavson)
+            {
+
+                //--------------------------------------------------------------
+                // phase5: coarse Gustavson task
+                //--------------------------------------------------------------
+
+                if (M == NULL)
+                {
+
+                    //----------------------------------------------------------
+                    // phase5: coarse Gustavson task, C=A*B
+                    //----------------------------------------------------------
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        int64_t pC = Cp [kk] ;
+                        int64_t cjnz = Cp [kk+1] - pC ;
+                        if (cjnz == 0) continue ;   // nothing to do
+                        GB_GET_B_j ;                // get B(:,j)
+                        mark++ ;
+                        if (cjnz == cvlen)          // C(:,j) is dense
+                        { 
+                            GB_COMPUTE_DENSE_C_j ;  // C(:,j) = A*B(:,j)
+                        }
+                        else if (bjnz == 1)         // C(:,j) = A(:,k)*B(k,j)
+                        { 
+                            GB_COMPUTE_C_j_WHEN_NNZ_B_j_IS_ONE ;
+                        }
+                        else if (16 * cjnz > cvlen) // C(:,j) is not very sparse
+                        {
+                            for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                            {
+                                int64_t k = Bi [pB] ;       // get B(k,j)
+                                GB_GET_A_k ;                // get A(:,k)
+                                if (aknz == 0) continue ;
+                                GB_GET_B_kj ;               // bkj = B(k,j)
+                                // scan A(:,k)
+                                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                                {
+                                    int64_t i = Ai [pA] ;   // get A(i,k)
+                                    GB_MULT_A_ik_B_kj ;     // t = A(i,k)*B(k,j)
+                                    if (Hf [i] != mark)
+                                    { 
+                                        // C(i,j) = A(i,k) * B(k,j)
+                                        Hf [i] = mark ;
+                                        GB_HX_WRITE (i, t) ;    // Hx [i] = t
+                                    }
+                                    else
+                                    { 
+                                        // C(i,j) += A(i,k) * B(k,j)
+                                        GB_HX_UPDATE (i, t) ;   // Hx [i] += t
+                                    }
+                                }
+                            }
+                            GB_GATHER_ALL_C_j(mark) ;   // gather into C(:,j) 
+                        }
+                        else    // C(:,j) is very sparse
+                        {
+                            for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                            {
+                                int64_t k = Bi [pB] ;       // get B(k,j)
+                                GB_GET_A_k ;                // get A(:,k)
+                                if (aknz == 0) continue ;
+                                GB_GET_B_kj ;               // bkj = B(k,j)
+                                // scan A(:,k)
+                                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                                {
+                                    int64_t i = Ai [pA] ;   // get A(i,k)
+                                    GB_MULT_A_ik_B_kj ;     // t = A(i,k)*B(k,j)
+                                    if (Hf [i] != mark)
+                                    { 
+                                        // C(i,j) = A(i,k) * B(k,j)
+                                        Hf [i] = mark ;
+                                        GB_HX_WRITE (i, t) ;    // Hx [i] = t
+                                        Ci [pC++] = i ;
+                                    }
+                                    else
+                                    { 
+                                        // C(i,j) += A(i,k) * B(k,j)
+                                        GB_HX_UPDATE (i, t) ;   // Hx [i] += t
+                                    }
+                                }
+                            }
+                            GB_SORT_AND_GATHER_C_j ;    // gather into C(:,j)
+                        }
+                    }
+
+                }
+                else if (mask_is_M)
+                {
+
+                    //----------------------------------------------------------
+                    // phase5: coarse Gustavson task, C<M>=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+
+                    // Hf [i] < mark    : M(i,j)=0, C(i,j) is ignored.
+                    // Hf [i] == mark   : M(i,j)=1, and C(i,j) not yet seen.
+                    // Hf [i] == mark+1 : M(i,j)=1, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        int64_t pC = Cp [kk] ;
+                        int64_t cjnz = Cp [kk+1] - pC ;
+                        if (cjnz == 0) continue ;   // nothing to do
+                        GB_GET_B_j ;                // get B(:,j)
+                        if (cjnz == cvlen)          // C(:,j) is dense
+                        { 
+                            GB_COMPUTE_DENSE_C_j ;  // C(:,j) = A*B(:,j)
+                            continue ;              // no need to examine M(:,j)
+                        }
+                        GB_GET_M_j ;            // get M(:,j)
+                        GB_GET_M_j_RANGE (64) ; // get first and last in M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        // scatter M(:,j)
+                        GB_SCATTER_M_j (pM_start, pM_end, mark) ;
+                        if (16 * cjnz > cvlen)  // C(:,j) is not very sparse
+                        {
+                            for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                            { 
+                                int64_t k = Bi [pB] ;       // get B(k,j)
+                                GB_GET_A_k ;                // get A(:,k)
+                                GB_SKIP_IF_A_k_DISJOINT_WITH_M_j ;
+                                GB_GET_B_kj ;               // bkj = B(k,j)
+                                #define GB_IKJ_VECTORIZE GB_PRAGMA_VECTORIZE
+                                #define GB_IKJ_IVDEP     GB_PRAGMA_IVDEP
+                                #define GB_IKJ                                 \
+                                {                                              \
+                                    int64_t hf = Hf [i] ;                      \
+                                    if (hf == mark)                            \
+                                    {                                          \
+                                        /* C(i,j) = A(i,k) * B(k,j) */         \
+                                        Hf [i] = mark1 ;     /* mark as seen */\
+                                        GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
+                                        GB_HX_WRITE (i, t) ; /* Hx [i] = t */  \
+                                    }                                          \
+                                    else if (hf == mark1)                      \
+                                    {                                          \
+                                        /* C(i,j) += A(i,k) * B(k,j) */        \
+                                        GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
+                                        GB_HX_UPDATE (i, t) ;/* Hx [i] += t */ \
+                                    }                                          \
+                                }
+                                GB_SCAN_M_j_OR_A_k ;
+                                #undef GB_IKJ_VECTORIZE
+                                #undef GB_IKJ_IVDEP
+                                #undef GB_IKJ
+                            }
+                            GB_GATHER_ALL_C_j(mark1) ;  // gather into C(:,j) 
+                        }
+                        else    // C(:,j) is very sparse
+                        {
+                            for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                            { 
+                                int64_t k = Bi [pB] ;       // get B(k,j)
+                                GB_GET_A_k ;                // get A(:,k)
+                                GB_SKIP_IF_A_k_DISJOINT_WITH_M_j ;
+                                GB_GET_B_kj ;               // bkj = B(k,j)
+                                #define GB_IKJ_VECTORIZE GB_PRAGMA_VECTORIZE
+                                #define GB_IKJ_IVDEP     GB_PRAGMA_IVDEP
+                                #define GB_IKJ                                 \
+                                {                                              \
+                                    int64_t hf = Hf [i] ;                      \
+                                    if (hf == mark)                            \
+                                    {                                          \
+                                        /* C(i,j) = A(i,k) * B(k,j) */         \
+                                        Hf [i] = mark1 ;     /* mark as seen */\
+                                        GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
+                                        GB_HX_WRITE (i, t) ; /* Hx [i] = t */  \
+                                        Ci [pC++] = i ; /* C(:,j) pattern */   \
+                                    }                                          \
+                                    else if (hf == mark1)                      \
+                                    {                                          \
+                                        /* C(i,j) += A(i,k) * B(k,j) */        \
+                                        GB_MULT_A_ik_B_kj ;  /* t = aik*bkj */ \
+                                        GB_HX_UPDATE (i, t) ;/* Hx [i] += t */ \
+                                    }                                          \
+                                }
+                                GB_SCAN_M_j_OR_A_k ;
+                                #undef GB_IKJ_VECTORIZE
+                                #undef GB_IKJ_IVDEP
+                                #undef GB_IKJ
+                            }
+                            GB_SORT_AND_GATHER_C_j ;    // gather into C(:,j)
+                        }
+                    }
+
+                }
+                else
+                {
+
+                    //----------------------------------------------------------
+                    // phase5: coarse Gustavson task, C<!M>=A*B
+                    //----------------------------------------------------------
+
+                    // if !M:
+                    // Hf [i] < mark    : M(i,j)=0, C(i,j) is not yet seen.
+                    // Hf [i] == mark   : M(i,j)=1, so C(i,j) is ignored.
+                    // Hf [i] == mark+1 : M(i,j)=0, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        int64_t pC = Cp [kk] ;
+                        int64_t cjnz = Cp [kk+1] - pC ;
+                        if (cjnz == 0) continue ;   // nothing to do
+                        GB_GET_B_j ;                // get B(:,j)
+                        if (cjnz == cvlen)          // C(:,j) is dense
+                        { 
+                            GB_COMPUTE_DENSE_C_j ;  // C(:,j) = A*B(:,j)
+                            continue ;              // no need to examine M(:,j)
+                        }
+                        GB_GET_M_j ;            // get M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        // scatter M(:,j)
+                        GB_SCATTER_M_j (pM_start, pM_end, mark) ;
+                        if (16 * cjnz > cvlen)  // C(:,j) is not very sparse
+                        {
+                            for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                            {
+                                int64_t k = Bi [pB] ;       // get B(k,j)
+                                GB_GET_A_k ;                // get A(:,k)
+                                if (aknz == 0) continue ;
+                                GB_GET_B_kj ;               // bkj = B(k,j)
+                                // scan A(:,k)
+                                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                                {
+                                    int64_t i = Ai [pA] ;   // get A(i,k)
+                                    int64_t hf = Hf [i] ;
+                                    if (hf < mark)
+                                    { 
+                                        // C(i,j) = A(i,k) * B(k,j)
+                                        Hf [i] = mark1 ;     // mark as seen
+                                        GB_MULT_A_ik_B_kj ;  // t =A(i,k)*B(k,j)
+                                        GB_HX_WRITE (i, t) ; // Hx [i] = t
+                                    }
+                                    else if (hf == mark1)
+                                    { 
+                                        // C(i,j) += A(i,k) * B(k,j)
+                                        GB_MULT_A_ik_B_kj ;  // t =A(i,k)*B(k,j)
+                                        GB_HX_UPDATE (i, t) ;// Hx [i] += t
+                                    }
+                                }
+                            }
+                            GB_GATHER_ALL_C_j(mark1) ;  // gather into C(:,j) 
+                        }
+                        else    // C(:,j) is very sparse
+                        {
+                            for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                            {
+                                int64_t k = Bi [pB] ;       // get B(k,j)
+                                GB_GET_A_k ;                // get A(:,k)
+                                if (aknz == 0) continue ;
+                                GB_GET_B_kj ;               // bkj = B(k,j)
+                                // scan A(:,k)
+                                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                                {
+                                    int64_t i = Ai [pA] ;   // get A(i,k)
+                                    int64_t hf = Hf [i] ;
+                                    if (hf < mark)
+                                    { 
+                                        // C(i,j) = A(i,k) * B(k,j)
+                                        Hf [i] = mark1 ;        // mark as seen
+                                        GB_MULT_A_ik_B_kj ;  // t =A(i,k)*B(k,j)
+                                        GB_HX_WRITE (i, t) ;    // Hx [i] = t
+                                        Ci [pC++] = i ; // create C(:,j) pattern
+                                    }
+                                    else if (hf == mark1)
+                                    { 
+                                        // C(i,j) += A(i,k) * B(k,j)
+                                        GB_MULT_A_ik_B_kj ;  // t =A(i,k)*B(k,j)
+                                        GB_HX_UPDATE (i, t) ;   // Hx [i] += t
+                                    }
+                                }
+                            }
+                            GB_SORT_AND_GATHER_C_j ;    // gather into C(:,j)
+                        }
+                    }
+                }
+
+            }
+            else
+            {
+
+                //--------------------------------------------------------------
+                // phase5: coarse hash task
+                //--------------------------------------------------------------
+
+                int64_t *GB_RESTRICT Hi = TaskList [taskid].Hi ;
+                int64_t hash_bits = (hash_size-1) ;
+
+                if (M == NULL)
+                {
+
+                    //----------------------------------------------------------
+                    // phase5: coarse hash task, C=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+                    // Let f = Hf [hash] and h = Hi [hash]
+
+                    // f < mark          : unoccupied.
+                    // h == i, f == mark : occupied with C(i,j)
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        int64_t pC = Cp [kk] ;
+                        int64_t cjnz = Cp [kk+1] - pC ;
+                        if (cjnz == 0) continue ;   // nothing to do
+                        GB_GET_B_j ;                // get B(:,j)
+                        if (bjnz == 1)              // C(:,j) = A(:,k)*B(k,j)
+                        { 
+                            GB_COMPUTE_C_j_WHEN_NNZ_B_j_IS_ONE ;
+                            continue ;
+                        }
+                        mark++ ;
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        {
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            if (aknz == 0) continue ;
+                            GB_GET_B_kj ;               // bkj = B(k,j)
+                            // scan A(:,k)
+                            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                            {
+                                int64_t i = Ai [pA] ;   // get A(i,k)
+                                GB_MULT_A_ik_B_kj ;     // t = A(i,k)*B(k,j)
+                                for (GB_HASH (i))   // find i in hash table
+                                {
+                                    if (Hf [hash] == mark)
+                                    {
+                                        // hash entry is occupied
+                                        if (Hi [hash] == i)
+                                        { 
+                                            // i already in the hash table
+                                            // Hx [hash] += t ;
+                                            GB_HX_UPDATE (hash, t) ;
+                                            break ;
+                                        }
+                                    }
+                                    else
+                                    { 
+                                        // hash entry is not occupied
+                                        Hf [hash] = mark ;
+                                        Hi [hash] = i ;
+                                        GB_HX_WRITE (hash, t) ;// Hx[hash]=t
+                                        Ci [pC++] = i ;
+                                        break ;
+                                    }
+                                }
+                            }
+                        }
+                        // found i if: Hf [hash] == mark and Hi [hash] == i
+                        GB_SORT_AND_GATHER_HASHED_C_j (mark, Hi [hash] == i) ;
+                    }
+
+                }
+                else if (mask_is_M)
+                {
+
+                    //----------------------------------------------------------
+                    // phase5: coarse hash task, C<M>=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+                    // Let h = Hi [hash] and f = Hf [hash].
+
+                    // f < mark            : M(i,j)=0, C(i,j) is ignored.
+                    // h == i, f == mark   : M(i,j)=1, and C(i,j) not yet seen.
+                    // h == i, f == mark+1 : M(i,j)=1, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        int64_t pC = Cp [kk] ;
+                        int64_t cjnz = Cp [kk+1] - pC ;
+                        if (cjnz == 0) continue ;   // nothing to do
+                        GB_GET_M_j ;                // get M(:,j)
+                        GB_GET_M_j_RANGE (64) ;     // get 1st & last in M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        GB_HASH_M_j ;               // hash M(:,j)
+                        GB_GET_B_j ;                // get B(:,j)
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        { 
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            GB_SKIP_IF_A_k_DISJOINT_WITH_M_j ;
+                            GB_GET_B_kj ;               // bkj = B(k,j)
+                            #define GB_IKJ_VECTORIZE
+                            #define GB_IKJ_IVDEP
+                            #define GB_IKJ                                     \
+                            {                                                  \
+                                for (GB_HASH (i))       /* find i in hash */   \
+                                {                                              \
+                                    int64_t f = Hf [hash] ;                    \
+                                    if (f < mark) break ; /* M(i,j)=0, ignore*/\
+                                    if (Hi [hash] == i)                        \
+                                    {                                          \
+                                        GB_MULT_A_ik_B_kj ; /* t = aik*bkj */  \
+                                        if (f == mark) /* if true, i is new */ \
+                                        {                                      \
+                                            /* C(i,j) is new */                \
+                                            Hf [hash] = mark1 ; /* mark seen */\
+                                            GB_HX_WRITE (hash, t) ;/*Hx[.]=t */\
+                                            Ci [pC++] = i ;                    \
+                                        }                                      \
+                                        else                                   \
+                                        {                                      \
+                                            /* C(i,j) has been seen; update */ \
+                                            GB_HX_UPDATE (hash, t) ;           \
+                                        }                                      \
+                                        break ;                                \
+                                    }                                          \
+                                }                                              \
+                            }
+                            GB_SCAN_M_j_OR_A_k ;
+                            #undef GB_IKJ_VECTORIZE
+                            #undef GB_IKJ_IVDEP
+                            #undef GB_IKJ
+                        }
+                        // found i if: Hf [hash] == mark1 and Hi [hash] == i
+                        GB_SORT_AND_GATHER_HASHED_C_j (mark1, Hi [hash] == i) ;
+                    }
+
+                }
+                else
+                {
+
+                    //----------------------------------------------------------
+                    // phase5: coarse hash task, C<!M>=A*B
+                    //----------------------------------------------------------
+
+                    // Initially, Hf [...] < mark for all of Hf.
+                    // Let h = Hi [hash] and f = Hf [hash].
+
+                    // f < mark: unoccupied, M(i,j)=0, and C(i,j) not yet seen.
+                    // h == i, f == mark   : M(i,j)=1. C(i,j) ignored.
+                    // h == i, f == mark+1 : M(i,j)=0, and C(i,j) has been seen.
+
+                    for (int64_t kk = kfirst ; kk <= klast ; kk++)
+                    {
+                        int64_t pC = Cp [kk] ;
+                        int64_t cjnz = Cp [kk+1] - pC ;
+                        if (cjnz == 0) continue ;   // nothing to do
+                        GB_GET_M_j ;                // get M(:,j)
+                        mark += 2 ;
+                        int64_t mark1 = mark+1 ;
+                        GB_HASH_M_j ;               // hash M(:,j)
+                        GB_GET_B_j ;                // get B(:,j)
+                        for ( ; pB < pB_end ; pB++)     // scan B(:,j)
+                        {
+                            int64_t k = Bi [pB] ;       // get B(k,j)
+                            GB_GET_A_k ;                // get A(:,k)
+                            if (aknz == 0) continue ;
+                            GB_GET_B_kj ;               // bkj = B(k,j)
+                            // scan A(:,k)
+                            for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                            {
+                                int64_t i = Ai [pA] ;   // get A(i,k)
+                                for (GB_HASH (i))       // find i in hash
+                                {
+                                    int64_t f = Hf [hash] ;
+                                    if (f < mark)   // if true, i is new
+                                    { 
+                                        // C(i,j) is new
+                                        Hf [hash] = mark1 ; // mark C(i,j) seen
+                                        Hi [hash] = i ;
+                                        GB_MULT_A_ik_B_kj ; // t = A(i,k)*B(k,j)
+                                        GB_HX_WRITE (hash, t) ; // Hx [hash] = t
+                                        Ci [pC++] = i ;
+                                        break ;
+                                    }
+                                    if (Hi [hash] == i)
+                                    {
+                                        if (f == mark1)
+                                        { 
+                                            // C(i,j) has been seen; update it.
+                                            GB_MULT_A_ik_B_kj ;//t=A(i,k)*B(k,j)
+                                            GB_HX_UPDATE (hash, t) ;//Hx[ ] += t
+                                        }
+                                        break ;
+                                    }
+                                }
+                            }
+                        }
+                        // found i if: Hf [hash] == mark1 and Hi [hash] == i
+                        GB_SORT_AND_GATHER_HASHED_C_j (mark1, Hi [hash] == i) ;
+                    }
+                }
+            }
+        }
+    }
+
+    //==========================================================================
+    // phase6: final gather phase for fine hash tasks
+    //==========================================================================
+
+    if (cjnz_max > 0)
+    {
+        int64_t *GB_RESTRICT W = NULL ;
+        bool parallel_sort = (cjnz_max > GB_BASECASE && nthreads > 1) ;
+        if (parallel_sort)
+        {
+            // allocate workspace for parallel mergesort
+            GB_MALLOC_MEMORY (W, cjnz_max, sizeof (int64_t)) ;
+            if (W == NULL)
+            { 
+                // out of memory
+                return (GrB_OUT_OF_MEMORY) ;
+            }
+        }
+
+        for (taskid = 0 ; taskid < nfine ; taskid++)
+        {
+            int64_t hash_size = TaskList [taskid].hsize ;
+            bool use_Gustavson = (hash_size == cvlen) ;
+            if (!use_Gustavson && taskid == TaskList [taskid].master)
+            {
+
+                //--------------------------------------------------------------
+                // phase6: fine hash task, C=A*B, C<M>=A*B, C<!M>=A*B
+                //--------------------------------------------------------------
+
+                // (Hf [hash] & 3) == 2 if C(i,j) is an entry in C(:,j),
+                // and the index i of the entry is (Hf [hash] >> 2) - 1.
+
+                int64_t kk = TaskList [taskid].vector ;
+                int64_t hash_bits = (hash_size-1) ;
+                int64_t  *GB_RESTRICT Hf = TaskList [taskid].Hf ;
+                int64_t cjnz = Cp [kk+1] - Cp [kk] ;
+
+                // sort the pattern of C(:,j)
+                int nth = GB_nthreads (cjnz, chunk, nthreads) ;
+                if (parallel_sort && nth > 1)
+                { 
+                    // parallel mergesort
+                    GB_msort_1 (Ci + Cp [kk], W, cjnz, nth) ;
+                }
+                else
+                { 
+                    // sequential quicksort
+                    GB_qsort_1a (Ci + Cp [kk], cjnz) ;
+                }
+
+                #if !GB_IS_ANY_PAIR_SEMIRING
+
+                    GB_CTYPE *GB_RESTRICT Hx =
+                        (GB_CTYPE *) TaskList [taskid].Hx ;
+                    // gather the values of C(:,j)
+                    int64_t pC ;
+                    #pragma omp parallel for num_threads(nth) schedule(static)
+                    for (pC = Cp [kk] ; pC < Cp [kk+1] ; pC++)
+                    {
+                        int64_t i = Ci [pC] ;   // get C(i,j)
+                        int64_t i1 = i + 1 ;
+                        for (GB_HASH (i))       // find i in hash table
+                        {
+                            int64_t hf = Hf [hash] ;
+                            if ((hf & 3) == 2 && (hf >> 2) == i1)
+                            { 
+                                // found i in the hash table
+                                GB_CIJ_GATHER (pC, hash) ; // Cx[pC] = Hx[hash]
+                                break ;
+                            }
+                        }
+                    }
+
+                #endif
+            }
+        }
+
+        // free workspace
+        GB_FREE_MEMORY (W, cjnz_max, sizeof (int64_t)) ;
+    }
+}
+
+#undef Cx
+#undef Hx
+
diff --git a/Source/Template/GB_AxB_saxpy3_template.h b/Source/Template/GB_AxB_saxpy3_template.h
new file mode 100644
index 0000000000..5e022b2bdc
--- /dev/null
+++ b/Source/Template/GB_AxB_saxpy3_template.h
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB_saxpy3_template.h: C=A*B, C<M>=A*B, or C<!M>=A*B via saxpy3 method
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// Definitions for GB_AxB_saxpy3_template.c
+
+#ifndef GB_AXB_SAXPY3_TEMPLATE_H
+#define GB_AXB_SAXPY3_TEMPLATE_H
+
+//------------------------------------------------------------------------------
+// GB_GET_M_j: prepare to iterate over M(:,j)
+//------------------------------------------------------------------------------
+
+// Prepare to iterate over the vector M(:,j), for the (kk)th vector of B; the
+// vector looked up is ((Bh == NULL) ? kk : Bh [kk]).  FUTURE::: lookup all
+// M(:,j) in a single pass and save the mapping (C_to_M in GB_ewise_slice).
+#define GB_GET_M_j                                              \
+    int64_t mpleft = 0 ;                                        \
+    int64_t mpright = mnvec-1 ;                                 \
+    int64_t pM_start, pM_end ;  /* set by GB_lookup below */    \
+    GB_lookup (M_is_hyper, Mh, Mp, &mpleft, mpright,            \
+        ((Bh == NULL) ? kk : Bh [kk]), &pM_start, &pM_end) ;    \
+    int64_t mjnz = pM_end - pM_start ;    /* nnz (M (:,j)) */
+
+//------------------------------------------------------------------------------
+// GB_GET_M_j_RANGE: get the first and last indices in M(:,j)
+//------------------------------------------------------------------------------
+
+#define GB_GET_M_j_RANGE(gamma) /* needs GB_GET_M_j first */    \
+    int64_t im_first = -1, im_last = -1 ; /* if mjnz == 0 */    \
+    if (mjnz > 0)                                               \
+    {                                                           \
+        im_first = Mi [pM_start] ;  /* get first M(:,j) */      \
+        im_last  = Mi [pM_end-1] ;  /* get last M(:,j) */       \
+    }                                                           \
+    int64_t mjnz_much = mjnz * (gamma) /* parens: gamma may be any expr */
+
+//------------------------------------------------------------------------------
+// GB_SCATTER_M_j: scatter M(:,j) for a fine or coarse Gustavson task
+//------------------------------------------------------------------------------
+
+#define GB_SCATTER_M_j_TYPE(mask_t,pMstart,pMend,mark)                  \
+{   /* one case body of the switch (msize) in GB_SCATTER_M_j */         \
+    const mask_t *GB_RESTRICT Mxx = (mask_t *) Mx ;                     \
+    for (int64_t pM = pMstart ; pM < pMend ; pM++) /* scan M(:,j) */    \
+    {                                                                   \
+        if (Mxx [pM]) Hf [Mi [pM]] = mark ;   /* mark if M(i,j) != 0 */ \
+    }                                                                   \
+}                                                                       \
+break ;     /* ends the enclosing case of the switch */
+
+// scatter M(:,j) for a coarse Gustavson task, C<M>=A*B or C<!M>=A*B
+#define GB_SCATTER_M_j(pMstart,pMend,mark)                                  \
+    if (Mx == NULL)                                                         \
+    {                                                                       \
+        /* mask is structural, not valued */                                \
+        for (int64_t pM = pMstart ; pM < pMend ; pM++)                      \
+        {                                                                   \
+            Hf [Mi [pM]] = mark ;   /* Hf [i] = M(i,j) */                   \
+        }                                                                   \
+    }                                                                       \
+    else                                                                    \
+    {                                                                       \
+        /* mask is valued, not structural */                                \
+        switch (msize)      /* msize: bytes per mask entry */                \
+        {                                                                   \
+            default:    /* any other msize: same as case 1 */               \
+            case 1: GB_SCATTER_M_j_TYPE (uint8_t , pMstart, pMend, mark) ;  \
+            case 2: GB_SCATTER_M_j_TYPE (uint16_t, pMstart, pMend, mark) ;  \
+            case 4: GB_SCATTER_M_j_TYPE (uint32_t, pMstart, pMend, mark) ;  \
+            case 8: GB_SCATTER_M_j_TYPE (uint64_t, pMstart, pMend, mark) ;  \
+        }   /* each case breaks inside GB_SCATTER_M_j_TYPE */               \
+    }
+
+//------------------------------------------------------------------------------
+// GB_HASH_M_j: hash M(:,j) into Hf and Hi for a coarse hash task
+//------------------------------------------------------------------------------
+
+// hash M(:,j) into Hf and Hi for coarse hash task, C<M>=A*B or C<!M>=A*B
+#define GB_HASH_M_j                                                     \
+    for (int64_t pM = pM_start ; pM < pM_end ; pM++) /* scan M(:,j) */  \
+    {                                                                   \
+        GB_GET_M_ij ;           /* get M(i,j) */                        \
+        if (!mij) continue ;    /* skip if M(i,j)=0 */                  \
+        int64_t i = Mi [pM] ;                                           \
+        for (GB_HASH (i))       /* find a free slot for i */            \
+        {                                                               \
+            if (Hf [hash] < mark)   /* slot is unoccupied */            \
+            {                                                           \
+                Hf [hash] = mark ;  /* insert M(i,j)=1 */               \
+                Hi [hash] = i ;                                         \
+                break ;                                                 \
+            }                                                           \
+        }   /* else GB_REHASH runs and the loop retries */              \
+    }
+
+//------------------------------------------------------------------------------
+// GB_GET_B_j: prepare to iterate over B(:,j)
+//------------------------------------------------------------------------------
+
+// prepare to iterate over the vector B(:,j), the (kk)th vector in B,
+// where j == ((Bh == NULL) ? kk : Bh [kk]).  Note that j itself is never
+// needed; just kk.
+#define GB_GET_B_j                                                          \
+    int64_t pleft = 0 ;         /* pleft, pright: used by GB_GET_A_k */     \
+    int64_t pright = anvec-1 ;                                              \
+    int64_t pB = Bp [kk] ;                                                  \
+    int64_t pB_end = Bp [kk+1] ;                                            \
+    int64_t bjnz = pB_end - pB ;  /* nnz (B (:,j)) */                       \
+    /* FUTURE::: can skip if mjnz == 0 for C<M>=A*B tasks */                \
+    if (A_is_hyper && bjnz > 2)                                             \
+    {                                                                       \
+        /* trim Ah [0..pright] to remove any entries past last B(:,j), */   \
+        /* to speed up GB_lookup in GB_GET_A_k. */                          \
+        GB_bracket_right (Bi [pB_end-1], Ah, 0, &pright) ;                  \
+    }
+
+//------------------------------------------------------------------------------
+// GB_GET_B_kj: get the numeric value of B(k,j)
+//------------------------------------------------------------------------------
+
+#define GB_GET_B_kj     /* GB_GETB is defined outside this header */ \
+    GB_GETB (bkj, Bx, pB)       /* bkj = Bx [pB] */
+
+//------------------------------------------------------------------------------
+// GB_GET_A_k: prepare to iterate over the vector A(:,k)
+//------------------------------------------------------------------------------
+
+#define GB_GET_A_k      /* needs k, and pleft/pright from GB_GET_B_j */     \
+    int64_t pA_start, pA_end ;  /* set by GB_lookup below */                \
+    GB_lookup (A_is_hyper, Ah, Ap, &pleft, pright, k, &pA_start, &pA_end) ; \
+    int64_t aknz = pA_end - pA_start ;    /* nnz (A (:,k)) */
+
+//------------------------------------------------------------------------------
+// GB_SKIP_IF_A_k_DISJOINT_WITH_M_j:  skip if A(:,k) and M(:,j) are disjoint
+//------------------------------------------------------------------------------
+
+// for C<M>=A*B: skip A(:,k)*B(k,j) if A(:,k), M(:,j) index ranges are disjoint
+#define GB_SKIP_IF_A_k_DISJOINT_WITH_M_j                    \
+    if (aknz == 0) continue ;       /* A(:,k) is empty */   \
+    int64_t alo = Ai [pA_start] ;   /* get first A(:,k) */  \
+    int64_t ahi = Ai [pA_end-1] ;   /* get last A(:,k) */   \
+    if (ahi < im_first || alo > im_last) continue   /* no overlap */
+
+//------------------------------------------------------------------------------
+// GB_GET_M_ij: get the numeric value of M(i,j)
+//------------------------------------------------------------------------------
+
+#define GB_GET_M_ij                                 \
+    /* get M(i,j), at Mi [pM] and Mx [pM] */        \
+    bool mij = GB_mcast (Mx, pM, msize)     /* caller adds the ';' */
+
+//------------------------------------------------------------------------------
+// GB_MULT_A_ik_B_kj: declare t and compute t = A(i,k) * B(k,j)
+//------------------------------------------------------------------------------
+
+#if GB_IS_PAIR_MULTIPLIER
+
+    // PAIR multiplier: t is always 1; no numeric work to do to compute t.
+    // The LXOR_PAIR and PLUS_PAIR semirings need the value t = 1 to use in
+    // their monoid operator, however.
+    #define t 1                     /* t is a macro here, not a variable */
+    #define GB_MULT_A_ik_B_kj       /* expands to nothing */
+
+#else
+
+    // typical semiring: uses pA and bkj from the enclosing scope
+    #define GB_MULT_A_ik_B_kj                                   \
+        GB_GETA (aik, Ax, pA) ;     /* aik = Ax [pA] ;  */      \
+        GB_CIJ_DECLARE (t) ;        /* ctype t ;        */      \
+        GB_MULT (t, aik, bkj)       /* t = aik * bkj ;  */
+
+#endif
+
+//------------------------------------------------------------------------------
+// GB_COMPUTE_DENSE_C_j: compute C(:,j)=A*B(:,j) when C(:,j) is completely dense
+//------------------------------------------------------------------------------
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR: purely symbolic; the pattern of C(:,j) is all of 0..cvlen-1
+    #define GB_COMPUTE_DENSE_C_j                                \
+        for (int64_t i = 0 ; i < cvlen ; i++)                   \
+        {                                                       \
+            Ci [pC + i] = i ;   /* index i is at pC+i */        \
+        }
+
+#else
+
+    // typical semiring: start from GB_IDENTITY, then accumulate A*B(:,j)
+    #define GB_COMPUTE_DENSE_C_j                                    \
+        for (int64_t i = 0 ; i < cvlen ; i++)                       \
+        {                                                           \
+            Ci [pC + i] = i ;                                       \
+            GB_CIJ_WRITE (pC + i, GB_IDENTITY) ; /* Cij=identity */ \
+        }                                                           \
+        for ( ; pB < pB_end ; pB++)     /* scan B(:,j) */           \
+        {                                                           \
+            int64_t k = Bi [pB] ;       /* get B(k,j) */            \
+            GB_GET_A_k ;                /* get A(:,k) */            \
+            if (aknz == 0) continue ;   /* A(:,k) is empty */       \
+            GB_GET_B_kj ;               /* bkj = B(k,j) */          \
+            /* FUTURE::: handle the case when A(:,k) is dense */    \
+            /* scan A(:,k) */                                       \
+            for (int64_t pA = pA_start ; pA < pA_end ; pA++)        \
+            {                                                       \
+                int64_t i = Ai [pA] ;    /* get A(i,k) */           \
+                GB_MULT_A_ik_B_kj ;      /* t = A(i,k)*B(k,j) */    \
+                GB_CIJ_UPDATE (pC + i, t) ; /* Cx [pC+i]+=t */      \
+            }                                                       \
+        }
+
+#endif
+
+//------------------------------------------------------------------------------
+// GB_COMPUTE_C_j_WHEN_NNZ_B_j_IS_ONE: compute C(:,j) when nnz(B(:,j)) == 1
+//------------------------------------------------------------------------------
+
+// C(:,j) = A(:,k)*B(k,j) when there is a single entry in B(:,j)
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR: purely symbolic; copy the pattern of A(:,k); pC is not advanced
+    #define GB_COMPUTE_C_j_WHEN_NNZ_B_j_IS_ONE                      \
+        int64_t k = Bi [pB] ;       /* get B(k,j) */                \
+        GB_GET_A_k ;                /* get A(:,k) */                \
+        memcpy (Ci + pC, Ai + pA_start, aknz * sizeof (int64_t)) ;
+
+#else
+
+    // typical semiring: C(:,j) takes the pattern of A(:,k); pC is advanced
+    #define GB_COMPUTE_C_j_WHEN_NNZ_B_j_IS_ONE                      \
+        int64_t k = Bi [pB] ;       /* get B(k,j) */                \
+        GB_GET_A_k ;                /* get A(:,k) */                \
+        GB_GET_B_kj ;               /* bkj = B(k,j) */              \
+        /* scan A(:,k) */                                           \
+        for (int64_t pA = pA_start ; pA < pA_end ; pA++)            \
+        {                                                           \
+            int64_t i = Ai [pA] ;       /* get A(i,k) */            \
+            GB_MULT_A_ik_B_kj ;         /* t = A(i,k)*B(k,j) */     \
+            GB_CIJ_WRITE (pC, t) ;      /* Cx [pC] = t */           \
+            Ci [pC++] = i ;             /* Ci [pC] = i, then pC++ */\
+        }
+
+#endif
+
+//------------------------------------------------------------------------------
+// GB_GATHER_ALL_C_j: gather the values and pattern of C(:,j)
+//------------------------------------------------------------------------------
+
+// gather C(:,j) for a coarse Gustavson task, in index order (so no sort)
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR: result is purely symbolic; no numeric work to do
+    #define GB_GATHER_ALL_C_j(mark)                                 \
+        for (int64_t i = 0 ; i < cvlen ; i++)   /* i ascending */   \
+        {                                                           \
+            if (Hf [i] == mark)     /* C(i,j) is present */         \
+            {                                                       \
+                Ci [pC++] = i ;                                     \
+            }                                                       \
+        }
+
+#else
+
+    // typical semiring
+    #define GB_GATHER_ALL_C_j(mark)                                 \
+        for (int64_t i = 0 ; i < cvlen ; i++)   /* i ascending */   \
+        {                                                           \
+            if (Hf [i] == mark)     /* C(i,j) is present */         \
+            {                                                       \
+                GB_CIJ_GATHER (pC, i) ; /* Cx [pC] = Hx [i] */      \
+                Ci [pC++] = i ;                                     \
+            }                                                       \
+        }
+
+#endif
+
+//------------------------------------------------------------------------------
+// GB_SORT_AND_GATHER_C_j: sort the pattern of C(:,j) and gather values
+//------------------------------------------------------------------------------
+
+// sort the pattern of C(:,j) then gather the values for a coarse Gustavson task
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR: result is purely symbolic; just sort the pattern
+    #define GB_SORT_AND_GATHER_C_j                              \
+        /* sort the cjnz indices of C(:,j) in place */          \
+        GB_qsort_1a (Ci + Cp [kk], cjnz) ;
+
+#else
+
+    // typical semiring: Hx is indexed directly by i (no hashing)
+    #define GB_SORT_AND_GATHER_C_j                              \
+        /* sort the cjnz indices of C(:,j) in place */          \
+        GB_qsort_1a (Ci + Cp [kk], cjnz) ;                      \
+        /* gather values; this pC is local to the loop */       \
+        for (int64_t pC = Cp [kk] ; pC < Cp [kk+1] ; pC++)      \
+        {                                                       \
+            int64_t i = Ci [pC] ;                               \
+            GB_CIJ_GATHER (pC, i) ;   /* Cx [pC] = Hx [i] */    \
+        }
+
+#endif
+
+//------------------------------------------------------------------------------
+// GB_SORT_AND_GATHER_HASHED_C_j: sort pattern, gather values, for coarse hash 
+//------------------------------------------------------------------------------
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR: purely symbolic; just sort the pattern (both arguments unused)
+    #define GB_SORT_AND_GATHER_HASHED_C_j(hash_mark,Hi_hash_equals_i)       \
+        /* sort the pattern of C(:,j) */                                    \
+        GB_qsort_1a (Ci + Cp [kk], cjnz) ;
+
+#else
+
+    // sort the pattern of C(:,j) then gather the values for a coarse hash task
+    #define GB_SORT_AND_GATHER_HASHED_C_j(hash_mark,Hi_hash_equals_i)       \
+        /* sort the pattern of C(:,j) */                                    \
+        GB_qsort_1a (Ci + Cp [kk], cjnz) ;                                  \
+        for (int64_t pC = Cp [kk] ; pC < Cp [kk+1] ; pC++)                  \
+        {                                                                   \
+            int64_t i = Ci [pC] ;                                           \
+            int64_t marked = (hash_mark) ;  /* evaluated per entry */       \
+            for (GB_HASH (i))           /* find i in hash table */          \
+            {                                                               \
+                if (Hf [hash] == marked && (Hi_hash_equals_i))              \
+                {                                                           \
+                    /* i found; 2nd macro arg is e.g. Hi [hash] == i */     \
+                    /* Cx [pC] = Hx [hash] ; */                             \
+                    GB_CIJ_GATHER (pC, hash) ;                              \
+                    break ;                                                 \
+                }                                                           \
+            }                                                               \
+        }
+
+#endif
+
+//------------------------------------------------------------------------------
+// GB_SCAN_M_j_OR_A_k: compute C(:,j) using linear scan or binary search
+//------------------------------------------------------------------------------
+
+// C(:,j)<M(:,j)>=A(:,k)*B(k,j) using one of two methods
+#define GB_SCAN_M_j_OR_A_k                                              \
+{   /* GB_IKJ must be #defined before this macro is expanded */         \
+    if (aknz > 256 && mjnz_much < aknz)  /* see GB_GET_M_j_RANGE */     \
+    /* nnz(M(:,j)) much less than nnz(A(:,k)) */                        \
+    {                                                                   \
+        /* scan M(:,j), and do binary search for A(i,k) */              \
+        int64_t pA = pA_start ;                                         \
+        for (int64_t pM = pM_start ; pM < pM_end ; pM++)                \
+        {                                                               \
+            GB_GET_M_ij ;           /* get M(i,j) */                    \
+            if (!mij) continue ;    /* skip if M(i,j)=0 */              \
+            int64_t i = Mi [pM] ;                                       \
+            bool found ;            /* search for A(i,k) */             \
+            int64_t apright = pA_end - 1 ;                              \
+            GB_BINARY_SEARCH (i, Ai, pA, apright, found) ;              \
+            if (found)                                                  \
+            {                                                           \
+                /* C(i,j)<M(i,j)> += A(i,k) * B(k,j) for this method. */\
+                /* M(i,j) is now known to be equal to 1, so there are */\
+                /* cases in the GB_IKJ operation that can never */      \
+                /* occur.  This could be pruned from the GB_IKJ */      \
+                /* operation, but then this operation would differ */   \
+                /* from the GB_IKJ operation in the linear-time scan */ \
+                /* of A(:,k), below.  It's unlikely that pruning this */\
+                /* case would lead to much performance improvement. */  \
+                GB_IKJ ;                                                \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
+    else                                                                \
+    {                                                                   \
+        /* scan A(:,k), and lookup M(i,j) */                            \
+        for (int64_t pA = pA_start ; pA < pA_end ; pA++)                \
+        {                                                               \
+            int64_t i = Ai [pA] ;    /* get A(i,k) */                   \
+            /* do C(i,j)<M(i,j)> += A(i,k) * B(k,j) for this method */  \
+            /* M(i,j) may be 0 or 1, as given in the hash table */      \
+            GB_IKJ ;                                                    \
+        }                                                               \
+    }                                                                   \
+}
+
+//------------------------------------------------------------------------------
+// GB_ATOMIC_UPDATE_HX:  Hx [i] += t
+//------------------------------------------------------------------------------
+
+#if GB_IS_ANY_MONOID
+
+    // The update Hx [i] += t can be skipped entirely, for the ANY monoid.
+    #define GB_ATOMIC_UPDATE_HX(i,t)    /* expands to nothing */
+
+#elif GB_HAS_ATOMIC
+
+    // Hx [i] += t via atomic update
+    #if GB_HAS_OMP_ATOMIC
+
+        // built-in PLUS, TIMES, LOR, LAND, LXOR monoids can be
+        // implemented with an OpenMP pragma
+        #define GB_ATOMIC_UPDATE_HX(i,t)                            \
+            GB_ATOMIC_UPDATE    /* presumably the omp pragma */     \
+            GB_HX_UPDATE (i, t)
+
+    #else
+
+        // built-in MIN, MAX, and EQ monoids only, which cannot
+        // be implemented with an OpenMP pragma
+        #define GB_ATOMIC_UPDATE_HX(i,t)                            \
+            GB_CTYPE xold, xnew, *px = Hx + (i) ; /* new locals */  \
+            do      /* retry until the compare-exchange succeeds */ \
+            {                                                       \
+                /* xold = Hx [i] via atomic read */                 \
+                GB_ATOMIC_READ                                      \
+                xold = (*px) ;                                      \
+                /* xnew = xold + t */                               \
+                xnew = GB_ADD_FUNCTION (xold, t) ;                  \
+            }                                                       \
+            while (!__atomic_compare_exchange (px, &xold, &xnew,    \
+                true, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) /* weak CAS */
+
+    #endif
+
+//          prior version:
+//          while (!__sync_bool_compare_and_swap
+//              ((GB_CTYPE_PUN *) px,
+//              * ((GB_CTYPE_PUN *) (&xold)),
+//              * ((GB_CTYPE_PUN *) (&xnew))))
+
+#else
+
+    // Hx [i] += t can only be done inside the critical section
+    #define GB_ATOMIC_UPDATE_HX(i,t)       \
+        GB_PRAGMA (omp flush)           \
+        GB_HX_UPDATE (i, t) ;           \
+        GB_PRAGMA (omp flush)
+
+#endif
+
+//------------------------------------------------------------------------------
+// GB_ATOMIC_WRITE_HX:  Hx [i] = t
+//------------------------------------------------------------------------------
+
+#if GB_IS_ANY_PAIR_SEMIRING
+
+    // ANY_PAIR: result is purely symbolic; no numeric work to do
+    #define GB_ATOMIC_WRITE_HX(i,t)     /* expands to nothing */
+
+#else 
+
+    // atomic write, or a flushed plain write when GB_HAS_ATOMIC is false
+    #if GB_HAS_ATOMIC
+        // Hx [i] = t via atomic write
+        #define GB_ATOMIC_WRITE_HX(i,t)       \
+            GB_ATOMIC_WRITE   \
+            GB_HX_WRITE (i, t)
+    #else
+        // Hx [i] = t with flushes; assumes the caller is in a critical section
+        #define GB_ATOMIC_WRITE_HX(i,t)       \
+            GB_PRAGMA (omp flush)          \
+            GB_HX_WRITE (i, t) ;           \
+            GB_PRAGMA (omp flush)
+    #endif
+
+#endif
+
+//------------------------------------------------------------------------------
+// hash
+//------------------------------------------------------------------------------
+
+// iterate over the hash table looking for index i.  The loop condition is
+// empty, so the body must break:  for (GB_HASH (i)) { ... }
+#define GB_HASH(i) int64_t hash = GB_HASH_FUNCTION (i) ; ; GB_REHASH (hash,i)
+
+#endif
+
+//------------------------------------------------------------------------------
+// free workspace (after the include guard, so reset on every #include)
+//------------------------------------------------------------------------------
+
+#undef  GB_FREE_INITIAL_WORK
+#define GB_FREE_INITIAL_WORK ;      /* empty statement: nothing is freed */
+
+#undef  GB_FREE_TASKLIST_AND_HASH_TABLES
+#define GB_FREE_TASKLIST_AND_HASH_TABLES                                    \
+{   /* free the task list and the Hi_all, Hf_all, Hx_all workspaces */      \
+    GB_FREE_MEMORY (*(TaskList_handle), ntasks, sizeof (GB_saxpy3task_struct));\
+    GB_FREE_MEMORY (Hi_all, Hi_size_total, sizeof (int64_t)) ;              \
+    GB_FREE_MEMORY (Hf_all, Hf_size_total, sizeof (int64_t)) ;              \
+    GB_FREE_MEMORY (Hx_all, Hx_size_total, 1) ;     /* item size 1 */       \
+}
+
+#undef  GB_FREE_WORK
+#define GB_FREE_WORK                                                        \
+{   /* free the initial work, then the task list and hash tables */         \
+    GB_FREE_INITIAL_WORK ;                                                  \
+    GB_FREE_TASKLIST_AND_HASH_TABLES ;                                      \
+}
+
+#undef  GB_FREE_ALL
+#define GB_FREE_ALL                                                         \
+{   /* free all workspace and also the matrix at Chandle */                 \
+    GB_FREE_WORK ;                                                          \
+    GB_MATRIX_FREE (Chandle) ;                                              \
+}
+
diff --git a/Source/Template/GB_AxB_type_factory.c b/Source/Template/GB_AxB_type_factory.c
index 2aa61f2d6a..597695c7f9 100644
--- a/Source/Template/GB_AxB_type_factory.c
+++ b/Source/Template/GB_AxB_type_factory.c
@@ -2,27 +2,20 @@
 // GB_AxB_type_factory.c: switch factory for C=A*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 // A template file #include'd in GB_AxB_factory.c
 
-// This file is used for 19 operators.  The multiply operator is combined here
-// with 40 or 44 monoids to create 40 or 44 unique semiring workers.
+// GB_NO_BOOLEAN is defined for multiply operators in the #include'ing file
+// (min, max, plus, minus, rminus, times, div, rdiv, is*) since those multiply
+// operators are redundant and have been renamed.  For these, the boolean
+// monoids are not needed.
 
-//      FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
-//      ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
-//      LAND, LOR, LXOR.
-
-// For all of them, the types of x, y, and z are the same.
-// There are 40 non-boolean monoids and 0 or 4 boolean monoids defined here.
-
-// GB_NO_BOOLEAN is defined for 15 of these multiply operators in the
-// #include'ing file (min, max, plus, minus, rminus, times, div, rdiv, is*)
-// since those multiply operators are redundant and have been renamed.  For
-// these, the boolean monoids are not needed.
+// For the PAIR multiply operator, the monoids MIN, MAX, TIMES, EQ, LAND, 
+// and LOR have been renamed to ANY_PAIR.  See GB_AxB_semiring_builtin.c.
 
 ASSERT (zcode == xycode) ;
 
@@ -31,6 +24,9 @@ if (zcode != GB_BOOL_code)
     switch (add_opcode)
     {
 
+        // MIN_PAIR, MAX_PAIR, and TIMES_PAIR have been renamed to ANY_PAIR
+        #ifndef GB_MULT_IS_PAIR_OPERATOR
+
         case GB_MIN_opcode:
 
             switch (zcode)
@@ -67,6 +63,26 @@ if (zcode != GB_BOOL_code)
             }
             break ;
 
+        case GB_TIMES_opcode:
+
+            switch (zcode)
+            {
+                case GB_INT8_code   : GB_AxB_WORKER (_times, GB_MULT_NAME, _int8  )
+                case GB_UINT8_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint8 )
+                case GB_INT16_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _int16 )
+                case GB_UINT16_code : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint16)
+                case GB_INT32_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _int32 )
+                case GB_UINT32_code : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint32)
+                case GB_INT64_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _int64 )
+                case GB_UINT64_code : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint64)
+                case GB_FP32_code   : GB_AxB_WORKER (_times, GB_MULT_NAME, _fp32  )
+                case GB_FP64_code   : GB_AxB_WORKER (_times, GB_MULT_NAME, _fp64  )
+                default: ;
+            }
+            break ;
+
+        #endif
+
         case GB_PLUS_opcode:
 
             switch (zcode)
@@ -85,20 +101,20 @@ if (zcode != GB_BOOL_code)
             }
             break ;
 
-        case GB_TIMES_opcode:
+        case GB_ANY_opcode:
 
             switch (zcode)
             {
-                case GB_INT8_code   : GB_AxB_WORKER (_times, GB_MULT_NAME, _int8  )
-                case GB_UINT8_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint8 )
-                case GB_INT16_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _int16 )
-                case GB_UINT16_code : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint16)
-                case GB_INT32_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _int32 )
-                case GB_UINT32_code : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint32)
-                case GB_INT64_code  : GB_AxB_WORKER (_times, GB_MULT_NAME, _int64 )
-                case GB_UINT64_code : GB_AxB_WORKER (_times, GB_MULT_NAME, _uint64)
-                case GB_FP32_code   : GB_AxB_WORKER (_times, GB_MULT_NAME, _fp32  )
-                case GB_FP64_code   : GB_AxB_WORKER (_times, GB_MULT_NAME, _fp64  )
+                case GB_INT8_code   : GB_AxB_WORKER (_any, GB_MULT_NAME, _int8  )
+                case GB_UINT8_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint8 )
+                case GB_INT16_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _int16 )
+                case GB_UINT16_code : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint16)
+                case GB_INT32_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _int32 )
+                case GB_UINT32_code : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint32)
+                case GB_INT64_code  : GB_AxB_WORKER (_any, GB_MULT_NAME, _int64 )
+                case GB_UINT64_code : GB_AxB_WORKER (_any, GB_MULT_NAME, _uint64)
+                case GB_FP32_code   : GB_AxB_WORKER (_any, GB_MULT_NAME, _fp32  )
+                case GB_FP64_code   : GB_AxB_WORKER (_any, GB_MULT_NAME, _fp64  )
                 default: ;
             }
             break ;
@@ -112,10 +128,14 @@ else
 {
         switch (add_opcode)
         {
+            // EQ_PAIR, LOR_PAIR, and LAND_PAIR have been renamed to ANY_PAIR
+            #ifndef GB_MULT_IS_PAIR_OPERATOR
             case GB_LOR_opcode  : GB_AxB_WORKER (_lor , GB_MULT_NAME, _bool)
             case GB_LAND_opcode : GB_AxB_WORKER (_land, GB_MULT_NAME, _bool)
-            case GB_LXOR_opcode : GB_AxB_WORKER (_lxor, GB_MULT_NAME, _bool)
             case GB_EQ_opcode   : GB_AxB_WORKER (_eq  , GB_MULT_NAME, _bool)
+            #endif
+            case GB_LXOR_opcode : GB_AxB_WORKER (_lxor, GB_MULT_NAME, _bool)
+            case GB_ANY_opcode  : GB_AxB_WORKER (_any , GB_MULT_NAME, _bool)
             default: ;
         }
 }
diff --git a/Source/Template/GB_add_template.c b/Source/Template/GB_add_template.c
index d1544b39b9..db508a4309 100644
--- a/Source/Template/GB_add_template.c
+++ b/Source/Template/GB_add_template.c
@@ -2,7 +2,7 @@
 // GB_add_template:  phase1 and phase2 for C=A+B, C<M>=A+B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -35,15 +35,13 @@
     // const int64_t *GB_RESTRICT Mh = NULL ;
     const int64_t *GB_RESTRICT Mi = NULL ;
     const GB_void *GB_RESTRICT Mx = NULL ;
-    GB_cast_function cast_M = NULL ;
     size_t msize = 0 ;
     if (M != NULL)
     { 
         Mp = M->p ;
         // Mh = M->h ;
         Mi = M->i ;
-        Mx = M->x ;
-        cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
+        Mx = (Mask_struct ? NULL : (M->x)) ;
         msize = M->type->size ;
     }
 
@@ -145,6 +143,7 @@
             // ----
 
             int64_t ajnz = pA_end - pA ;        // nnz in A(:,j) for this slice
+            int64_t pA_start = pA ;
             bool adense = (ajnz == len) ;
             int64_t iA_first = -1, iA_last = -1 ;
             if (ajnz > 0)
@@ -180,6 +179,7 @@
             // ----
 
             int64_t bjnz = pB_end - pB ;        // nnz in B(:,j) for this slice
+            int64_t pB_start = pB ;
             bool bdense = (bjnz == len) ;
             int64_t iB_first = -1, iB_last = -1 ;
             if (bjnz > 0)
@@ -544,79 +544,222 @@
                 // C(:,j)<M(:,j)> = A(:,j) + B (:,j)
                 //--------------------------------------------------------------
 
-                for ( ; pM < pM_end ; pM++)
+                // A and B cannot both be dense, because GB_ewise converts
+                // eWiseAdd(A,B) into eWiseMult(A,B) in that case.
+
+                bool mask_is_easy = 
+                    (adense && B == M) ||
+                    (bdense && A == M) ||
+                    (A == M && B == M) ;
+
+                if (mask_is_easy && Mask_struct)
                 {
 
                     //----------------------------------------------------------
-                    // get M(i,j) for A(i,j) + B (i,j)
+                    // special case: mask is very easy to use
                     //----------------------------------------------------------
 
-                    int64_t i = Mi [pM] ;
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                    if (!mij) continue ;
+                    // the mask M is structural, and every entry in the
+                    // mask is guaranteed to appear in A+B
 
-                    //----------------------------------------------------------
-                    // get A(i,j)
-                    //----------------------------------------------------------
+                    int64_t mjnz = pM_end - pM ;        // nnz (M (:,j))
 
-                    int64_t apright = pA_end - 1 ;
-                    bool afound ;
-                    GB_BINARY_SEARCH (i, Ai, pA, apright, afound) ;
+                    #if defined ( GB_PHASE_1_OF_2 )
 
-                    //----------------------------------------------------------
-                    // get B(i,j)
-                    //----------------------------------------------------------
+                    cjnz = mjnz ;
 
-                    int64_t bpright = pB_end - 1 ;
-                    bool bfound ;
-                    GB_BINARY_SEARCH (i, Bi, pB, bpright, bfound) ;
+                    #else
 
-                    //----------------------------------------------------------
-                    // C(i,j) = A(i,j) + B(i,j)
-                    //----------------------------------------------------------
+                    // copy the pattern into C (:,j)
+                    int64_t pC_start = pC ;
+                    int64_t pM_start = pM ;
+                    memcpy (Ci + pC, Mi + pM, mjnz * sizeof (int64_t)) ;
+                    int64_t pA_offset = pA_start - iA_first ;
+                    int64_t pB_offset = pB_start - iB_first ;
 
-                    if (afound && bfound)
+                    if (adense && B == M)
                     { 
-                        // C (i,j) = A (i,j) + B (i,j)
-                        #if defined ( GB_PHASE_1_OF_2 )
-                        cjnz++ ;
-                        #else
-                        Ci [pC] = i ;
-                        GB_GETA (aij, Ax, pA) ;
-                        GB_GETB (bij, Bx, pB) ;
-                        GB_BINOP (GB_CX (pC), aij, bij) ;
-                        pC++ ;
-                        #endif
+
+                        //------------------------------------------------------
+                        // A dense, B == M
+                        //------------------------------------------------------
+
+                        GB_PRAGMA_VECTORIZE
+                        for (int64_t p = 0 ; p < mjnz ; p++)
+                        {
+                            int64_t pM = p + pM_start ;
+                            int64_t pC = p + pC_start ;
+                            int64_t i = Mi [pM] ;
+                            ASSERT (GB_mcast (Mx, pM, msize)) ;
+                            ASSERT (Ai [pA_offset + i] == i) ;
+                            ASSERT (Bi [pM] == i) ;
+                            GB_GETA (aij, Ax, pA_offset + i) ;
+                            GB_GETB (bij, Bx, pM) ;
+                            GB_BINOP (GB_CX (pC), aij, bij) ;
+                        }
+
                     }
-                    else if (afound)
+                    else if (bdense && A == M)
                     { 
-                        // C (i,j) = A (i,j)
-                        #if defined ( GB_PHASE_1_OF_2 )
-                        cjnz++ ;
-                        #else
-                        Ci [pC] = i ;
-                        GB_COPY_A_TO_C (GB_CX (pC), Ax, pA) ;
-                        pC++ ;
-                        #endif
+
+                        //------------------------------------------------------
+                        // B dense, A == M
+                        //------------------------------------------------------
+
+                        GB_PRAGMA_VECTORIZE
+                        for (int64_t p = 0 ; p < mjnz ; p++)
+                        {
+                            int64_t pM = p + pM_start ;
+                            int64_t pC = p + pC_start ;
+                            int64_t i = Mi [pM] ;
+                            ASSERT (GB_mcast (Mx, pM, msize)) ;
+                            ASSERT (Ai [pM] == i) ;
+                            ASSERT (Bi [pB_offset + i] == i) ;
+                            GB_GETA (aij, Ax, pM) ;
+                            GB_GETB (bij, Bx, pB_offset + i) ;
+                            GB_BINOP (GB_CX (pC), aij, bij) ;
+                        }
+
                     }
-                    else if (bfound)
+                    else // (A == M) && (B == M)
                     { 
-                        // C (i,j) = B (i,j)
-                        #if defined ( GB_PHASE_1_OF_2 )
-                        cjnz++ ;
-                        #else
-                        Ci [pC] = i ;
-                        GB_COPY_B_TO_C (GB_CX (pC), Bx, pB) ;
-                        pC++ ;
-                        #endif
+
+                        //------------------------------------------------------
+                        // A == M == B: all three matrices are the same
+                        //------------------------------------------------------
+
+                        GB_PRAGMA_VECTORIZE
+                        for (int64_t p = 0 ; p < mjnz ; p++)
+                        {
+                            int64_t pM = p + pM_start ;
+                            int64_t pC = p + pC_start ;
+                            #if GB_OP_IS_SECOND
+                            GB_GETB (t, Bx, pM) ;
+                            #else
+                            GB_GETA (t, Ax, pM) ;
+                            #endif
+                            GB_BINOP (GB_CX (pC), t, t) ;
+                        }
                     }
+
+                    #endif
+
                 }
+                else
+                {
 
-                #if defined ( GB_PHASE_2_OF_2 )
-                ASSERT (pC == pC_end) ;
-                #endif
+                    //----------------------------------------------------------
+                    // scan M(:,j): count (phase 1) or compute (phase 2) C(:,j)
+                    //----------------------------------------------------------
 
+                    for ( ; pM < pM_end ; pM++)
+                    {
+
+                        //------------------------------------------------------
+                        // get M(i,j) for A(i,j) + B (i,j)
+                        //------------------------------------------------------
+
+                        int64_t i = Mi [pM] ;
+                        bool mij = GB_mcast (Mx, pM, msize) ;
+                        if (!mij) continue ;
+
+                        //------------------------------------------------------
+                        // get A(i,j)
+                        //------------------------------------------------------
+
+                        bool afound ;
+                        if (adense)
+                        { 
+                            // A is dense; use quick lookup
+                            pA = pA_start + (i - iA_first) ;
+                            afound = true ;
+                        }
+                        else if (A == M)
+                        { 
+                            // A is aliased to M
+                            pA = pM ;
+                            afound = true ;
+                        }
+                        else
+                        { 
+                            // A is sparse; use binary search
+                            int64_t apright = pA_end - 1 ;
+                            GB_BINARY_SEARCH (i, Ai, pA, apright, afound) ;
+                        }
+
+                        ASSERT (GB_IMPLIES (afound, Ai [pA] == i)) ;
+
+                        //------------------------------------------------------
+                        // get B(i,j)
+                        //------------------------------------------------------
+
+                        bool bfound ;
+                        if (bdense)
+                        { 
+                            // B is dense; use quick lookup
+                            pB = pB_start + (i - iB_first) ;
+                            bfound = true ;
+                        }
+                        else if (B == M)
+                        { 
+                            // B is aliased to M
+                            pB = pM ;
+                            bfound = true ;
+                        }
+                        else
+                        { 
+                            // B is sparse; use binary search
+                            int64_t bpright = pB_end - 1 ;
+                            GB_BINARY_SEARCH (i, Bi, pB, bpright, bfound) ;
+                        }
+
+                        ASSERT (GB_IMPLIES (bfound, Bi [pB] == i)) ;
+
+                        //------------------------------------------------------
+                        // C(i,j) = A(i,j) + B(i,j)
+                        //------------------------------------------------------
+
+                        if (afound && bfound)
+                        { 
+                            // C (i,j) = A (i,j) + B (i,j)
+                            #if defined ( GB_PHASE_1_OF_2 )
+                            cjnz++ ;
+                            #else
+                            Ci [pC] = i ;
+                            GB_GETA (aij, Ax, pA) ;
+                            GB_GETB (bij, Bx, pB) ;
+                            GB_BINOP (GB_CX (pC), aij, bij) ;
+                            pC++ ;
+                            #endif
+                        }
+                        else if (afound)
+                        { 
+                            // C (i,j) = A (i,j)
+                            #if defined ( GB_PHASE_1_OF_2 )
+                            cjnz++ ;
+                            #else
+                            Ci [pC] = i ;
+                            GB_COPY_A_TO_C (GB_CX (pC), Ax, pA) ;
+                            pC++ ;
+                            #endif
+                        }
+                        else if (bfound)
+                        { 
+                            // C (i,j) = B (i,j)
+                            #if defined ( GB_PHASE_1_OF_2 )
+                            cjnz++ ;
+                            #else
+                            Ci [pC] = i ;
+                            GB_COPY_B_TO_C (GB_CX (pC), Bx, pB) ;
+                            pC++ ;
+                            #endif
+                        }
+                    }
+
+                    #if defined ( GB_PHASE_2_OF_2 )
+                    ASSERT (pC == pC_end) ;
+                    #endif
+                }
             }
 
             //------------------------------------------------------------------
diff --git a/Source/Template/GB_binop_factory.c b/Source/Template/GB_binop_factory.c
index 4caa691455..92de761ca5 100644
--- a/Source/Template/GB_binop_factory.c
+++ b/Source/Template/GB_binop_factory.c
@@ -2,7 +2,7 @@
 // GB_binop_factory: switch factory for built-in methods for C=binop(A,B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -18,6 +18,8 @@
     switch (opcode)
     {
 
+#ifndef GB_BINOP_SUBSET
+
         //----------------------------------------------------------------------
         case GB_FIRST_opcode   :    // z = x
         //----------------------------------------------------------------------
@@ -28,12 +30,23 @@
 
         //----------------------------------------------------------------------
         case GB_SECOND_opcode  :    // z = y
+        case GB_ANY_opcode  :       // z = y
         //----------------------------------------------------------------------
 
             #define GB_BINOP_NAME _second
             #include "GB_binop_type_factory.c"
             break ;
 
+        //----------------------------------------------------------------------
+        case GB_PAIR_opcode   :    // z = 1
+        //----------------------------------------------------------------------
+
+            #define GB_BINOP_NAME _pair
+            #include "GB_binop_type_factory.c"
+            break ;
+
+#endif
+
         //----------------------------------------------------------------------
         case GB_MIN_opcode     :    // z = min(x,y)
         //----------------------------------------------------------------------
@@ -114,6 +127,8 @@
             #include "GB_binop_type_factory.c"
             break ;
 
+#ifndef GB_BINOP_SUBSET
+
         //----------------------------------------------------------------------
         case GB_ISEQ_opcode    :    // z = (x == y)
         //----------------------------------------------------------------------
@@ -247,6 +262,7 @@
             #define GB_BINOP_NAME _lxor
             #include "GB_binop_type_factory.c"
             break ;
+#endif
 
         default: ;
     }
diff --git a/Source/Template/GB_binop_type_factory.c b/Source/Template/GB_binop_type_factory.c
index e5b4c436b1..621fa68659 100644
--- a/Source/Template/GB_binop_type_factory.c
+++ b/Source/Template/GB_binop_type_factory.c
@@ -2,27 +2,15 @@
 // GB_binop_type_factory.c: switch factory for binary operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 // A template file #include'd in GB_binop_factory.c
 
-// This file is used for 19 operators.  The operator is combined here
-// with 10 or 11 types to create 10 or 11 unique workers.
-
-//      FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
-//      ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
-//      LAND, LOR, LXOR.
-
-// For all of them, the types of x, y, and z are the same.  There are 10
-// non-boolean operators and 0 or 1 boolean operators defined here.
-
-// GB_NO_BOOLEAN is defined for 15 of these multiply operators in the
-// #include'ing file (min, max, plus, minus, rminus, times, div, rdiv, is*)
-// since those multiply operators are redundant and have been renamed.  For
-// these, the boolean operators are not needed.
+// GB_NO_BOOLEAN is defined for redundant boolean multiply operators in the
+// #include'ing file (min, max, plus, minus, rminus, times, div, rdiv, is*).
 
 {
     switch (xycode)
diff --git a/Source/Template/GB_critical_section.c b/Source/Template/GB_critical_section.c
index 7af5d7a665..cc0d93eeec 100644
--- a/Source/Template/GB_critical_section.c
+++ b/Source/Template/GB_critical_section.c
@@ -2,7 +2,7 @@
 // Source/Template/GB_critical_section: execute code in a critical section
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_dense_ewise3_accum_template.c b/Source/Template/GB_dense_ewise3_accum_template.c
new file mode 100644
index 0000000000..204056a503
--- /dev/null
+++ b/Source/Template/GB_dense_ewise3_accum_template.c
@@ -0,0 +1,95 @@
+//------------------------------------------------------------------------------
+// GB_dense_ewise3_accum_template: C += A+B where all 3 matrices are dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// FUTURE: allow the accum and the 'plus' op to differ (as in C += A-B,
+// with PLUS as the accum and MINUS as the operator), so CBLAS can be used
+// for this combination.
+
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // any matrix may be aliased to any other (C==A, C==B, and/or A==B)
+    GB_ATYPE *Ax = A->x ;
+    GB_BTYPE *Bx = B->x ;
+    GB_CTYPE *Cx = C->x ;
+    const int64_t cnz = GB_NNZ (C) ;
+    int64_t p ;
+
+    //--------------------------------------------------------------------------
+    // C += A+B where all 3 matrices are dense
+    //--------------------------------------------------------------------------
+
+    if (A == B)
+    {
+
+        //----------------------------------------------------------------------
+        // C += A+A where A and C are dense
+        //----------------------------------------------------------------------
+
+        // If the op is PLUS, this becomes C += 2*A.  If the op is MINUS,
+        // almost nothing happens since C=C-(A-A) = C, except if A has Infs or
+        // NaNs.  In this case, don't bother to call the CBLAS if the op is
+        // MINUS.
+
+        #if GB_HAS_CBLAS & GB_OP_IS_PLUS_REAL
+
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) 2, Ax, Cx, nthreads) ;   // C += 2*A
+
+        #else
+
+            // C += A+A
+            #pragma omp parallel for num_threads(nthreads) schedule(static)
+            for (p = 0 ; p < cnz ; p++)
+            { 
+                GB_GETA (aij, Ax, p) ;                  // aij = Ax [p]
+                GB_CTYPE_SCALAR (t) ;                   // declare scalar t
+                GB_BINOP (t, aij, aij) ;                // t = aij + aij
+                GB_BINOP (GB_CX (p), GB_CX (p), t) ;    // Cx [p] = cij + t
+            }
+
+        #endif
+
+    }
+    else
+    {
+
+        //----------------------------------------------------------------------
+        // C += A+B where all 3 matrices are dense
+        //----------------------------------------------------------------------
+
+        #if GB_HAS_CBLAS & GB_OP_IS_PLUS_REAL
+
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) 1, Ax, Cx, nthreads) ;   // C += A
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) 1, Bx, Cx, nthreads) ;   // C += B
+
+        #elif GB_HAS_CBLAS & GB_OP_IS_MINUS_REAL
+
+            // C -= (A-B)
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) -1, Ax, Cx, nthreads) ;  // C -= A
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE)  1, Bx, Cx, nthreads) ;  // C += B
+
+        #else
+
+            #pragma omp parallel for num_threads(nthreads) schedule(static)
+            for (p = 0 ; p < cnz ; p++)
+            { 
+                GB_GETA (aij, Ax, p) ;                  // aij = Ax [p]
+                GB_GETB (bij, Bx, p) ;                  // bij = Bx [p]
+                GB_CTYPE_SCALAR (t) ;                   // declare scalar t
+                GB_BINOP (t, aij, bij) ;                // t = aij + bij
+                GB_BINOP (GB_CX (p), GB_CX (p), t) ;    // Cx [p] = cij + t
+            }
+
+        #endif
+    }
+}
+
diff --git a/Source/Template/GB_dense_ewise3_noaccum_template.c b/Source/Template/GB_dense_ewise3_noaccum_template.c
new file mode 100644
index 0000000000..9f29313cf0
--- /dev/null
+++ b/Source/Template/GB_dense_ewise3_noaccum_template.c
@@ -0,0 +1,118 @@
+//------------------------------------------------------------------------------
+// GB_dense_ewise3_noaccum_template: C = A+B where all 3 matrices are dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_unused.h"
+
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // any matrix may be aliased to any other (C==A, C==B, and/or A==B)
+    GB_ATYPE *Ax = A->x ;
+    GB_BTYPE *Bx = B->x ;
+    GB_CTYPE *Cx = C->x ;
+    const int64_t cnz = GB_NNZ (C) ;
+    ASSERT (GB_is_dense (A)) ;
+    ASSERT (GB_is_dense (B)) ;
+    ASSERT (GB_is_dense (C)) ;
+    int64_t p ;
+
+    //--------------------------------------------------------------------------
+    // C = A+B where all 3 matrices are dense
+    //--------------------------------------------------------------------------
+
+    if (C == B)
+    {
+
+        //----------------------------------------------------------------------
+        // C = A+C where A and C are dense
+        //----------------------------------------------------------------------
+
+        #if GB_HAS_CBLAS && GB_OP_IS_PLUS_REAL
+
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) 1, Ax, Cx, nthreads) ;   // C += A
+
+        // CBLAS is not used for MINUS here: with C aliased to B the result
+        // must be C = A-C, but an axpy with alpha = -1 computes C = C-A (the
+        // wrong sign).  MINUS thus falls through to the generic loop below.
+
+        #else
+
+            #pragma omp parallel for num_threads(nthreads) schedule(static)
+            for (p = 0 ; p < cnz ; p++)
+            { 
+                GB_GETA (aij, Ax, p) ;                  // aij = Ax [p]
+                GB_BINOP (GB_CX (p), aij, GB_CX (p)) ;  // Cx [p] = aij + Cx [p]
+            }
+
+        #endif
+
+    }
+    else if (C == A)
+    {
+
+        //----------------------------------------------------------------------
+        // C = C+B where B and C are dense
+        //----------------------------------------------------------------------
+
+        #if GB_HAS_CBLAS & GB_OP_IS_PLUS_REAL
+
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) 1, Bx, Cx, nthreads) ;   // C += B
+
+        #elif GB_HAS_CBLAS & GB_OP_IS_MINUS_REAL
+
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) -1, Bx, Cx, nthreads) ;  // C -= B
+
+        #else
+
+            #pragma omp parallel for num_threads(nthreads) schedule(static)
+            for (p = 0 ; p < cnz ; p++)
+            { 
+                GB_GETB (bij, Bx, p) ;                  // bij = Bx [p]
+                GB_BINOP (GB_CX (p), GB_CX (p), bij) ;  // Cx [p] += bij
+            }
+
+        #endif
+
+    }
+    else
+    {
+
+        //----------------------------------------------------------------------
+        // C = A+B where all 3 matrices are dense
+        //----------------------------------------------------------------------
+
+        // note that A and B may still be aliased to each other
+
+        #if GB_HAS_CBLAS && GB_OP_IS_PLUS_REAL
+
+            GB_memcpy (Cx, Ax, cnz * sizeof (GB_CTYPE), nthreads) ; // C = A
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) 1, Bx, Cx, nthreads) ;   // C += B
+
+        #elif GB_HAS_CBLAS && GB_OP_IS_MINUS_REAL
+
+            GB_memcpy (Cx, Ax, cnz * sizeof (GB_CTYPE), nthreads) ; // C = A
+            GB_CBLAS_AXPY (cnz, (GB_CTYPE) -1, Bx, Cx, nthreads) ;  // C -= B
+
+        #else
+
+            #pragma omp parallel for num_threads(nthreads) schedule(static)
+            for (p = 0 ; p < cnz ; p++)
+            { 
+                GB_GETA (aij, Ax, p) ;              // aij = Ax [p]
+                GB_GETB (bij, Bx, p) ;              // bij = Bx [p]
+                GB_BINOP (GB_CX (p), aij, bij) ;    // Cx [p] = aij + bij
+            }
+
+        #endif
+    }
+}
+
diff --git a/Source/Template/GB_dense_subassign_05d_template.c b/Source/Template/GB_dense_subassign_05d_template.c
new file mode 100644
index 0000000000..6933644c7e
--- /dev/null
+++ b/Source/Template/GB_dense_subassign_05d_template.c
@@ -0,0 +1,85 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_05d_template: C<M> = x where C is dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+{
+
+    //--------------------------------------------------------------------------
+    // get C and M; Mx is NULL if the mask is structural (Mask_struct)
+    //--------------------------------------------------------------------------
+
+    const int64_t *GB_RESTRICT Mp = M->p ;
+    const int64_t *GB_RESTRICT Mh = M->h ;
+    const int64_t *GB_RESTRICT Mi = M->i ;
+    const GB_void *GB_RESTRICT Mx = (Mask_struct ? NULL : (M->x)) ;
+    const size_t msize = M->type->size ;    // passed to GB_mcast below
+
+    GB_CTYPE *GB_RESTRICT Cx = C->x ;
+    const int64_t cvlen = C->vlen ;         // stride between vectors of C
+
+    //--------------------------------------------------------------------------
+    // C<M> = x
+    //--------------------------------------------------------------------------
+
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+        // this task covers vectors kfirst:klast of M;
+        // if kfirst > klast then taskid does no work at all
+        int64_t kfirst = kfirst_slice [taskid] ;
+        int64_t klast  = klast_slice  [taskid] ;
+
+        //----------------------------------------------------------------------
+        // C<M(:,kfirst:klast)> = x
+        //----------------------------------------------------------------------
+
+        for (int64_t k = kfirst ; k <= klast ; k++)
+        {
+
+            //------------------------------------------------------------------
+            // find the part of M(:,k) to be operated on by this task
+            //------------------------------------------------------------------
+
+            int64_t j = (Mh == NULL) ? k : Mh [k] ;     // j = k if no Mh list
+            int64_t pM_start, pM_end ;
+            GB_get_pA_and_pC (&pM_start, &pM_end, NULL,
+                taskid, k, kfirst, klast, pstart_slice, NULL, NULL, Mp) ;
+
+            // pC points to the start of C(:,j) if C is dense
+            int64_t pC = j * cvlen ;
+
+            //------------------------------------------------------------------
+            // C<M(:,j)> = x
+            //------------------------------------------------------------------
+            // Mx is NULL: every entry in this part of M(:,j) selects C(i,j)
+            if (Mx == NULL)
+            {
+                GB_PRAGMA_VECTORIZE
+                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
+                { 
+                    int64_t p = pC + Mi [pM] ;
+                    GB_COPY_SCALAR_TO_C (p, cwork) ;        // Cx [p] = scalar
+                }
+            }
+            else
+            {
+                GB_PRAGMA_VECTORIZE
+                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
+                {
+                    if (GB_mcast (Mx, pM, msize))       // test the mask entry
+                    { 
+                        int64_t p = pC + Mi [pM] ;
+                        GB_COPY_SCALAR_TO_C (p, cwork) ;    // Cx [p] = scalar
+                    }
+                }
+            }
+        }
+    }
+}
+
diff --git a/Source/Template/GB_dense_subassign_06d_template.c b/Source/Template/GB_dense_subassign_06d_template.c
new file mode 100644
index 0000000000..8ddcfd6550
--- /dev/null
+++ b/Source/Template/GB_dense_subassign_06d_template.c
@@ -0,0 +1,84 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_06d_template: C<A> = A where C is dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+{
+
+    //--------------------------------------------------------------------------
+    // get C and A; A is both the mask and the source of the values
+    //--------------------------------------------------------------------------
+
+    const int64_t  *GB_RESTRICT Ap = A->p ;
+    const int64_t  *GB_RESTRICT Ah = A->h ;
+    const int64_t  *GB_RESTRICT Ai = A->i ;
+    const GB_CTYPE *GB_RESTRICT Ax = A->x ;     // A->x is read as GB_CTYPE
+
+    GB_CTYPE *GB_RESTRICT Cx = C->x ;
+    const int64_t cvlen = C->vlen ;             // stride between vectors of C
+
+    //--------------------------------------------------------------------------
+    // C<A> = A
+    //--------------------------------------------------------------------------
+
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+        // this task covers vectors kfirst:klast of A;
+        // if kfirst > klast then taskid does no work at all
+        int64_t kfirst = kfirst_slice [taskid] ;
+        int64_t klast  = klast_slice  [taskid] ;
+
+        //----------------------------------------------------------------------
+        // C<A(:,kfirst:klast)> = A(:,kfirst:klast)
+        //----------------------------------------------------------------------
+
+        for (int64_t k = kfirst ; k <= klast ; k++)
+        {
+
+            //------------------------------------------------------------------
+            // find the part of A(:,k) to be operated on by this task
+            //------------------------------------------------------------------
+
+            int64_t j = (Ah == NULL) ? k : Ah [k] ;     // j = k if no Ah list
+            int64_t pA_start, pA_end ;
+            GB_get_pA_and_pC (&pA_start, &pA_end, NULL,
+                taskid, k, kfirst, klast, pstart_slice, NULL, NULL, Ap) ;
+
+            // pC points to the start of C(:,j) if C is dense
+            int64_t pC = j * cvlen ;
+
+            //------------------------------------------------------------------
+            // C<A(:,j)> = A(:,j)
+            //------------------------------------------------------------------
+            // NOTE: asize is not declared here; presumably from the includer
+            if (Mask_struct)                    // structural: copy all entries
+            {
+                GB_PRAGMA_VECTORIZE
+                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                { 
+                    int64_t p = pC + Ai [pA] ;
+                    GB_COPY_A_TO_C (Cx, p, Ax, pA) ;    // Cx [p] = Ax [pA]
+                }
+            }
+            else
+            {
+                GB_PRAGMA_VECTORIZE
+                for (int64_t pA = pA_start ; pA < pA_end ; pA++)
+                {
+                    if (GB_AX_MASK (Ax, pA, asize))     // test A(i,j) as mask
+                    { 
+                        int64_t p = pC + Ai [pA] ;
+                        GB_COPY_A_TO_C (Cx, p, Ax, pA) ;    // Cx [p] = Ax [pA]
+                    }
+                }
+            }
+        }
+    }
+}
+
diff --git a/Source/Template/GB_dense_subassign_22_template.c b/Source/Template/GB_dense_subassign_22_template.c
new file mode 100644
index 0000000000..c8fce515bc
--- /dev/null
+++ b/Source/Template/GB_dense_subassign_22_template.c
@@ -0,0 +1,33 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_22_template: C += x where C is dense and x is a scalar
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// All entries in C+=x are computed fully in parallel, using the same kind of
+// parallelism as Template/GB_AxB_colscale.c.
+
+{
+
+    //--------------------------------------------------------------------------
+    // get C
+    //--------------------------------------------------------------------------
+
+    GB_CTYPE *GB_RESTRICT Cx = C->x ;
+    const int64_t cnz = GB_NNZ (C) ;        // number of values in Cx to update
+
+    //--------------------------------------------------------------------------
+    // C += x where C is dense and x is a scalar
+    //--------------------------------------------------------------------------
+    // the same scalar operand, ywork, is applied to every position of Cx
+    int64_t pC ;
+    #pragma omp parallel for num_threads(nthreads) schedule(static)
+    for (pC = 0 ; pC < cnz ; pC++)
+    { 
+        GB_BINOP (GB_CX (pC), GB_CX (pC), ywork) ;  // Cx [pC] += ywork
+    }
+}
+
diff --git a/Source/Template/GB_dense_subassign_23_template.c b/Source/Template/GB_dense_subassign_23_template.c
new file mode 100644
index 0000000000..f70e1d91f5
--- /dev/null
+++ b/Source/Template/GB_dense_subassign_23_template.c
@@ -0,0 +1,199 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_23_template: C += A where C is dense; A is sparse or dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// All entries in C+=A are computed fully in parallel, using the same kind of
+// parallelism as Template/GB_AxB_colscale.c.
+
+#include "GB_unused.h"
+
+{
+
+    //--------------------------------------------------------------------------
+    // get C and A
+    //--------------------------------------------------------------------------
+
+    const GB_ATYPE *GB_RESTRICT Ax = A->x ;
+    GB_CTYPE *GB_RESTRICT Cx = C->x ;
+    ASSERT (GB_is_dense (C)) ;
+    // kfirst_slice == NULL selects the case where A is dense (asserted below)
+    if (kfirst_slice == NULL)
+    {
+
+        //----------------------------------------------------------------------
+        // C += A when both C and A are dense
+        //----------------------------------------------------------------------
+
+        ASSERT (GB_is_dense (A)) ;
+        const int64_t cnz = GB_NNZ (C) ;
+
+        #if GB_HAS_CBLAS && GB_OP_IS_PLUS_REAL
+
+            // C += A via GB_cblas_daxpy or GB_cblas_saxpy
+            GB_CBLAS_AXPY           // Y += alpha*X
+            (
+                cnz,                // length of X and Y (note: int64_t)
+                (GB_CTYPE) 1,       // alpha is 1.0
+                Ax,                 // X, always stride 1
+                Cx,                 // Y, always stride 1
+                nthreads            // maximum # of threads to use
+            ) ;
+
+        #elif GB_HAS_CBLAS && GB_OP_IS_MINUS_REAL
+
+            // C -= A via GB_cblas_daxpy or GB_cblas_saxpy
+            GB_CBLAS_AXPY           // Y += alpha*X
+            (
+                cnz,                // length of X and Y (note: int64_t)
+                (GB_CTYPE) -1,      // alpha is -1.0
+                Ax,                 // X, always stride 1
+                Cx,                 // Y, always stride 1
+                nthreads            // maximum # of threads to use
+            ) ;
+
+        #else
+
+            // generic case: apply the operator to each entry in parallel
+            int64_t p ;
+            #pragma omp parallel for num_threads(nthreads) schedule(static)
+            for (p = 0 ; p < cnz ; p++)
+            { 
+                GB_GETB (aij, Ax, p) ;                  // aij = A(i,j)
+                GB_BINOP (GB_CX (p), GB_CX (p), aij) ;  // C(i,j) += aij
+            }
+
+        #endif
+    }
+    else
+    {
+
+        //----------------------------------------------------------------------
+        // C += A when C is dense and A is sparse
+        //----------------------------------------------------------------------
+
+        const int64_t  *GB_RESTRICT Ap = A->p ;
+        const int64_t  *GB_RESTRICT Ah = A->h ;
+        const int64_t  *GB_RESTRICT Ai = A->i ;
+        const int64_t cvlen = C->vlen ;
+
+        int taskid ;
+        #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+        for (taskid = 0 ; taskid < ntasks ; taskid++)
+        {
+
+            // if kfirst > klast then taskid does no work at all
+            int64_t kfirst = kfirst_slice [taskid] ;
+            int64_t klast  = klast_slice  [taskid] ;
+
+            //------------------------------------------------------------------
+            // C(:,kfirst:klast) += A(:,kfirst:klast)
+            //------------------------------------------------------------------
+
+            for (int64_t k = kfirst ; k <= klast ; k++)
+            {
+
+                //--------------------------------------------------------------
+                // find the part of A(:,k) and C(:,k) for this task
+                //--------------------------------------------------------------
+
+                int64_t j = (Ah == NULL) ? k : Ah [k] ;
+                int64_t my_pA_start, my_pA_end ;
+                GB_get_pA_and_pC (&my_pA_start, &my_pA_end, NULL,
+                    taskid, k, kfirst, klast, pstart_slice, NULL, NULL, Ap) ;
+                // ajdense: A(:,j) holds cvlen entries, starting at pA_start
+                int64_t pA_start = Ap [k] ;
+                bool ajdense = ((Ap [k+1] - pA_start) == cvlen) ;
+
+                // pC points to the start of C(:,j) if C is dense
+                int64_t pC = j * cvlen ;
+
+                //--------------------------------------------------------------
+                // C(:,j) += A(:,j)
+                //--------------------------------------------------------------
+
+                if (ajdense)
+                { 
+
+                    //----------------------------------------------------------
+                    // both C(:,j) and A(:,j) are dense
+                    //----------------------------------------------------------
+
+                    #if GB_HAS_CBLAS && GB_OP_IS_PLUS_REAL
+
+                        // y += x via GB_cblas_daxpy or GB_cblas_saxpy.
+                        // use a single thread since this is already in a
+                        // parallel region.
+
+                        int64_t len = my_pA_end - my_pA_start ;
+                        int64_t i = my_pA_start - pA_start ;
+                        int64_t p = pC + i ;
+
+                        GB_CBLAS_AXPY           // Y += alpha*X
+                        (
+                            len,                // length of X and Y
+                            (GB_CTYPE) 1,       // alpha is 1.0
+                            Ax + my_pA_start,   // X, always stride 1
+                            Cx + p,             // Y, always stride 1
+                            1                   // use a single thread
+                        ) ;
+
+                    #elif GB_HAS_CBLAS && GB_OP_IS_MINUS_REAL
+
+                        // y -= x via GB_cblas_daxpy or GB_cblas_saxpy.
+                        // use a single thread since this is already in a
+                        // parallel region.
+
+                        int64_t len = my_pA_end - my_pA_start ;
+                        int64_t i = my_pA_start - pA_start ;
+                        int64_t p = pC + i ;
+
+                        GB_CBLAS_AXPY           // Y += alpha*X
+                        (
+                            len,                // length of X and Y
+                            (GB_CTYPE) -1,      // alpha is -1.0
+                            Ax + my_pA_start,   // X, always stride 1
+                            Cx + p,             // Y, always stride 1
+                            1                   // use a single thread
+                        ) ;
+
+                    #else
+
+                        // generic case: i is the offset of pA within A(:,j)
+                        GB_PRAGMA_VECTORIZE
+                        for (int64_t pA = my_pA_start ; pA < my_pA_end ; pA++)
+                        { 
+                            int64_t i = pA - pA_start ;
+                            int64_t p = pC + i ;
+                            // aij = A(i,j)
+                            GB_GETB (aij, Ax, pA) ;
+                            // C(i,j) += aij
+                            GB_BINOP (GB_CX (p), GB_CX (p), aij) ;
+                        }
+
+                    #endif
+
+                }
+                else
+                {
+
+                    //----------------------------------------------------------
+                    // C(:,j) is dense; A(:,j) is sparse
+                    //----------------------------------------------------------
+                    // the row index i of each entry is looked up in Ai
+                    GB_PRAGMA_VECTORIZE
+                    for (int64_t pA = my_pA_start ; pA < my_pA_end ; pA++)
+                    { 
+                        int64_t i = Ai [pA] ;
+                        int64_t p = pC + i ;
+                        GB_GETB (aij, Ax, pA) ;                 // aij = A(i,j)
+                        GB_BINOP (GB_CX (p), GB_CX (p), aij) ;  // C(i,j) += aij
+                    }
+                }
+            }
+        }
+    }
+}
+
diff --git a/Source/Template/GB_dense_subassign_25_template.c b/Source/Template/GB_dense_subassign_25_template.c
new file mode 100644
index 0000000000..703cc2a553
--- /dev/null
+++ b/Source/Template/GB_dense_subassign_25_template.c
@@ -0,0 +1,73 @@
+//------------------------------------------------------------------------------
+// GB_dense_subassign_25_template: C<M> = A where C is empty and A is dense
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+// C<M> = A where C starts as empty, M is structural, and A is dense.  The
+// pattern of C is an exact copy of M.
+
+{
+
+    //--------------------------------------------------------------------------
+    // get C, M, and A (only the pattern of M is used; M->x is not read)
+    //--------------------------------------------------------------------------
+
+    GB_CTYPE *GB_RESTRICT Cx = C->x ;
+
+    const int64_t *GB_RESTRICT Mp = M->p ;
+    const int64_t *GB_RESTRICT Mh = M->h ;
+    const int64_t *GB_RESTRICT Mi = M->i ;
+
+    const GB_CTYPE *GB_RESTRICT Ax = A->x ;     // A->x is read as GB_CTYPE
+    const int64_t avlen = A->vlen ;             // stride between vectors of A
+
+    //--------------------------------------------------------------------------
+    // C<M> = A
+    //--------------------------------------------------------------------------
+
+    int taskid ;
+    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
+    for (taskid = 0 ; taskid < ntasks ; taskid++)
+    {
+        // this task covers vectors kfirst:klast of M;
+        // if kfirst > klast then taskid does no work at all
+        int64_t kfirst = kfirst_slice [taskid] ;
+        int64_t klast  = klast_slice  [taskid] ;
+
+        //----------------------------------------------------------------------
+        // C<M(:,kfirst:klast)> = A(:,kfirst:klast)
+        //----------------------------------------------------------------------
+
+        for (int64_t k = kfirst ; k <= klast ; k++)
+        {
+
+            //------------------------------------------------------------------
+            // find the part of M(:,k) to be operated on by this task
+            //------------------------------------------------------------------
+
+            int64_t j = (Mh == NULL) ? k : Mh [k] ;     // j = k if no Mh list
+            int64_t pM_start, pM_end ;
+            GB_get_pA_and_pC (&pM_start, &pM_end, NULL,
+                taskid, k, kfirst, klast, pstart_slice, NULL, NULL, Mp) ;
+
+            // pA points to the start of A(:,j) since A is dense
+            int64_t pA = j * avlen ;
+
+            //------------------------------------------------------------------
+            // C<M(:,j)> = A(:,j)
+            //------------------------------------------------------------------
+            // Cx is indexed by pM, the position in M: C has the pattern of M
+            GB_PRAGMA_VECTORIZE
+            for (int64_t pM = pM_start ; pM < pM_end ; pM++)
+            { 
+                int64_t p = pA + Mi [pM] ;
+                GB_COPY_A_TO_C (Cx, pM, Ax, p) ;    // Cx [pM] = Ax [p]
+            }
+        }
+    }
+}
+
diff --git a/Source/Template/GB_emult_template.c b/Source/Template/GB_emult_template.c
index 3b2b15257f..41330fd2e3 100644
--- a/Source/Template/GB_emult_template.c
+++ b/Source/Template/GB_emult_template.c
@@ -2,7 +2,7 @@
 // GB_emult_template:  phase1 and phase2 for C=A.*B, C<M>=A.*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -20,7 +20,7 @@
 
 {
 
-    // iB_first is unused if the operator is FIRST
+    // iB_first is unused if the operator is FIRST or PAIR
     #include "GB_unused.h"
 
     //--------------------------------------------------------------------------
@@ -40,15 +40,13 @@
     const int64_t *GB_RESTRICT Mh = NULL ;
     const int64_t *GB_RESTRICT Mi = NULL ;
     const GB_void *GB_RESTRICT Mx = NULL ;
-    GB_cast_function cast_M = NULL ;
     size_t msize = 0 ;
     if (M != NULL)
     { 
         Mp = M->p ;
         Mh = M->h ;
         Mi = M->i ;
-        Mx = M->x ;
-        cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
+        Mx = (Mask_struct ? NULL : (M->x)) ;
         msize = M->type->size ;
     }
 
@@ -147,6 +145,7 @@
 
             int64_t ajnz = pA_end - pA ;        // nnz in A(:,j) for this slice
             bool adense = (ajnz == len) ;
+            int64_t pA_start = pA ;
 
             // get the first and last indices in A(:,j) for this vector
             int64_t iA_first = -1 ;
@@ -188,6 +187,7 @@
 
             int64_t bjnz = pB_end - pB ;        // nnz in B(:,j) for this slice
             bool bdense = (bjnz == len) ;
+            int64_t pB_start = pB ;
 
             // get the first and last indices in B(:,j) for this vector
             int64_t iB_first = -1 ;
@@ -459,34 +459,53 @@
                 //--------------------------------------------------------------
 
                 for ( ; pM < pM_end ; pM++)
-                { 
+                {
 
                     //----------------------------------------------------------
                     // get M(i,j) for A(i,j) .* B (i,j)
                     //----------------------------------------------------------
 
                     int64_t i = Mi [pM] ;
-                    bool mij ;
-                    cast_M (&mij, Mx +(pM*msize), 0) ;
+                    bool mij = GB_mcast (Mx, pM, msize) ;
                     if (!mij) continue ;
 
                     //----------------------------------------------------------
                     // get A(i,j)
                     //----------------------------------------------------------
 
-                    int64_t apright = pA_end - 1 ;
-                    bool afound ;
-                    GB_BINARY_SEARCH (i, Ai, pA, apright, afound) ;
-                    if (!afound) continue ;
+                    if (adense)
+                    { 
+                        // A(:,j) is dense; use direct lookup for A(i,j)
+                        pA = pA_start + i - iA_first ;
+                    }
+                    else
+                    { 
+                        // A(:,j) is sparse; use binary search for A(i,j)
+                        int64_t apright = pA_end - 1 ;
+                        bool afound ;
+                        GB_BINARY_SEARCH (i, Ai, pA, apright, afound) ;
+                        if (!afound) continue ;
+                    }
+                    ASSERT (Ai [pA] == i) ;
 
                     //----------------------------------------------------------
                     // get B(i,j)
                     //----------------------------------------------------------
 
-                    int64_t bpright = pB_end - 1 ;
-                    bool bfound ;
-                    GB_BINARY_SEARCH (i, Bi, pB, bpright, bfound) ;
-                    if (!bfound) continue ;
+                    if (bdense)
+                    { 
+                        // B(:,j) is dense; use direct lookup for B(i,j)
+                        pB = pB_start + i - iB_first ;
+                    }
+                    else
+                    { 
+                        // B(:,j) is sparse; use binary search for B(i,j)
+                        int64_t bpright = pB_end - 1 ;
+                        bool bfound ;
+                        GB_BINARY_SEARCH (i, Bi, pB, bpright, bfound) ;
+                        if (!bfound) continue ;
+                    }
+                    ASSERT (Bi [pB] == i) ;
 
                     //----------------------------------------------------------
                     // C(i,j) = A(i,j) .* B(i,j)
diff --git a/Source/Template/GB_mask_template.c b/Source/Template/GB_mask_template.c
index a4d4933ac1..3f74a0fd75 100644
--- a/Source/Template/GB_mask_template.c
+++ b/Source/Template/GB_mask_template.c
@@ -2,7 +2,7 @@
 // GB_mask_template:  phase1 and phase2 for R = masker (M, C, Z)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -77,7 +77,6 @@
     // const int64_t *GB_RESTRICT Mh = NULL ;
     const int64_t *GB_RESTRICT Mi = NULL ;
     const GB_void *GB_RESTRICT Mx = NULL ;
-    GB_cast_function cast_M = NULL ;
     size_t msize = 0 ;
     // int64_t Mnvec = 0 ;
     // bool M_is_hyper = false ;
@@ -86,8 +85,7 @@
         Mp = M->p ;
         // Mh = M->h ;
         Mi = M->i ;
-        Mx = M->x ;
-        cast_M = GB_cast_factory (GB_BOOL_code, M->type->code) ;
+        Mx = (Mask_struct ? NULL : (M->x)) ;
         msize = M->type->size ;
         // Mnvec = M->nvec ;
         // M_is_hyper = M->is_hyper ;
@@ -127,7 +125,7 @@
             len = TaskList [taskid].len ;
         }
         else
-        {
+        { 
             // a coarse task operates on one or more whole vectors
             len = vlen ;
         }
@@ -228,6 +226,7 @@
 
             int64_t zjnz = pZ_end - pZ ;        // nnz in Z(:,j) for this slice
             bool zdense = (zjnz == len) && (zjnz > 0) ;
+
             #ifdef GB_DEBUG
             int64_t iZ_first = -1, iZ_last = -1 ;
             if (zjnz > 0)
@@ -259,7 +258,14 @@
                     pM_end = Mp [kM+1] ;
                 }
             }
+
             int64_t mjnz = pM_end - pM ;    // nnz (M (:,j))
+            bool mdense = (mjnz == len) && (mjnz > 0) ;
+
+            // get the first index in M(:,j) for this vector
+            int64_t iM_first = -1 ;
+            int64_t pM_first = pM ;
+            if (mjnz > 0) iM_first = Mi [pM_first] ;
 
             //------------------------------------------------------------------
             // phase1: count nnz (R(:,j)); phase2: compute R(:,j)
@@ -329,7 +335,7 @@
                     bool mij = false ;
                     if (i == iM)
                     { 
-                        cast_M (&mij, Mx +(pM*msize), 0) ;
+                        mij = GB_mcast (Mx, pM, msize) ;
                         pM++ ;
                     }
                     if (Mask_comp) mij = !mij ;
@@ -367,19 +373,48 @@
                     // get M(i,j)
                     //----------------------------------------------------------
 
-                    // Use GB_BINARY_SPLIT_SEARCH so that pM can be used in
-                    // the for loop with index pM in the wrapup phase.
-
                     bool mij = false ;
-                    int64_t pright = pM_end - 1 ;
-                    bool found ;
-                    GB_BINARY_SPLIT_SEARCH (i, Mi, pM, pright, found) ;
-                    if (found)
-                    {
-                        cast_M (&mij, Mx +(pM*msize), 0) ;
+
+                    if (mdense)
+                    { 
+
+                        //------------------------------------------------------
+                        // M(:,j) is dense
+                        //------------------------------------------------------
+
+                        // mask is dense, lookup M(i,j)
+                        // iM_first == Mi [pM_first]
+                        // iM_first + delta == Mi [pM_first + delta]
+                        // let i = iM_first + delta
+                        // let pM = pM_first + delta
+                        // then delta = i - iM_first
+                        pM = pM_first + (i - iM_first) ;
+                        ASSERT (i == Mi [pM]) ;
+                        mij = GB_mcast (Mx, pM, msize) ;
                         // increment pM for the wrapup phase below
                         pM++ ;
                     }
+                    else
+                    {
+
+                        //------------------------------------------------------
+                        // M(:,j) is sparse
+                        //------------------------------------------------------
+
+                        // Use GB_SPLIT_BINARY_SEARCH so that pM can be used in
+                        // the for loop with index pM in the wrapup phase.
+                        int64_t pright = pM_end - 1 ;
+                        bool found ;
+                        GB_SPLIT_BINARY_SEARCH (i, Mi, pM, pright, found) ;
+                        if (found)
+                        { 
+                            ASSERT (i == Mi [pM]) ;
+                            mij = GB_mcast (Mx, pM, msize) ;
+                            // increment pM for the wrapup phase below
+                            pM++ ;
+                        }
+                    }
+
                     if (Mask_comp) mij = !mij ;
 
                     //----------------------------------------------------------
@@ -436,7 +471,25 @@
                         // mask is not complemented
                         //------------------------------------------------------
 
-                        if (zjnz > 32 * mjnz)
+                        if (mdense)
+                        {
+
+                            //--------------------------------------------------
+                            // M(:,j) is dense
+                            //--------------------------------------------------
+
+                            for ( ; pZ < pZ_end ; pZ++)
+                            { 
+                                int64_t i = Zi [pZ] ;
+                                // mask is dense, lookup M(i,j)
+                                pM = pM_first + (i - iM_first) ;
+                                ASSERT (i == Mi [pM]) ;
+                                bool mij = GB_mcast (Mx, pM, msize) ;
+                                if (mij) GB_COPY_Z ;
+                            }
+
+                        }
+                        else if (zjnz > 32 * mjnz)
                         {
 
                             //--------------------------------------------------
@@ -448,9 +501,7 @@
 
                             for ( ; pM < pM_end ; pM++)
                             {
-                                bool mij ;
-                                cast_M (&mij, Mx +(pM*msize), 0) ;
-                                if (mij)
+                                if (GB_mcast (Mx, pM, msize))
                                 { 
                                     int64_t i = Mi [pM] ;
                                     int64_t pright = pZ_end - 1 ;
@@ -474,8 +525,8 @@
                                 bool mij = false ;
                                 int64_t pright = pM_end - 1 ;
                                 bool found ;
-                                GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;
-                                if (found) cast_M (&mij, Mx +(pM*msize), 0) ;
+                                GB_BINARY_SEARCH (i, Mi, pM, pright,found) ;
+                                if (found) mij = GB_mcast (Mx, pM, msize) ;
                                 if (mij) GB_COPY_Z ;
                             }
 
@@ -504,9 +555,7 @@
                                 else
                                 { 
                                     // both M(i,j) and Z(i,j) exist
-                                    bool mij ;
-                                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                                    if (mij) GB_COPY_Z ;
+                                    if (GB_mcast (Mx, pM, msize)) GB_COPY_Z ;
                                     pM++ ;
                                     pZ++ ;
                                 }
@@ -521,15 +570,40 @@
                         // complemented mask, and C(:,j) empty
                         //------------------------------------------------------
 
-                        for ( ; pZ < pZ_end ; pZ++)
-                        { 
-                            int64_t i = Zi [pZ] ;
-                            bool mij = false ;  // M(i,j) false if not present
-                            int64_t pright = pM_end - 1 ;
-                            bool found ;
-                            GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;
-                            if (found) cast_M (&mij, Mx +(pM*msize), 0) ;
-                            if (!mij) GB_COPY_Z ;   // mask is complemented
+                        if (mdense)
+                        {
+
+                            //--------------------------------------------------
+                            // M(:,j) is dense
+                            //--------------------------------------------------
+
+                            for ( ; pZ < pZ_end ; pZ++)
+                            { 
+                                int64_t i = Zi [pZ] ;
+                                // mask is dense, lookup M(i,j)
+                                pM = pM_first + (i - iM_first) ;
+                                ASSERT (i == Mi [pM]) ;
+                                bool mij = GB_mcast (Mx, pM, msize) ;
+                                if (!mij) GB_COPY_Z ;   // mask is complemented
+                            }
+                        }
+                        else
+                        {
+
+                            //--------------------------------------------------
+                            // M(:,j) is sparse
+                            //--------------------------------------------------
+
+                            for ( ; pZ < pZ_end ; pZ++)
+                            { 
+                                int64_t i = Zi [pZ] ;
+                                bool mij = false ;
+                                int64_t pright = pM_end - 1 ;
+                                bool found ;
+                                GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;
+                                if (found) mij = GB_mcast (Mx, pM, msize) ;
+                                if (!mij) GB_COPY_Z ;   // mask is complemented
+                            }
                         }
                     }
 
@@ -548,7 +622,25 @@
                         // mask is complemented
                         //------------------------------------------------------
 
-                        if (cjnz > 32 * mjnz)
+                        if (mdense)
+                        {
+
+                            //--------------------------------------------------
+                            // M(:,j) is dense
+                            //--------------------------------------------------
+
+                            for ( ; pC < pC_end ; pC++)
+                            { 
+                                int64_t i = Ci [pC] ;
+                                // mask is dense, lookup M(i,j)
+                                pM = pM_first + (i - iM_first) ;
+                                ASSERT (i == Mi [pM]) ;
+                                bool mij = GB_mcast (Mx, pM, msize) ;
+                                if (mij) GB_COPY_C ;
+                            }
+
+                        }
+                        else if (cjnz > 32 * mjnz)
                         {
 
                             //--------------------------------------------------
@@ -557,9 +649,7 @@
 
                             for ( ; pM < pM_end ; pM++)
                             {
-                                bool mij ;
-                                cast_M (&mij, Mx +(pM*msize), 0) ;
-                                if (mij)
+                                if (GB_mcast (Mx, pM, msize))
                                 { 
                                     int64_t i = Mi [pM] ;
                                     int64_t pright = pC_end - 1 ;
@@ -583,8 +673,8 @@
                                 bool mij = false ;
                                 int64_t pright = pM_end - 1 ;
                                 bool found ;
-                                GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;
-                                if (found) cast_M (&mij, Mx +(pM*msize), 0) ;
+                                GB_BINARY_SEARCH (i, Mi, pM, pright, found);
+                                if (found) mij = GB_mcast (Mx, pM, msize) ;
                                 if (mij) GB_COPY_C ;
                             }
 
@@ -613,9 +703,7 @@
                                 else
                                 { 
                                     // both M(i,j) and C(i,j) exist
-                                    bool mij ;
-                                    cast_M (&mij, Mx +(pM*msize), 0) ;
-                                    if (mij) GB_COPY_C ;
+                                    if (GB_mcast (Mx, pM, msize)) GB_COPY_C ;
                                     pM++ ;
                                     pC++ ;
                                 }
@@ -630,15 +718,40 @@
                         // non-complemented mask, and Z(:,j) empty
                         //------------------------------------------------------
 
-                        for ( ; pC < pC_end ; pC++)
-                        { 
-                            int64_t i = Ci [pC] ;
-                            bool mij = false ;  // M(i,j) false if not present
-                            int64_t pright = pM_end - 1 ;
-                            bool found ;
-                            GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;
-                            if (found) cast_M (&mij, Mx +(pM*msize), 0) ;
-                            if (!mij) GB_COPY_C ;
+                        if (mdense)
+                        {
+
+                            //--------------------------------------------------
+                            // M(:,j) is dense
+                            //--------------------------------------------------
+
+                            for ( ; pC < pC_end ; pC++)
+                            { 
+                                int64_t i = Ci [pC] ;
+                                // mask is dense, lookup M(i,j)
+                                pM = pM_first + (i - iM_first) ;
+                                ASSERT (i == Mi [pM]) ;
+                                bool mij = GB_mcast (Mx, pM, msize) ;
+                                if (!mij) GB_COPY_C ;
+                            }
+                        }
+                        else
+                        {
+
+                            //--------------------------------------------------
+                            // M(:,j) is sparse
+                            //--------------------------------------------------
+
+                            for ( ; pC < pC_end ; pC++)
+                            { 
+                                int64_t i = Ci [pC] ;
+                                bool mij = false ;  // M(i,j) false if not present
+                                int64_t pright = pM_end - 1 ;
+                                bool found ;
+                                GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;
+                                if (found) mij = GB_mcast (Mx, pM, msize) ;
+                                if (!mij) GB_COPY_C ;
+                            }
                         }
                     }
                 }
diff --git a/Source/Template/GB_matrix.h b/Source/Template/GB_matrix.h
index ee92df1216..1ba1564200 100644
--- a/Source/Template/GB_matrix.h
+++ b/Source/Template/GB_matrix.h
@@ -2,7 +2,7 @@
 // GB_matrix.h: definitions for GrB_Matrix and GrB_Vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_ops_template.c b/Source/Template/GB_ops_template.c
index 60135e8c74..4e141731c6 100644
--- a/Source/Template/GB_ops_template.c
+++ b/Source/Template/GB_ops_template.c
@@ -2,7 +2,7 @@
 // GB_ops_template.c: built-in unary and binary functions and operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -35,28 +35,36 @@ GB_UNARY_OP_DEFINE (GrB_, MINV,     "minv")
 GB_UNARY_OP_DEFINE (GxB_, LNOT,     "not")
 
 //------------------------------------------------------------------------------
-// 10 binary functions z=f(x,y) where x,y,z have the same type
+// 12 binary functions z=f(x,y) where x,y,z have the same type
 //------------------------------------------------------------------------------
 
 extern void GB (FIRST_f)  (GB_Z_X_Y_ARGS) ;
 extern void GB (SECOND_f) (GB_Z_X_Y_ARGS) ;
-extern void GB (MIN_f)    (GB_Z_X_Y_ARGS) ;
-extern void GB (MAX_f)    (GB_Z_X_Y_ARGS) ;
+extern void GB (PAIR_f)   (GB_Z_X_Y_ARGS) ;
+extern void GB (ANY_f)    (GB_Z_X_Y_ARGS) ;
+
 extern void GB (PLUS_f)   (GB_Z_X_Y_ARGS) ;
 extern void GB (MINUS_f)  (GB_Z_X_Y_ARGS) ;
 extern void GB (RMINUS_f) (GB_Z_X_Y_ARGS) ;
 extern void GB (TIMES_f)  (GB_Z_X_Y_ARGS) ;
+
+extern void GB (MIN_f)    (GB_Z_X_Y_ARGS) ;
+extern void GB (MAX_f)    (GB_Z_X_Y_ARGS) ;
 extern void GB (DIV_f)    (GB_Z_X_Y_ARGS) ;
 extern void GB (RDIV_f)   (GB_Z_X_Y_ARGS) ;
 
 GB_BINARY_OP_DEFINE (GrB_, FIRST,  "first" )
 GB_BINARY_OP_DEFINE (GrB_, SECOND, "second")
-GB_BINARY_OP_DEFINE (GrB_, MIN,    "min"   )
-GB_BINARY_OP_DEFINE (GrB_, MAX,    "max"   )
+GB_BINARY_OP_DEFINE (GxB_, PAIR,   "pair"  )
+GB_BINARY_OP_DEFINE (GxB_, ANY,    "any"   )
+
 GB_BINARY_OP_DEFINE (GrB_, PLUS,   "plus"  )
 GB_BINARY_OP_DEFINE (GrB_, MINUS,  "minus" )
 GB_BINARY_OP_DEFINE (GxB_, RMINUS, "rminus")
 GB_BINARY_OP_DEFINE (GrB_, TIMES,  "times" )
+
+GB_BINARY_OP_DEFINE (GrB_, MIN,    "min"   )
+GB_BINARY_OP_DEFINE (GrB_, MAX,    "max"   )
 GB_BINARY_OP_DEFINE (GrB_, DIV,    "div"   )
 GB_BINARY_OP_DEFINE (GxB_, RDIV,   "rdiv"  )
 
diff --git a/Source/Template/GB_ops_template.h b/Source/Template/GB_ops_template.h
index de3e20b668..938d1205fd 100644
--- a/Source/Template/GB_ops_template.h
+++ b/Source/Template/GB_ops_template.h
@@ -2,7 +2,7 @@
 // GB_ops_template.h: define the unary and binary functions and operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -76,12 +76,14 @@ GB_PUBLIC struct GB_UnaryOp_opaque
     GB (opaque_GxB_LNOT) ;
 
 //------------------------------------------------------------------------------
-// 8 binary functions z=f(x,y) where x,y,z have the same type
+// 12 binary functions z=f(x,y) where x,y,z have the same type
 //------------------------------------------------------------------------------
 
-// first, second, plus, minus, rminus, times
 inline void GB (FIRST_f)  (GB_Z_X_Y_ARGS) { (*z) = (*x) ; }
 inline void GB (SECOND_f) (GB_Z_X_Y_ARGS) { (*z) = (*y) ; }
+inline void GB (PAIR_f)   (GB_Z_X_Y_ARGS) { (*z) = 1 ; }
+inline void GB (ANY_f)    (GB_Z_X_Y_ARGS) { (*z) = (*y) ; } // same as SECOND
+
 inline void GB (PLUS_f)   (GB_Z_X_Y_ARGS) { (*z) = (*x) + (*y) ; }
 inline void GB (MINUS_f)  (GB_Z_X_Y_ARGS) { (*z) = (*x) - (*y) ; }
 inline void GB (RMINUS_f) (GB_Z_X_Y_ARGS) { (*z) = (*y) - (*x) ; }
@@ -143,12 +145,16 @@ inline void GB (TIMES_f)  (GB_Z_X_Y_ARGS) { (*z) = (*x) * (*y) ; }
 GB_PUBLIC struct GB_BinaryOp_opaque
     GB (opaque_GrB_FIRST),
     GB (opaque_GrB_SECOND),
-    GB (opaque_GrB_MIN),
-    GB (opaque_GrB_MAX),
+    GB (opaque_GxB_PAIR),
+    GB (opaque_GxB_ANY),
+
     GB (opaque_GrB_PLUS),
     GB (opaque_GrB_MINUS),
     GB (opaque_GxB_RMINUS),
     GB (opaque_GrB_TIMES),
+
+    GB (opaque_GrB_MIN),
+    GB (opaque_GrB_MAX),
     GB (opaque_GrB_DIV),
     GB (opaque_GxB_RDIV) ;
 
diff --git a/Source/Template/GB_qsort_template.c b/Source/Template/GB_qsort_template.c
index 32b0642240..df32ff7ca4 100644
--- a/Source/Template/GB_qsort_template.c
+++ b/Source/Template/GB_qsort_template.c
@@ -2,7 +2,7 @@
 // GB_qsort_template: quicksort of a K-by-n array
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_red_factory.c b/Source/Template/GB_red_factory.c
index c661979ca4..ffe66b7d1e 100644
--- a/Source/Template/GB_red_factory.c
+++ b/Source/Template/GB_red_factory.c
@@ -2,15 +2,15 @@
 // GB_red_factory.c: switch factory for reduction operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 // This is a generic body of code for creating hard-coded versions of code for
-// 44 combinations of associative operators and built-in types: 10 types (all
+// 55 combinations of associative operators and built-in types: 10 types (all
 // but boolean) with MIN, MAX, PLUS, and TIMES, and one type (boolean) with
-// OR, AND, XOR, and EQ
+// OR, AND, XOR, and EQ, and all 11 types for the ANY monoid.
 
 // If GB_INCLUDE_SECOND_OPERATOR is defined then an additional 11 built-in
 // workers for the SECOND operator are also created, and 11 for FIRST, for
@@ -98,6 +98,24 @@ if (typecode != GB_BOOL_code)
             }
             break ;
 
+        case GB_ANY_opcode :
+
+            switch (typecode)
+            {
+                case GB_INT8_code   : GB_RED_WORKER (_any, _int8,   int8_t  )
+                case GB_INT16_code  : GB_RED_WORKER (_any, _int16,  int16_t )
+                case GB_INT32_code  : GB_RED_WORKER (_any, _int32,  int32_t )
+                case GB_INT64_code  : GB_RED_WORKER (_any, _int64,  int64_t )
+                case GB_UINT8_code  : GB_RED_WORKER (_any, _uint8,  uint8_t )
+                case GB_UINT16_code : GB_RED_WORKER (_any, _uint16, uint16_t)
+                case GB_UINT32_code : GB_RED_WORKER (_any, _uint32, uint32_t)
+                case GB_UINT64_code : GB_RED_WORKER (_any, _uint64, uint64_t)
+                case GB_FP32_code   : GB_RED_WORKER (_any, _fp32,   float   )
+                case GB_FP64_code   : GB_RED_WORKER (_any, _fp64,   double  )
+                default: ;
+            }
+            break ;
+
         //----------------------------------------------------------------------
         // FIRST and SECOND for GB_builder
         //----------------------------------------------------------------------
@@ -162,6 +180,7 @@ else
         case GB_LAND_opcode   : GB_RED_WORKER (_land,   _bool, bool)
         case GB_LXOR_opcode   : GB_RED_WORKER (_lxor,   _bool, bool)
         case GB_EQ_opcode     : GB_RED_WORKER (_eq,     _bool, bool)
+        case GB_ANY_opcode    : GB_RED_WORKER (_any,    _bool, bool)
         #ifdef GB_INCLUDE_SECOND_OPERATOR
         case GB_FIRST_opcode  : GB_RED_WORKER (_first,  _bool, bool)
         case GB_SECOND_opcode : GB_RED_WORKER (_second, _bool, bool)
diff --git a/Source/Template/GB_reduce_build_template.c b/Source/Template/GB_reduce_build_template.c
index 4b6b838229..fe6bcbbe28 100644
--- a/Source/Template/GB_reduce_build_template.c
+++ b/Source/Template/GB_reduce_build_template.c
@@ -2,7 +2,7 @@
 // GB_build_template: T=build(S), and assemble any duplicate tuples
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_reduce_each_index.c b/Source/Template/GB_reduce_each_index.c
index 3c6e5612df..ce46431101 100644
--- a/Source/Template/GB_reduce_each_index.c
+++ b/Source/Template/GB_reduce_each_index.c
@@ -2,7 +2,7 @@
 // GB_reduce_each_index: T(i)=reduce(A(i,:)), reduce a matrix to a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,6 +14,11 @@
 // reduce all workspaces to the workspace of thread 0.  Finally, this last
 // workspace is collected into T.
 
+// If an out-of-memory condition occurs, the macro GB_FREE_ALL is invoked to
+// free any workspace.  In the built-in workers, GB_FREE_ALL does nothing, and
+// the workspace is freed in the caller instead.  For the generic worker, the
+// GB_FREE_ALL macro defined in GB_reduce_to_vector frees all workspace.
+
 {
 
     //--------------------------------------------------------------------------
@@ -75,6 +80,7 @@
         GB_FREE_MEMORY (Marks, nth, sizeof (bool *)) ;
         GB_FREE_MEMORY (Tnz, nth, sizeof (int64_t)) ;
         GB_FREE_MEMORY (Count, ntasks+1, sizeof (int64_t)) ;
+        GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
     }
 
@@ -191,6 +197,7 @@
         GB_FREE_MEMORY (Work0, n, zsize) ;
         GB_FREE_MEMORY (Mark0, n, sizeof (bool)) ;
         GB_FREE_MEMORY (Count, ntasks+1, sizeof (int64_t)) ;
+        GB_FREE_ALL ;
         return (GB_OUT_OF_MEMORY) ;
     }
 
diff --git a/Source/Template/GB_reduce_each_vector.c b/Source/Template/GB_reduce_each_vector.c
index 91bc78ded4..6215988bec 100644
--- a/Source/Template/GB_reduce_each_vector.c
+++ b/Source/Template/GB_reduce_each_vector.c
@@ -2,7 +2,7 @@
 // GB_reduce_each_vector: Tx(j)=reduce(A(:,j)), reduce a matrix to a vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_reduce_panel.c b/Source/Template/GB_reduce_panel.c
index 7003dd6ca4..d5d74bcee8 100644
--- a/Source/Template/GB_reduce_panel.c
+++ b/Source/Template/GB_reduce_panel.c
@@ -2,7 +2,7 @@
 // GB_reduce_panel: s=reduce(A), reduce a matrix to a scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -20,6 +20,11 @@
     int64_t anz = GB_NNZ (A) ;
     ASSERT (anz > 0) ;
 
+    #if GB_IS_ANY_MONOID
+    // the ANY monoid can take any entry, and terminate immediately
+    s = Ax [anz-1] ;
+    #else
+
     //--------------------------------------------------------------------------
     // typecast workspace
     //--------------------------------------------------------------------------
@@ -41,7 +46,7 @@
         GB_ATYPE Panel [GB_PANEL] ;
         int64_t first_panel_size = GB_IMIN (GB_PANEL, anz) ;
         for (int64_t k = 0 ; k < first_panel_size ; k++)
-        {
+        { 
             Panel [k] = Ax [k] ;
         }
 
@@ -148,7 +153,7 @@
                 #pragma omp critical (GB_reduce_panel)
                 my_exit = early_exit ;
             #else
-                #pragma omp atomic read
+                GB_ATOMIC_READ
                 my_exit = early_exit ;
             #endif
 
@@ -239,7 +244,7 @@
                         #pragma omp critical (GB_reduce_panel)
                         early_exit = true ;
                     #else
-                        #pragma omp atomic write
+                        GB_ATOMIC_WRITE
                         early_exit = true ;
                     #endif
                 }
@@ -264,5 +269,6 @@
             GB_ADD_ARRAY_TO_SCALAR (s, W, tid) ;
         }
     }
+    #endif
 }
 
diff --git a/Source/Template/GB_reduce_to_scalar_template.c b/Source/Template/GB_reduce_to_scalar_template.c
index 980c05d50e..167e6d9627 100644
--- a/Source/Template/GB_reduce_to_scalar_template.c
+++ b/Source/Template/GB_reduce_to_scalar_template.c
@@ -2,7 +2,7 @@
 // GB_reduce_to_scalar_template: s=reduce(A), reduce a matrix to a scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_select_factory.c b/Source/Template/GB_select_factory.c
index fb2f481209..3836bfbed4 100644
--- a/Source/Template/GB_select_factory.c
+++ b/Source/Template/GB_select_factory.c
@@ -2,7 +2,7 @@
 // GB_select_factory: switch factory for C=select(A,thunk)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,8 +15,7 @@ switch (opcode)
     case GB_DIAG_opcode          : GB_SEL_WORKER (_diag    , _any, GB_void)
     case GB_OFFDIAG_opcode       : GB_SEL_WORKER (_offdiag , _any, GB_void)
     case GB_RESIZE_opcode        : GB_SEL_WORKER (_resize  , _any, GB_void)
-    case GB_USER_SELECT_C_opcode :
-    case GB_USER_SELECT_R_opcode : GB_SEL_WORKER (_user    , _any, GB_void)
+    case GB_USER_SELECT_opcode   : GB_SEL_WORKER (_user    , _any, GB_void)
 
     case GB_NONZOMBIE_opcode :  // A(i,j) not a zombie
 
diff --git a/Source/Template/GB_select_phase1.c b/Source/Template/GB_select_phase1.c
index 2771b33dfe..64ccfa696c 100644
--- a/Source/Template/GB_select_phase1.c
+++ b/Source/Template/GB_select_phase1.c
@@ -2,7 +2,7 @@
 // GB_select_count: count entries in eacn vector for C=select(A,thunk)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -82,7 +82,7 @@
             { 
                 // binary search for A (i,k)
                 int64_t pright = pA_end - 1 ;
-                GB_BINARY_SPLIT_SEARCH (i, Ai, p, pright, found) ;
+                GB_SPLIT_BINARY_SEARCH (i, Ai, p, pright, found) ;
             }
 
             #if defined ( GB_TRIL_SELECTOR )
diff --git a/Source/Template/GB_select_phase2.c b/Source/Template/GB_select_phase2.c
index 1966b13ecb..b16f30be01 100644
--- a/Source/Template/GB_select_phase2.c
+++ b/Source/Template/GB_select_phase2.c
@@ -2,7 +2,7 @@
 // GB_select_phase2: C=select(A,thunk)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_semiring_template.c b/Source/Template/GB_semiring_template.c
index 8c0d365af4..07e6bf8129 100644
--- a/Source/Template/GB_semiring_template.c
+++ b/Source/Template/GB_semiring_template.c
@@ -2,7 +2,7 @@
 // GB_semiring_template.c: built-in unary and binary functions and operators
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,42 +11,42 @@
 // semirings.  That file has defined either GB_BOOLEAN, or GB_TYPE as one of
 // the 10 non-boolean types.
 
-// Using built-in types and operators, 1040 unique semirings can be built.
+// Using built-in types and operators, 1355 unique semirings can be built.
 // This count excludes redundant Boolean operators (for example GrB_TIMES_BOOL
 // and GrB_LAND are different operators but they are redundant since they
 // always return the same result):
 
-// 760 semirings with a multiply operator TxT -> T where T is non-Boolean, from
+// 1000 semirings with a multiply operator TxT -> T where T is non-Boolean, from
 // the complete cross product of:
 
-//      4 add monoids (MIN, MAX, PLUS, TIMES)
-//      19 multiply operators:
-//          (FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV
-//           ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
-//           LOR, LAND, LXOR)
+//      5 add monoids: MIN, MAX, PLUS, TIMES, ANY
+//      20 multiply operators:
+//          FIRST, SECOND, PAIR, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV
+//          ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
+//          LOR, LAND, LXOR
 //      10 non-Boolean types, T
 
-//      a single instance of this file creates 4*19 = 76 semirings of this
+//      a single instance of this file creates 100 semirings of this
 //      form, of one type T, when T is not BOOL
 
-// 240 semirings with a comparison operator TxT -> bool, where T is
+// 300 semirings with a comparison operator TxT -> bool, where T is
 // non-Boolean, from the complete cross product of:
 
-//      4 Boolean add monoids: (LAND, LOR, LXOR, EQ)
-//      6 multiply operators: (EQ, NE, GT, LT, GE, LE)
+//      5 Boolean add monoids: LAND, LOR, LXOR, EQ, ANY
+//      6 multiply operators: EQ, NE, GT, LT, GE, LE
 //      10 non-Boolean types, T
 
-//      a single instance of this file creates 4*6 = 24 semirings of this form,
+//      a single instance of this file creates 30 semirings of this form,
 //      of one type T, when T is not BOOL
 
-// 40 semirings with purely Boolean types, bool x bool -> bool, from the
+// 55 semirings with purely Boolean types, bool x bool -> bool, from the
 // complete cross product of:
 
-//      4 Boolean add monoids (LAND, LOR, LXOR, EQ)
-//      10 multiply operators:
-//          (FIRST, SECOND, LOR, LAND, LXOR, EQ, GT, LT, GE, LE)
+//      5 Boolean add monoids: LAND, LOR, LXOR, EQ, ANY
+//      11 multiply operators:
+//          FIRST, SECOND, PAIR, LOR, LAND, LXOR, EQ, GT, LT, GE, LE
 
-//      a single instance of this file creates all 4*10 = 40 purely Boolean
+//      a single instance of this file creates all 5*11 = 55 purely Boolean
 //      semirings, when T is BOOL and GB_BOOLEAN is defined
 
 //------------------------------------------------------------------------------
@@ -54,75 +54,89 @@
 #ifdef GB_BOOLEAN
 
 //------------------------------------------------------------------------------
-// 40 purely Boolean semirings
+// 55 purely Boolean semirings
 //------------------------------------------------------------------------------
 
-// All types in these 40 semirings are BOOL
+// All types in these 55 semirings are BOOL
 
-// 10 semirings with LOR monoid; the 2nd argument is the multiply operator
+// 11 semirings with LOR monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( LOR   , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( LOR   , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( LOR   , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( LOR   , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( LOR   , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( LOR   , GxB_, LXOR   )
 GB_SEMIRING_DEFINE ( LOR   , GrB_, EQ     )
-GB_SEMIRING_DEFINE ( LOR   , GrB_, NE     )
 GB_SEMIRING_DEFINE ( LOR   , GrB_, GT     )
 GB_SEMIRING_DEFINE ( LOR   , GrB_, LT     )
 GB_SEMIRING_DEFINE ( LOR   , GrB_, GE     )
 GB_SEMIRING_DEFINE ( LOR   , GrB_, LE     )
 
-// 10 semirings with LAND monoid; the 2nd argument is the multiply operator
+// 11 semirings with LAND monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( LAND  , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( LAND  , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( LAND  , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( LAND  , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( LAND  , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( LAND  , GxB_, LXOR   )
 GB_SEMIRING_DEFINE ( LAND  , GrB_, EQ     )
-GB_SEMIRING_DEFINE ( LAND  , GrB_, NE     )
 GB_SEMIRING_DEFINE ( LAND  , GrB_, GT     )
 GB_SEMIRING_DEFINE ( LAND  , GrB_, LT     )
 GB_SEMIRING_DEFINE ( LAND  , GrB_, GE     )
 GB_SEMIRING_DEFINE ( LAND  , GrB_, LE     )
 
-// 10 semirings with LXOR monoid; the 2nd argument is the multiply operator
+// 11 semirings with LXOR monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( LXOR , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( LXOR , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( LXOR , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( LXOR , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( LXOR , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( LXOR , GxB_, LXOR   )
 GB_SEMIRING_DEFINE ( LXOR , GrB_, EQ     )
-GB_SEMIRING_DEFINE ( LXOR , GrB_, NE     )
 GB_SEMIRING_DEFINE ( LXOR , GrB_, GT     )
 GB_SEMIRING_DEFINE ( LXOR , GrB_, LT     )
 GB_SEMIRING_DEFINE ( LXOR , GrB_, GE     )
 GB_SEMIRING_DEFINE ( LXOR , GrB_, LE     )
 
-// 10 semirings with EQ monoid; the 2nd argument is the multiply operator
+// 11 semirings with EQ monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( EQ   , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( EQ   , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( EQ   , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( EQ   , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( EQ   , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( EQ   , GxB_, LXOR   )
 GB_SEMIRING_DEFINE ( EQ   , GrB_, EQ     )
-GB_SEMIRING_DEFINE ( EQ   , GrB_, NE     )
 GB_SEMIRING_DEFINE ( EQ   , GrB_, GT     )
 GB_SEMIRING_DEFINE ( EQ   , GrB_, LT     )
 GB_SEMIRING_DEFINE ( EQ   , GrB_, GE     )
 GB_SEMIRING_DEFINE ( EQ   , GrB_, LE     )
 
+// 11 semirings with ANY monoid; the 2nd argument is the multiply operator
+GB_SEMIRING_DEFINE ( ANY  , GrB_, FIRST  )
+GB_SEMIRING_DEFINE ( ANY  , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( ANY  , GxB_, PAIR   )
+GB_SEMIRING_DEFINE ( ANY  , GxB_, LOR    )
+GB_SEMIRING_DEFINE ( ANY  , GxB_, LAND   )
+GB_SEMIRING_DEFINE ( ANY  , GxB_, LXOR   )
+GB_SEMIRING_DEFINE ( ANY  , GrB_, EQ     )
+GB_SEMIRING_DEFINE ( ANY  , GrB_, GT     )
+GB_SEMIRING_DEFINE ( ANY  , GrB_, LT     )
+GB_SEMIRING_DEFINE ( ANY  , GrB_, GE     )
+GB_SEMIRING_DEFINE ( ANY  , GrB_, LE     )
+
 #else
 
 //------------------------------------------------------------------------------
-// 76 semirings of the form TxT->T
+// 100 semirings of the form TxT->T
 //------------------------------------------------------------------------------
 
-// All types in these 68 semirings are the same.  These are defined for
+// All types in these semirings are the same.  These are defined for
 // the 10 non-Boolean types, not when T is BOOL.
 
-// 19 semirings with MIN monoid; the 2nd argument is the multiply operator
+// 20 semirings with MIN monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( MIN   , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( MIN   , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( MIN   , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( MIN   , GrB_, MIN    )
 GB_SEMIRING_DEFINE ( MIN   , GrB_, MAX    )
 GB_SEMIRING_DEFINE ( MIN   , GrB_, PLUS   )
@@ -141,9 +155,10 @@ GB_SEMIRING_DEFINE ( MIN   , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( MIN   , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( MIN   , GxB_, LXOR   )
 
-// 19 semirings with MAX monoid; the 2nd argument is the multiply operator
+// 20 semirings with MAX monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( MAX   , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( MAX   , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( MAX   , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( MAX   , GrB_, MIN    )
 GB_SEMIRING_DEFINE ( MAX   , GrB_, MAX    )
 GB_SEMIRING_DEFINE ( MAX   , GrB_, PLUS   )
@@ -162,9 +177,10 @@ GB_SEMIRING_DEFINE ( MAX   , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( MAX   , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( MAX   , GxB_, LXOR   )
 
-// 19 semirings with PLUS monoid; the 2nd argument is the multiply operator
+// 20 semirings with PLUS monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( PLUS  , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( PLUS  , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( PLUS  , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( PLUS  , GrB_, MIN    )
 GB_SEMIRING_DEFINE ( PLUS  , GrB_, MAX    )
 GB_SEMIRING_DEFINE ( PLUS  , GrB_, PLUS   )
@@ -183,9 +199,10 @@ GB_SEMIRING_DEFINE ( PLUS  , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( PLUS  , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( PLUS  , GxB_, LXOR   )
 
-// 19 semirings with TIMES monoid; the 2nd argument is the multiply operator
+// 20 semirings with TIMES monoid; the 2nd argument is the multiply operator
 GB_SEMIRING_DEFINE ( TIMES , GrB_, FIRST  )
 GB_SEMIRING_DEFINE ( TIMES , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( TIMES , GxB_, PAIR   )
 GB_SEMIRING_DEFINE ( TIMES , GrB_, MIN    )
 GB_SEMIRING_DEFINE ( TIMES , GrB_, MAX    )
 GB_SEMIRING_DEFINE ( TIMES , GrB_, PLUS   )
@@ -204,8 +221,30 @@ GB_SEMIRING_DEFINE ( TIMES , GxB_, LOR    )
 GB_SEMIRING_DEFINE ( TIMES , GxB_, LAND   )
 GB_SEMIRING_DEFINE ( TIMES , GxB_, LXOR   )
 
+// 20 semirings with ANY monoid; the 2nd argument is the multiply operator
+GB_SEMIRING_DEFINE ( ANY   , GrB_, FIRST  )
+GB_SEMIRING_DEFINE ( ANY   , GrB_, SECOND )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, PAIR   )
+GB_SEMIRING_DEFINE ( ANY   , GrB_, MIN    )
+GB_SEMIRING_DEFINE ( ANY   , GrB_, MAX    )
+GB_SEMIRING_DEFINE ( ANY   , GrB_, PLUS   )
+GB_SEMIRING_DEFINE ( ANY   , GrB_, MINUS  )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, RMINUS )
+GB_SEMIRING_DEFINE ( ANY   , GrB_, TIMES  )
+GB_SEMIRING_DEFINE ( ANY   , GrB_, DIV    )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, RDIV   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, ISEQ   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, ISNE   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, ISGT   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, ISLT   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, ISGE   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, ISLE   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, LOR    )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, LAND   )
+GB_SEMIRING_DEFINE ( ANY   , GxB_, LXOR   )
+
 //------------------------------------------------------------------------------
-// 24 semirings of the form TxT->bool
+// 30 semirings of the form TxT->bool
 //------------------------------------------------------------------------------
 
 // The multiply operator has the form z=compare(x,y), where x and y are of
@@ -247,6 +286,14 @@ GB_SEMIRING_COMPARE_DEFINE ( EQ   , LT )
 GB_SEMIRING_COMPARE_DEFINE ( EQ   , GE )
 GB_SEMIRING_COMPARE_DEFINE ( EQ   , LE )
 
+// 6 semirings with ANY monoid; the 2nd argument is the comparison operator
+GB_SEMIRING_COMPARE_DEFINE ( ANY  , EQ )
+GB_SEMIRING_COMPARE_DEFINE ( ANY  , NE )
+GB_SEMIRING_COMPARE_DEFINE ( ANY  , GT )
+GB_SEMIRING_COMPARE_DEFINE ( ANY  , LT )
+GB_SEMIRING_COMPARE_DEFINE ( ANY  , GE )
+GB_SEMIRING_COMPARE_DEFINE ( ANY  , LE )
+
 #endif
 
 #undef GB
diff --git a/Source/Template/GB_subref_template.c b/Source/Template/GB_subref_template.c
index 57a92e6e14..4230e735fa 100644
--- a/Source/Template/GB_subref_template.c
+++ b/Source/Template/GB_subref_template.c
@@ -2,7 +2,7 @@
 // GB_subref_template: C = A(I,J), or C = pattern (A(I,J))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -393,7 +393,7 @@
                         int64_t pright = pA_end - 1 ;
                         #if defined ( GB_SYMBOLIC )
                         bool is_zombie ;
-                        GB_BINARY_ZOMBIE (i, Ai, pleft, pright, found,
+                        GB_BINARY_SEARCH_ZOMBIE (i, Ai, pleft, pright, found,
                             nzombies, is_zombie) ;
                         #else
                         GB_BINARY_SEARCH (i, Ai, pleft, pright, found) ;
diff --git a/Source/Template/GB_unaryop_factory.c b/Source/Template/GB_unaryop_factory.c
index a05568d855..972ac3233a 100644
--- a/Source/Template/GB_unaryop_factory.c
+++ b/Source/Template/GB_unaryop_factory.c
@@ -2,7 +2,7 @@
 // GB_unaryop_factory.c:  switch factory for unary operators and 2 types
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/GB_unaryop_transpose.c b/Source/Template/GB_unaryop_transpose.c
index 6fb7870c6c..6714540374 100644
--- a/Source/Template/GB_unaryop_transpose.c
+++ b/Source/Template/GB_unaryop_transpose.c
@@ -2,7 +2,7 @@
 // GB_unaryop_transpose: C=op(cast(A')), transpose, typecast, and apply op
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Source/Template/README.txt b/Source/Template/README.txt
index e68436353a..bfc8dc7809 100644
--- a/Source/Template/README.txt
+++ b/Source/Template/README.txt
@@ -1,4 +1,4 @@
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 This is the GraphBLAS/Source/Template folder.
diff --git a/Source/all_user_objects.c b/Source/all_user_objects.c
deleted file mode 100644
index 284ace3ebe..0000000000
--- a/Source/all_user_objects.c
+++ /dev/null
@@ -1,78 +0,0 @@
-//------------------------------------------------------------------------------
-// SuiteSparse/GraphBLAS/Source/all_user_objects.c
-//------------------------------------------------------------------------------
-
-// This file is constructed automatically by cmake and m4 when GraphBLAS is
-// compiled, from the Config/user_def*.m4 and *.m4 files in User/.  Do not edit
-// this file directly.  It contains references to internally-defined functions
-// and objects inside GraphBLAS, which are not user-callable.
-
-#include "GB_mxm.h"
-#include "GB_user.h"
-
-//------------------------------------------------------------------------------
-// SuiteSparse/GraphBLAS/Config/user_def1.m4: define user-defined objects
-//------------------------------------------------------------------------------
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-//------------------------------------------------------------------------------
-// SuiteSparse/GraphBLAS/Config/user_def2.m4: code to call user semirings
-//------------------------------------------------------------------------------
-
-GrB_Info GB_AxB_user
-(
-    const GrB_Desc_Value GB_AxB_method,
-    const GrB_Semiring GB_s,
-
-    GrB_Matrix *GB_Chandle,
-    const GrB_Matrix GB_M,
-    const GrB_Matrix GB_A,          // not used for dot2 method
-    const GrB_Matrix GB_B,
-    bool GB_flipxy,
-
-    // for heap method only:
-    int64_t *GB_RESTRICT GB_List,
-    GB_pointer_pair *GB_RESTRICT GB_pA_pair,
-    GB_Element *GB_RESTRICT GB_Heap,
-    const int64_t GB_bjnz_max,
-
-    // for Gustavson's method only:
-    GB_Sauna GB_C_Sauna,
-
-    // for dot method only:
-    const GrB_Matrix *GB_Aslice,    // for dot2 only
-    int64_t *GB_RESTRICT GB_B_slice,   // for dot2 only
-    const int GB_dot_nthreads,      // for dot2 and dot3
-    const int GB_naslice,           // for dot2 only
-    const int GB_nbslice,           // for dot2 only
-    int64_t **GB_C_counts,          // for dot2 only
-
-    // for dot3 method only:
-    const GB_task_struct *GB_RESTRICT GB_TaskList,
-    const int GB_ntasks
-)
-{
-    GrB_Info GB_info = GrB_SUCCESS ;
-    if (0)
-    {
-        ;
-    }
-    return (GB_info) ;
-}
-
diff --git a/Source/codegen.m b/Source/codegen.m
index 29605f7385..2d15a9f2ab 100644
--- a/Source/codegen.m
+++ b/Source/codegen.m
@@ -1,6 +1,7 @@
 function codegen
 %CODEGEN generate all code for Generated/*
 
+codegen_1type ;     % types
 codegen_axb ;       % semirings
 codegen_binop ;     % binary operators
 codegen_unop ;      % unary operators
diff --git a/Source/codegen_1type.m b/Source/codegen_1type.m
new file mode 100644
index 0000000000..fc15a3f028
--- /dev/null
+++ b/Source/codegen_1type.m
@@ -0,0 +1,34 @@
+function codegen_1type
+%CODEGEN_1TYPE create functions for all 11 built-in types
+%
+% This function creates all files of the form GB_type__*.[ch], including 11
+% functions (GB_type__*.c) and one include file, GB_type__include.h.
+
+fprintf ('\ntypes:\n') ;
+
+f = fopen ('Generated/GB_type__include.h', 'w') ;
+fprintf (f, '//------------------------------------------------------------------------------\n') ;
+fprintf (f, '// GB_type__include.h: definitions for GB_type__*.c\n') ;
+fprintf (f, '//------------------------------------------------------------------------------\n') ;
+fprintf (f, '\n') ;
+fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.\n') ;
+fprintf (f, '// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.\n') ;
+fprintf (f, '\n') ;
+fprintf (f, '// This file has been automatically generated from Generator/GB_type.h') ;
+fprintf (f, '\n\n') ;
+fclose (f) ;
+
+codegen_1type_template ('bool') ;
+codegen_1type_template ('int8_t') ;
+codegen_1type_template ('int16_t') ;
+codegen_1type_template ('int32_t') ;
+codegen_1type_template ('int64_t') ;
+codegen_1type_template ('uint8_t') ;
+codegen_1type_template ('uint16_t') ;
+codegen_1type_template ('uint32_t') ;
+codegen_1type_template ('uint64_t') ;
+codegen_1type_template ('float') ;
+codegen_1type_template ('double') ;
+
+fprintf ('\n') ;
+
diff --git a/Source/codegen_1type_template.m b/Source/codegen_1type_template.m
new file mode 100644
index 0000000000..da67579097
--- /dev/null
+++ b/Source/codegen_1type_template.m
@@ -0,0 +1,35 @@
+function codegen_1type_template (xtype)
+%CODEGEN_1TYPE_TEMPLATE create the functions that compute over a given type
+%
+% codegen_1type_template (xtype)
+%
+% xtype is the C type name (e.g. 'int8_t').  Writes Generated/GB_type__*.c and
+% appends to Generated/GB_type__include.h, via m4 and a temporary control.m4.
+
+f = fopen ('control.m4', 'w') ;
+[fname, ~, ~] = codegen_type (xtype) ;
+
+% function names
+fprintf (f, 'define(`GB_Cdense_05d'', `GB_Cdense_05d__%s'')\n', fname) ;
+fprintf (f, 'define(`GB_Cdense_06d'', `GB_Cdense_06d__%s'')\n', fname) ;
+fprintf (f, 'define(`GB_Cdense_25'', `GB_Cdense_25__%s'')\n', fname) ;
+fprintf (f, 'define(`GB_ctype'', `%s'')\n', xtype) ;
+
+% create the disable flag
+disable = sprintf ('GxB_NO_%s', upper (fname)) ;
+fprintf (f, 'define(`GB_disable'', `(%s)'')\n', disable) ;
+fclose (f) ;
+
+% construct the *.c file; tail skips the 5 blank lines left by the 5 defines
+cmd = sprintf (...
+'cat control.m4 Generator/GB_type.c | m4 | tail -n +6 > Generated/GB_type__%s.c', ...
+fname) ;
+fprintf ('.') ;
+system (cmd) ;
+
+% append to the *.h file
+cmd = 'cat control.m4 Generator/GB_type.h | m4 | tail -n +6 >> Generated/GB_type__include.h' ;
+system (cmd) ;
+
+delete ('control.m4') ;
+
diff --git a/Source/codegen_axb.m b/Source/codegen_axb.m
index 3054a19fd5..f7d2287132 100644
--- a/Source/codegen_axb.m
+++ b/Source/codegen_axb.m
@@ -11,7 +11,7 @@
 fprintf (f, '// GB_AxB__include.h: definitions for GB_AxB__*.c\n') ;
 fprintf (f, '//------------------------------------------------------------------------------\n') ;
 fprintf (f, '\n') ;
-fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.\n') ;
+fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.\n') ;
 fprintf (f, '// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.\n') ;
 fprintf (f, '\n') ;
 fprintf (f, '// This file has been automatically generated from Generator/GB_AxB.h') ;
@@ -20,6 +20,11 @@
 
 codegen_axb_template ('first',  'xarg', 'xarg') ;
 codegen_axb_template ('second', 'yarg', 'yarg') ;
+codegen_axb_template ('pair',   '1', '1') ;
+
+% The ANY operator is not used as a multiplicative operator in the generated
+% functions.  It can be used as the multiplicative op in a semiring, but is
+% renamed to SECOND before calling the generated function.
 
 codegen_axb_template ('min',    [ ], 'GB_IMIN (xarg, yarg)', 'fminf (xarg, yarg)', 'fmin (xarg, yarg)') ;
 codegen_axb_template ('max',    [ ], 'GB_IMAX (xarg, yarg)', 'fmaxf (xarg, yarg)', 'fmax (xarg, yarg)') ;
diff --git a/Source/codegen_axb_compare_template.m b/Source/codegen_axb_compare_template.m
index 9e8711b3db..650ef4b4ae 100644
--- a/Source/codegen_axb_compare_template.m
+++ b/Source/codegen_axb_compare_template.m
@@ -4,67 +4,87 @@ function codegen_axb_compare_template (multop, bmult, mult)
 fprintf ('\n%-7s', multop) ;
 
 % lor monoid
-add = 'w = (w || t)' ;
+add = 'w |= t' ;
+addfunc = 'w | t' ;
 if (~isempty (bmult))
-codegen_axb_method ('lor', multop, add, bmult, 'bool', 'bool'    , 'false', 'true') ;
+codegen_axb_method ('lor', multop, add, addfunc, bmult, 'bool', 'bool'    , 'false', 'true', 1) ;
 end
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'int8_t'  , 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'uint8_t' , 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'int16_t' , 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'uint16_t', 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'int32_t' , 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'uint32_t', 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'int64_t' , 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'uint64_t', 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'float'   , 'false', 'true') ;
-codegen_axb_method ('lor', multop, add,  mult, 'bool', 'double'  , 'false', 'true') ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'int8_t'  , 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'uint8_t' , 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'int16_t' , 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'uint16_t', 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'int32_t' , 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'uint32_t', 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'int64_t' , 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'uint64_t', 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'float'   , 'false', 'true', 1) ;
+codegen_axb_method ('lor', multop, add, addfunc,  mult, 'bool', 'double'  , 'false', 'true', 1) ;
+
+% any monoid
+add = 'w = t' ;
+addfunc = 't' ;
+if (~isempty (bmult))
+codegen_axb_method ('any', multop, add, addfunc, bmult, 'bool', 'bool'    , 'false', '(any value)', 0) ;
+end
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'int8_t'  , 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'uint8_t' , 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'int16_t' , 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'uint16_t', 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'int32_t' , 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'uint32_t', 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'int64_t' , 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'uint64_t', 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'float'   , 'false', '(any value)', 0) ;
+codegen_axb_method ('any', multop, add, addfunc,  mult, 'bool', 'double'  , 'false', '(any value)', 0) ;
 
 % land monoid
-add = 'w = (w && t)' ;
+add = 'w &= t' ;
+addfunc = 'w & t' ;
 if (~isempty (bmult))
-codegen_axb_method ('land', multop, add, bmult, 'bool', 'bool'    , 'true', 'false') ;
+codegen_axb_method ('land', multop, add, addfunc, bmult, 'bool', 'bool'    , 'true', 'false', 1) ;
 end
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'int8_t'  , 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'uint8_t' , 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'int16_t' , 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'uint16_t', 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'int32_t' , 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'uint32_t', 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'int64_t' , 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'uint64_t', 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'float'   , 'true', 'false') ;
-codegen_axb_method ('land', multop, add,  mult, 'bool', 'double'  , 'true', 'false') ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'int8_t'  , 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'uint8_t' , 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'int16_t' , 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'uint16_t', 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'int32_t' , 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'uint32_t', 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'int64_t' , 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'uint64_t', 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'float'   , 'true', 'false', 1) ;
+codegen_axb_method ('land', multop, add, addfunc,  mult, 'bool', 'double'  , 'true', 'false', 1) ;
 
 % lxor monoid
-add = 'w = (w != t)' ;
+add = 'w ^= t' ;
+addfunc = 'w ^ t' ;
 if (~isempty (bmult))
-codegen_axb_method ('lxor', multop, add, bmult, 'bool', 'bool'    , 'false', [ ]) ;
+codegen_axb_method ('lxor', multop, add, addfunc, bmult, 'bool', 'bool'    , 'false', [ ], 1) ;
 end
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'int8_t'  , 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'uint8_t' , 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'int16_t' , 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'uint16_t', 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'int32_t' , 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'uint32_t', 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'int64_t' , 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'uint64_t', 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'float'   , 'false', [ ]) ;
-codegen_axb_method ('lxor', multop, add,  mult, 'bool', 'double'  , 'false', [ ]) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'int8_t'  , 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'uint8_t' , 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'int16_t' , 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'uint16_t', 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'int32_t' , 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'uint32_t', 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'int64_t' , 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'uint64_t', 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'float'   , 'false', [ ], 1) ;
+codegen_axb_method ('lxor', multop, add, addfunc,  mult, 'bool', 'double'  , 'false', [ ], 1) ;
 
-% eq monoid
+% eq (lxnor) monoid.  Cannot be done with OpenMP atomic update
 add = 'w = (w == t)' ;
+addfunc = 'w == t' ;
 if (~isempty (bmult))
-codegen_axb_method ('eq', multop, add, bmult, 'bool', 'bool'    , 'true', [ ]) ;
+codegen_axb_method ('eq', multop, add, addfunc, bmult, 'bool', 'bool'    , 'true', [ ], 0) ;
 end
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'int8_t'  , 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'uint8_t' , 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'int16_t' , 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'uint16_t', 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'int32_t' , 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'uint32_t', 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'int64_t' , 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'uint64_t', 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'float'   , 'true', [ ]) ;
-codegen_axb_method ('eq', multop, add,  mult, 'bool', 'double'  , 'true', [ ]) ;
-
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'int8_t'  , 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'uint8_t' , 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'int16_t' , 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'uint16_t', 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'int32_t' , 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'uint32_t', 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'int64_t' , 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'uint64_t', 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'float'   , 'true', [ ], 0) ;
+codegen_axb_method ('eq', multop, add, addfunc,  mult, 'bool', 'double'  , 'true', [ ], 0) ;
 
diff --git a/Source/codegen_axb_method.m b/Source/codegen_axb_method.m
index 32f3715343..61437cd76d 100644
--- a/Source/codegen_axb_method.m
+++ b/Source/codegen_axb_method.m
@@ -1,10 +1,50 @@
-function codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity, terminal)
+function codegen_axb_method (addop, multop, add, addfunc, mult, ztype, xytype, identity, terminal, omp_atomic)
 %CODEGEN_AXB_METHOD create a function to compute C=A*B over a semiring
 %
-% codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity, terminal)
+% codegen_axb_method (addop, multop, add, addfunc, mult, ztype, xytype, identity, terminal, omp_atomic)
 
 f = fopen ('control.m4', 'w') ;
 
+is_first    = isequal (multop, 'first') ;
+is_second   = isequal (multop, 'second') ;
+is_pair     = isequal (multop, 'pair') ;
+is_any      = isequal (addop, 'any') ;
+is_eq       = isequal (addop, 'eq') ;
+is_any_pair = is_any && isequal (multop, 'pair') ;
+is_real     = isequal (ztype, 'float') || isequal (ztype, 'double') ;
+
+% special cases for the PAIR multiplier
+switch (ztype)
+    case { 'bool' }
+        bits = '0x1L' ;
+    case { 'int8_t', 'uint8_t' }
+        bits = '0xffL' ;
+    case { 'int16_t', 'uint16_t' }
+        bits = '0xffffL' ;
+    case { 'int32_t', 'uint32_t' }
+        bits = '0xffffffffL' ;
+    case { 'int64_t', 'uint64_t' }
+        bits = '0' ;
+    case { 'float', 'double' }
+        bits = '0' ;
+    otherwise
+        error ('unknown type') ;
+end
+fprintf (f, 'define(`GB_ctype_bits'', `%s'')\n', bits) ;
+
+if (is_pair)
+    % these semirings are renamed to any_pair, and thus not created
+    if (isequal (addop, 'land') || isequal (addop, 'eq'   ) || ...
+        isequal (addop, 'lor' ) || isequal (addop, 'max'  ) || ...
+        isequal (addop, 'min' ) || isequal (addop, 'times'))
+        % control.m4 was opened above: close and remove it before the
+        % early exit so the file handle is not leaked
+        fclose (f) ;
+        delete ('control.m4') ;
+        return
+    end
+end
+
 [fname, unsigned, bits] = codegen_type (xytype) ;
 [zname, ~, ~] = codegen_type (ztype) ;
 
@@ -14,7 +54,9 @@ function codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity,
 fprintf (f, 'define(`GB_AgusB'', `GB_AgusB__%s'')\n', name) ;
 fprintf (f, 'define(`GB_Adot2B'', `GB_Adot2B__%s'')\n', name) ;
 fprintf (f, 'define(`GB_Adot3B'', `GB_Adot3B__%s'')\n', name) ;
+fprintf (f, 'define(`GB_Adot4B'', `GB_Adot4B__%s'')\n', name) ;
 fprintf (f, 'define(`GB_AheapB'', `GB_AheapB__%s'')\n', name) ;
+fprintf (f, 'define(`GB_Asaxpy3B'', `GB_Asaxpy3B__%s'')\n', name) ;
 
 % type of C, A, and B
 fprintf (f, 'define(`GB_ctype'', `%s'')\n', ztype) ;
@@ -24,26 +66,68 @@ function codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity,
 % identity and terminal values for the monoid
 fprintf (f, 'define(`GB_identity'', `%s'')\n', identity) ;
 
-if (~isempty (terminal))
-    fprintf (f, 'define(`GB_terminal'', `if (cij == %s) break ;'')\n', ...
-        terminal) ;
-    fprintf (f, 'define(`GB_dot_simd'', `;'')\n') ;
+if (is_any_pair)
+    fprintf (f, 'define(`GB_is_any_pair_semiring'', `1'')\n') ;
+else
+    fprintf (f, 'define(`GB_is_any_pair_semiring'', `0'')\n') ;
+end
+
+if (is_pair)
+    fprintf (f, 'define(`GB_is_pair_multiplier'', `1'')\n') ;
+else
+    fprintf (f, 'define(`GB_is_pair_multiplier'', `0'')\n') ;
+end
+
+if (is_eq)
+    fprintf (f, 'define(`GB_is_eq_monoid'', `1'')\n') ;
+else
+    fprintf (f, 'define(`GB_is_eq_monoid'', `0'')\n') ;
+end
+
+if (is_any)
+    % the ANY monoid terminates on the first entry seen
+    fprintf (f, 'define(`GB_is_any_monoid'', `1'')\n') ;
+    fprintf (f, 'define(`GB_terminal'', `break ;'')\n') ;
+    fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ;
+elseif (~isempty (terminal))
+    fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ;
+    fprintf (f, 'define(`GB_terminal'', `if (cij == %s) break ;'')\n', terminal) ;
+    fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ;
 else
+    fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ;
     fprintf (f, 'define(`GB_terminal'', `;'')\n') ;
-    fprintf (f, 'define(`GB_dot_simd'', `GB_PRAGMA_SIMD'')\n') ;
+    fprintf (f, 'define(`GB_dot_simd_vectorize'', `GB_PRAGMA_SIMD'')\n') ;
 end
 
+% all built-in monoids are atomic
+fprintf (f, 'define(`GB_has_atomic'', `1'')\n') ;
+
+% only PLUS, TIMES, LOR, LAND, and LXOR can be done with OpenMP atomics
+fprintf (f, 'define(`GB_has_omp_atomic'', `%d'')\n', omp_atomic) ;
+
+% MIN and MAX for floating-point types need unsigned integer puns
+% pun for compare-and-swap of ztype
+if (isequal (ztype, 'float'))
+    pun = 'uint32_t' ;
+elseif (isequal (ztype, 'double'))
+    pun = 'uint64_t' ;
+else
+    % no type punning needed for compare-and-swap
+    pun = ztype ;
+end
+fprintf (f, 'define(`GB_ctype_pun'', `%s'')\n', pun) ;
+
 % to get an entry from A
-is_second = isequal (multop, 'second') ;
-if (is_second)
+if (is_second || is_pair)
+    % value of A is ignored for the SECOND and PAIR operators
     fprintf (f, 'define(`GB_geta'', `;'')\n') ;
 else
     fprintf (f, 'define(`GB_geta'', `%s $1 = $2 [$3]'')\n', xytype) ;
 end
 
 % to get an entry from B
-is_first = isequal (multop, 'first') ;
-if (is_first)
+if (is_first || is_pair)
+    % value of B is ignored for the FIRST and PAIR operators
     fprintf (f, 'define(`GB_getb'', `;'')\n') ;
 else
     fprintf (f, 'define(`GB_getb'', `%s $1 = $2 [$3]'')\n', xytype) ;
@@ -64,15 +148,20 @@ function codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity,
 mult2 = strrep (mult2, 'yarg', '`$3''') ;
 fprintf (f, 'define(`GB_MULTIPLY'', `$1 = %s'')\n', mult2) ;
 
-% create the add operator
+% create the add operator, of the form w += t
 add2 = strrep (add,  'w', '`$1''') ;
 add2 = strrep (add2, 't', '`$2''') ;
-fprintf (f, 'define(`GB_ADD'', `%s'')\n', add2) ;
+fprintf (f, 'define(`GB_add_update'', `%s'')\n', add2) ;
+
+% create the add function, of the form w + t
+add2 = strrep (addfunc,  'w', '`$1''') ;
+add2 = strrep (add2,     't', '`$2''') ;
+fprintf (f, 'define(`GB_add_function'', `%s'')\n', add2) ;
 
 % create the multiply-add operator
 if (isequal (ztype, 'float') || isequal (ztype, 'double') || ...
-    isequal (ztype, 'bool') || is_first || is_second || ...
-    isequal (multop (1:2), 'is'))
+    isequal (ztype, 'bool') || is_first || is_second || is_pair || ...
+    isequal (multop (1:2), 'is') || isequal (multop, 'any'))
     % float and double do not get promoted.
     % bool is OK since promotion of the result (0 or 1) to int is safe.
     % first and second are OK since no promotion occurs.
@@ -81,13 +170,12 @@ function codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity,
     multadd = strrep (multadd, 'w', '`$1''') ;
     multadd = strrep (multadd, 'xarg', '`$2''') ;
     multadd = strrep (multadd, 'yarg', '`$3''') ;
-    fprintf (f, 'define(`GB_MULTIPLY_ADD'', `%s'')\n', multadd) ;
+    fprintf (f, 'define(`GB_multiply_add'', `%s'')\n', multadd) ;
 else
-    % use explicit typecasing to avoid ANSI C integer promotion.
+    % use explicit typecasting to avoid ANSI C integer promotion.
     add2 = strrep (add,  'w', '`$1''') ;
     add2 = strrep (add2, 't', 'x_op_y') ;
-    fprintf (f, 'define(`GB_ADD'', `%s'')\n', add2) ;
-    fprintf (f, 'define(`GB_MULTIPLY_ADD'', `%s x_op_y = %s ; %s'')\n', ...
+    fprintf (f, 'define(`GB_multiply_add'', `%s x_op_y = %s ; %s'')\n', ...
         ztype, mult2, add2) ;
 end
 
@@ -106,6 +194,7 @@ function codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity,
 fprintf (f, 'define(`GB_disable'', `(%s)'')\n', disable) ;
 fclose (f) ;
 
+% To create GB_control.h
 % ff = fopen ('temp.h', 'a') ;
 % fprintf (ff, '// #define GxB_NO_%s\n', upper (addop)) ;
 % fprintf (ff, '// #define GxB_NO_%s\n', upper (multop)) ;
@@ -115,14 +204,14 @@ function codegen_axb_method (addop, multop, add, mult, ztype, xytype, identity,
 
 % construct the *.c file
 cmd = sprintf (...
-'cat control.m4 Generator/GB_AxB.c | m4 | tail -n +16 > Generated/GB_AxB__%s.c', ...
+'cat control.m4 Generator/GB_AxB.c | m4 | tail -n +28 > Generated/GB_AxB__%s.c', ...
 name) ;
 fprintf ('.') ;
 system (cmd) ;
 
 % append to the *.h file
 cmd = sprintf (...
-'cat control.m4 Generator/GB_AxB.h | m4 | tail -n +16 >> Generated/GB_AxB__include.h') ;
+'cat control.m4 Generator/GB_AxB.h | m4 | tail -n +28 >> Generated/GB_AxB__include.h') ;
 system (cmd) ;
 
 delete ('control.m4') ;
diff --git a/Source/codegen_axb_template.m b/Source/codegen_axb_template.m
index d22e4486ff..0c4693f813 100644
--- a/Source/codegen_axb_template.m
+++ b/Source/codegen_axb_template.m
@@ -19,69 +19,100 @@ function codegen_axb_template (multop, bmult, imult, fmult, dmult)
     dmult = fmult ;
 end
 
-% min monoid: all are terminal
+plusinf32 = 'INFINITY' ;
+neginf32  = '(-INFINITY)' ;
+plusinf64 = '((double) INFINITY)' ;
+neginf64  = '((double) -INFINITY)' ;
+
+% MIN monoid: all are terminal.  None can be done with OpenMP atomic update
 add = 'w = GB_IMIN (w, t)' ;
-codegen_axb_method ('min', multop, add, imult, 'int8_t'  , 'int8_t'  , 'INT8_MAX'  , 'INT8_MIN'  ) ;
-codegen_axb_method ('min', multop, add, imult, 'int16_t' , 'int16_t' , 'INT16_MAX' , 'INT16_MIN' ) ;
-codegen_axb_method ('min', multop, add, imult, 'int32_t' , 'int32_t' , 'INT32_MAX' , 'INT32_MIN' ) ;
-codegen_axb_method ('min', multop, add, imult, 'int64_t' , 'int64_t' , 'INT64_MAX' , 'INT64_MIN' ) ;
-codegen_axb_method ('min', multop, add, imult, 'uint8_t' , 'uint8_t' , 'UINT8_MAX' , '0'         ) ;
-codegen_axb_method ('min', multop, add, imult, 'uint16_t', 'uint16_t', 'UINT16_MAX', '0'         ) ;
-codegen_axb_method ('min', multop, add, imult, 'uint32_t', 'uint32_t', 'UINT32_MAX', '0'         ) ;
-codegen_axb_method ('min', multop, add, imult, 'uint64_t', 'uint64_t', 'UINT64_MAX', '0'         ) ;
+addfunc = 'GB_IMIN (w, t)' ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'int8_t'  , 'int8_t'  , 'INT8_MAX'  , 'INT8_MIN'  , 0) ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'int16_t' , 'int16_t' , 'INT16_MAX' , 'INT16_MIN' , 0) ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'int32_t' , 'int32_t' , 'INT32_MAX' , 'INT32_MIN' , 0) ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'int64_t' , 'int64_t' , 'INT64_MAX' , 'INT64_MIN' , 0) ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'uint8_t' , 'uint8_t' , 'UINT8_MAX' , '0'         , 0) ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'uint16_t', 'uint16_t', 'UINT16_MAX', '0'         , 0) ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'uint32_t', 'uint32_t', 'UINT32_MAX', '0'         , 0) ;
+codegen_axb_method ('min', multop, add, addfunc, imult, 'uint64_t', 'uint64_t', 'UINT64_MAX', '0'         , 0) ;
 add = 'w = fminf (w, t)' ;
-codegen_axb_method ('min', multop, add, fmult, 'float'   , 'float'   , 'INFINITY'  , '(-INFINITY)' ) ;
+addfunc = 'fminf (w, t)' ;
+codegen_axb_method ('min', multop, add, addfunc, fmult, 'float'   , 'float'   , plusinf32   , neginf32    , 0) ;
 add = 'w = fmin (w, t)' ;
-codegen_axb_method ('min', multop, add, dmult, 'double'  , 'double'  , ....
-        '((double) INFINITY)'  , '((double) -INFINITY)' ) ;
+addfunc = 'fmin (w, t)' ;
+codegen_axb_method ('min', multop, add, addfunc, dmult, 'double'  , 'double'  , plusinf64   , neginf64    , 0) ;
 
-% max monoid: all are terminal
+% MAX monoid: all are terminal.  None can be done with OpenMP atomic update
 add = 'w = GB_IMAX (w, t)' ;
-codegen_axb_method ('max', multop, add, imult, 'int8_t'  , 'int8_t'  , 'INT8_MIN'  , 'INT8_MAX'  ) ;
-codegen_axb_method ('max', multop, add, imult, 'int16_t' , 'int16_t' , 'INT16_MIN' , 'INT16_MAX' ) ;
-codegen_axb_method ('max', multop, add, imult, 'int32_t' , 'int32_t' , 'INT32_MIN' , 'INT32_MAX' ) ;
-codegen_axb_method ('max', multop, add, imult, 'int64_t' , 'int64_t' , 'INT64_MIN' , 'INT64_MAX' ) ;
-codegen_axb_method ('max', multop, add, imult, 'uint8_t' , 'uint8_t' , '0'         , 'UINT8_MAX' ) ;
-codegen_axb_method ('max', multop, add, imult, 'uint16_t', 'uint16_t', '0'         , 'UINT16_MAX') ;
-codegen_axb_method ('max', multop, add, imult, 'uint32_t', 'uint32_t', '0'         , 'UINT32_MAX') ;
-codegen_axb_method ('max', multop, add, imult, 'uint64_t', 'uint64_t', '0'         , 'UINT64_MAX') ;
+addfunc = 'GB_IMAX (w, t)' ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'int8_t'  , 'int8_t'  , 'INT8_MIN'  , 'INT8_MAX'  , 0) ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'int16_t' , 'int16_t' , 'INT16_MIN' , 'INT16_MAX' , 0) ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'int32_t' , 'int32_t' , 'INT32_MIN' , 'INT32_MAX' , 0) ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'int64_t' , 'int64_t' , 'INT64_MIN' , 'INT64_MAX' , 0) ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'uint8_t' , 'uint8_t' , '0'         , 'UINT8_MAX' , 0) ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'uint16_t', 'uint16_t', '0'         , 'UINT16_MAX', 0) ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'uint32_t', 'uint32_t', '0'         , 'UINT32_MAX', 0) ;
+codegen_axb_method ('max', multop, add, addfunc, imult, 'uint64_t', 'uint64_t', '0'         , 'UINT64_MAX', 0) ;
+% floating-point MAX must use unsigned integer puns for compare-and-swap
 add = 'w = fmaxf (w, t)' ;
-codegen_axb_method ('max', multop, add, fmult, 'float'   , 'float'   , '(-INFINITY)' , 'INFINITY') ;
+addfunc = 'fmaxf (w, t)' ;
+codegen_axb_method ('max', multop, add, addfunc, fmult, 'float'   , 'float'   , neginf32    , plusinf32   , 0) ;
 add = 'w = fmax (w, t)' ;
-codegen_axb_method ('max', multop, add, dmult, 'double'  , 'double'  , ...
-        '((double) -INFINITY)'  , '((double) INFINITY)' ) ;
+addfunc = 'fmax (w, t)' ;
+codegen_axb_method ('max', multop, add, addfunc, dmult, 'double'  , 'double'  , neginf64    , plusinf64   , 0) ;
+
+% ANY monoid: all are terminal.
+add = 'w = t' ;
+addfunc = 't' ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'int8_t'  , 'int8_t'  , '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'int16_t' , 'int16_t' , '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'int32_t' , 'int32_t' , '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'int64_t' , 'int64_t' , '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'uint8_t' , 'uint8_t' , '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'uint16_t', 'uint16_t', '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'uint32_t', 'uint32_t', '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, imult, 'uint64_t', 'uint64_t', '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, fmult, 'float'   , 'float'   , '0' , [ ], 0) ;
+codegen_axb_method ('any', multop, add, addfunc, dmult, 'double'  , 'double'  , '0' , [ ], 0) ;
 
-% plus monoid: none are terminal
+% PLUS monoid: none are terminal.  All can be done with OpenMP atomic update
 add = 'w += t' ;
-codegen_axb_method ('plus', multop, add, imult, 'int8_t'  , 'int8_t'  , '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, imult, 'uint8_t' , 'uint8_t' , '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, imult, 'int16_t' , 'int16_t' , '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, imult, 'uint16_t', 'uint16_t', '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, imult, 'int32_t' , 'int32_t' , '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, imult, 'uint32_t', 'uint32_t', '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, imult, 'int64_t' , 'int64_t' , '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, imult, 'uint64_t', 'uint64_t', '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, fmult, 'float'   , 'float'   , '0', [ ]) ;
-codegen_axb_method ('plus', multop, add, dmult, 'double'  , 'double'  , '0', [ ]) ;
+addfunc = 'w + t' ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'int8_t'  , 'int8_t'  , '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'uint8_t' , 'uint8_t' , '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'int16_t' , 'int16_t' , '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'uint16_t', 'uint16_t', '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'int32_t' , 'int32_t' , '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'uint32_t', 'uint32_t', '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'int64_t' , 'int64_t' , '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, imult, 'uint64_t', 'uint64_t', '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, fmult, 'float'   , 'float'   , '0', [ ], 1) ;
+codegen_axb_method ('plus', multop, add, addfunc, dmult, 'double'  , 'double'  , '0', [ ], 1) ;
 
-% times monoid: integers are terminal, float and double are not
+% TIMES monoid: integers are terminal, float and double are not.
+% All can be done with OpenMP atomic update
 add = 'w *= t' ;
-codegen_axb_method ('times', multop, add, imult, 'int8_t'  , 'int8_t'  , '1', '0') ;
-codegen_axb_method ('times', multop, add, imult, 'uint8_t' , 'uint8_t' , '1', '0') ;
-codegen_axb_method ('times', multop, add, imult, 'int16_t' , 'int16_t' , '1', '0') ;
-codegen_axb_method ('times', multop, add, imult, 'uint16_t', 'uint16_t', '1', '0') ;
-codegen_axb_method ('times', multop, add, imult, 'int32_t' , 'int32_t' , '1', '0') ;
-codegen_axb_method ('times', multop, add, imult, 'uint32_t', 'uint32_t', '1', '0') ;
-codegen_axb_method ('times', multop, add, imult, 'int64_t' , 'int64_t' , '1', '0') ;
-codegen_axb_method ('times', multop, add, imult, 'uint64_t', 'uint64_t', '1', '0') ;
-codegen_axb_method ('times', multop, add, fmult, 'float'   , 'float'   , '1', [ ]) ;
-codegen_axb_method ('times', multop, add, dmult, 'double'  , 'double'  , '1', [ ]) ;
+addfunc = 'w * t' ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'int8_t'  , 'int8_t'  , '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'uint8_t' , 'uint8_t' , '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'int16_t' , 'int16_t' , '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'uint16_t', 'uint16_t', '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'int32_t' , 'int32_t' , '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'uint32_t', 'uint32_t', '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'int64_t' , 'int64_t' , '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, imult, 'uint64_t', 'uint64_t', '1', '0', 1) ;
+codegen_axb_method ('times', multop, add, addfunc, fmult, 'float'   , 'float'   , '1', [ ], 1) ;
+codegen_axb_method ('times', multop, add, addfunc, dmult, 'double'  , 'double'  , '1', [ ], 1) ;
 
-% boolean monoids: lor, land are terminal; lxor, eq are not
+% boolean monoids: LOR, LAND are terminal; LXOR, EQ are not.
+% LOR, LAND, and LXOR can be done as OpenMP atomic updates; EQ cannot.
 if (~isempty (bmult))
-    codegen_axb_method ('lor',  multop, 'w = (w || t)', bmult, 'bool', 'bool', 'false', 'true' ) ;
-    codegen_axb_method ('land', multop, 'w = (w && t)', bmult, 'bool', 'bool', 'true' , 'false') ;
-    codegen_axb_method ('lxor', multop, 'w = (w != t)', bmult, 'bool', 'bool', 'false', [ ]    ) ;
-    codegen_axb_method ('eq',   multop, 'w = (w == t)', bmult, 'bool', 'bool', 'true' , [ ]    ) ;
+    codegen_axb_method ('lor',  multop, 'w |= t', 'w | t', bmult, 'bool', 'bool', 'false', 'true' , 1) ;
+    codegen_axb_method ('land', multop, 'w &= t', 'w & t', bmult, 'bool', 'bool', 'true' , 'false', 1) ;
+    codegen_axb_method ('lxor', multop, 'w ^= t', 'w ^ t', bmult, 'bool', 'bool', 'false', [ ]    , 1) ;
+    codegen_axb_method ('any' , multop, 'w = t' , 't'    , bmult, 'bool', 'bool', '0'    , [ ]    , 0) ;
+    add = 'w = (w == t)' ;
+    addfunc = 'w == t' ;
+    codegen_axb_method ('eq',   multop, add,      addfunc, bmult, 'bool', 'bool', 'true' , [ ]    , 0) ;
 end
 
diff --git a/Source/codegen_binop.m b/Source/codegen_binop.m
index 7eb126bc56..153e4d86c9 100644
--- a/Source/codegen_binop.m
+++ b/Source/codegen_binop.m
@@ -11,7 +11,7 @@
 fprintf (f, '// GB_binop__include.h: definitions for GB_binop__*.c\n') ;
 fprintf (f, '//------------------------------------------------------------------------------\n') ;
 fprintf (f, '\n') ;
-fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.\n') ;
+fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.\n') ;
 fprintf (f, '// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.\n') ;
 fprintf (f, '\n') ;
 fprintf (f, '// This file has been automatically generated from Generator/GB_binop.h') ;
@@ -20,15 +20,20 @@
 
 codegen_binop_template ('first',  0, 'xarg', 'xarg') ;
 codegen_binop_template ('second', 0, 'yarg', 'yarg') ;
-
-codegen_binop_template ('min',    0, [ ], 'GB_IMIN (xarg, yarg)', 'fminf (xarg, yarg)', 'fmin (xarg, yarg)') ;
-codegen_binop_template ('max',    0, [ ], 'GB_IMAX (xarg, yarg)', 'fmaxf (xarg, yarg)', 'fmax (xarg, yarg)') ;
-codegen_binop_template ('plus',   0, [ ], '(xarg + yarg)') ;
-codegen_binop_template ('minus',  0, [ ], '(xarg - yarg)') ;
-codegen_binop_template ('rminus', 0, [ ], '(yarg - xarg)') ;
-codegen_binop_template ('times',  0, [ ], '(xarg * yarg)') ;
-codegen_binop_template ('div',    0, [ ], 'GB_IDIV (xarg, yarg)', '(xarg / yarg)') ;
-codegen_binop_template ('rdiv',   0, [ ], 'GB_IDIV (yarg, xarg)', '(yarg / xarg)') ;
+codegen_binop_template ('pair',   0, '1', '1') ;
+
+% The ANY operator is not used as a binary operator in the generated functions.
+% It can be used as the binary op in eWiseAdd, eWiseMult, etc., but it is
+% renamed to SECOND before the generated function is called.
+
+codegen_binop_template ('min',    0, [ ], 'GB_IMIN (xarg, yarg)', 'fminf (xarg, yarg)', 'fmin (xarg, yarg)', 1) ;
+codegen_binop_template ('max',    0, [ ], 'GB_IMAX (xarg, yarg)', 'fmaxf (xarg, yarg)', 'fmax (xarg, yarg)', 1) ;
+codegen_binop_template ('plus',   0, [ ], '(xarg + yarg)'       , [ ]                 , [ ]                , 1) ;
+codegen_binop_template ('minus',  0, [ ], '(xarg - yarg)'       , [ ]                 , [ ]                , 1) ;
+codegen_binop_template ('rminus', 0, [ ], '(yarg - xarg)'       , [ ]                 , [ ]                , 1) ;
+codegen_binop_template ('times',  0, [ ], '(xarg * yarg)'       , [ ]                 , [ ]                , 1) ;
+codegen_binop_template ('div',    0, [ ], 'GB_IDIV (xarg, yarg)', '(xarg / yarg)'     , [ ]                , 1) ;
+codegen_binop_template ('rdiv',   0, [ ], 'GB_IDIV (yarg, xarg)', '(yarg / xarg)'     , [ ]                , 1) ;
 
 codegen_binop_template ('iseq',   0, [ ], '(xarg == yarg)') ;
 codegen_binop_template ('isne',   0, [ ], '(xarg != yarg)') ;
diff --git a/Source/codegen_binop_method.m b/Source/codegen_binop_method.m
index 9fdfbbb07a..1ea0398572 100644
--- a/Source/codegen_binop_method.m
+++ b/Source/codegen_binop_method.m
@@ -1,10 +1,16 @@
-function codegen_binop_method (binop, op, iscompare, xytype)
+function codegen_binop_method (binop, op, iscompare, xytype, is_binop_subset)
 %CODEGEN_BINOP_METHOD create a function to compute C=binop(A,B)
 %
 % codegen_binop_method (binop, op, iscompare, xytype)
 
 f = fopen ('control.m4', 'w') ;
 
+assert (~isequal (binop, 'any')) ;
+
+if (nargin < 5)
+    is_binop_subset = false ;
+end
+
 [fname, unsigned, bits] = codegen_type (xytype) ;
 
 name = sprintf ('%s_%s', binop, fname) ;
@@ -14,6 +20,51 @@ function codegen_binop_method (binop, op, iscompare, xytype)
 fprintf (f, 'define(`GB_AemultB'', `GB_AemultB__%s'')\n', name) ;
 fprintf (f, 'define(`GB_AxD'', `GB_AxD__%s'')\n', name) ;
 fprintf (f, 'define(`GB_DxB'', `GB_DxB__%s'')\n', name) ;
+fprintf (f, 'define(`GB_Cdense_accumA'', `GB_Cdense_accumA__%s'')\n', name) ;
+fprintf (f, 'define(`GB_Cdense_accumX'', `GB_Cdense_accumX__%s'')\n', name) ;
+fprintf (f, 'define(`GB_Cdense_ewise3_noaccum'', `GB_Cdense_ewise3_noaccum__%s'')\n', name) ;
+
+% subset of operators for ewise3_accum
+if (is_binop_subset)
+    fprintf (f, 'define(`GB_Cdense_ewise3_accum'', `GB_Cdense_ewise3_accum__%s'')\n', name) ;
+    fprintf (f, 'define(`if_is_binop_subset'', `'')\n') ;
+    fprintf (f, 'define(`endif_is_binop_subset'', `'')\n') ;
+else
+    fprintf (f, 'define(`GB_Cdense_ewise3_accum'', `(none)'')\n') ;
+    fprintf (f, 'define(`if_is_binop_subset'', `#if 0'')\n') ;
+    fprintf (f, 'define(`endif_is_binop_subset'', `#endif'')\n') ;
+end
+
+if (isequal (binop, 'second'))
+    fprintf (f, 'define(`GB_op_is_second'', `1'')\n') ;
+else
+    fprintf (f, 'define(`GB_op_is_second'', `0'')\n') ;
+end
+
+% determine the names of the dense GB_cblas_* gateway routines to use
+is_fp32 = isequal (xytype, 'float') ;
+is_fp64 = isequal (xytype, 'double') ;
+is_real = is_fp32 || is_fp64 ;
+is_plus  = isequal (binop, 'plus') ;
+is_minus = isequal (binop, 'minus') ;
+if (is_real && (is_plus || is_minus))
+    if (is_plus)
+        fprintf (f, 'define(`GB_op_is_plus_real'', `1'')\n') ;
+        fprintf (f, 'define(`GB_op_is_minus_real'', `0'')\n') ;
+    else
+        fprintf (f, 'define(`GB_op_is_plus_real'', `0'')\n') ;
+        fprintf (f, 'define(`GB_op_is_minus_real'', `1'')\n') ;
+    end
+    if (is_fp32)
+        fprintf (f, 'define(`GB_cblas_axpy'', `GB_cblas_saxpy'')\n') ;
+    else
+        fprintf (f, 'define(`GB_cblas_axpy'', `GB_cblas_daxpy'')\n') ;
+    end
+else
+    fprintf (f, 'define(`GB_op_is_plus_real'', `0'')\n') ;
+    fprintf (f, 'define(`GB_op_is_minus_real'', `0'')\n') ;
+    fprintf (f, 'define(`GB_cblas_axpy'', `(none)'')\n') ;
+end
 
 % type of C, A, and B
 if (iscompare)
@@ -27,15 +78,28 @@ function codegen_binop_method (binop, op, iscompare, xytype)
 fprintf (f, 'define(`GB_atype'', `%s'')\n', xytype) ;
 fprintf (f, 'define(`GB_btype'', `%s'')\n', xytype) ;
 
+% C_dense_update: operators z=f(x,y) where ztype and xtype match, and op is not 'first'
+if (isequal (xytype, ztype) && ~isequal (binop, 'first'))
+    fprintf (f, 'define(`GB_C_dense_update'', `1'')\n') ;
+    fprintf (f, 'define(`if_C_dense_update'', `'')\n') ;
+    fprintf (f, 'define(`endif_C_dense_update'', `'')\n') ;
+else
+    fprintf (f, 'define(`GB_C_dense_update'', `0'')\n') ;
+    fprintf (f, 'define(`if_C_dense_update'', `#if 0'')\n') ;
+    fprintf (f, 'define(`endif_C_dense_update'', `#endif'')\n') ;
+end
+
 % to get an entry from A
-if (isequal (binop, 'second'))
+if (isequal (binop, 'second') || isequal (binop, 'pair'))
+    % the value of A is ignored
     fprintf (f, 'define(`GB_geta'', `;'')\n') ;
 else
     fprintf (f, 'define(`GB_geta'', `%s $1 = $2 [$3]'')\n', xytype) ;
 end
 
 % to get an entry from B
-if (isequal (binop, 'first'))
+if (isequal (binop, 'first') || isequal (binop, 'pair'))
+    % the value of B is ignored
     fprintf (f, 'define(`GB_getb'', `;'')\n') ;
 else
     fprintf (f, 'define(`GB_getb'', `%s $1 = $2 [$3]'')\n', xytype) ;
@@ -68,16 +132,18 @@ function codegen_binop_method (binop, op, iscompare, xytype)
 
 fclose (f) ;
 
+trim = 25 ;
+
 % construct the *.c file
 cmd = sprintf (...
-'cat control.m4 Generator/GB_binop.c | m4 | tail -n +12 > Generated/GB_binop__%s.c', ...
-name) ;
+'cat control.m4 Generator/GB_binop.c | m4 | tail -n +%d > Generated/GB_binop__%s.c', ...
+trim, name) ;
 fprintf ('.') ;
 system (cmd) ;
 
 % append to the *.h file
 cmd = sprintf (...
-'cat control.m4 Generator/GB_binop.h | m4 | tail -n +12 >> Generated/GB_binop__include.h') ;
+'cat control.m4 Generator/GB_binop.h | m4 | tail -n +%d >> Generated/GB_binop__include.h', trim) ;
 system (cmd) ;
 
 delete ('control.m4') ;
diff --git a/Source/codegen_binop_template.m b/Source/codegen_binop_template.m
index 5492545b73..e993c704e6 100644
--- a/Source/codegen_binop_template.m
+++ b/Source/codegen_binop_template.m
@@ -1,10 +1,12 @@
-function codegen_binop_template (binop, iscompare, bfunc, ifunc, ffunc, dfunc)
+function codegen_binop_template (binop, iscompare, bfunc, ifunc, ffunc, dfunc, is_binop_subset)
 %CODEGEN_BINOP_TEMPLATE create binop functions
 %
 % Generate functions for a binary operator, for all types.
 
 fprintf ('\n%-7s', binop) ;
 
+assert (~isequal (binop, 'any'))
+
 if (nargin < 5)
     ffunc = [ ] ;
 end
@@ -13,6 +15,10 @@ function codegen_binop_template (binop, iscompare, bfunc, ifunc, ffunc, dfunc)
     dfunc = [ ] ;
 end
 
+if (nargin < 7)
+    is_binop_subset = false ;
+end
+
 if (isempty (ffunc))
     ffunc = ifunc ;
 end
@@ -22,19 +28,19 @@ function codegen_binop_template (binop, iscompare, bfunc, ifunc, ffunc, dfunc)
 end
 
 % integer and floating-point operators
-codegen_binop_method (binop, ifunc, iscompare, 'int8_t'  ) ;
-codegen_binop_method (binop, ifunc, iscompare, 'int16_t' ) ;
-codegen_binop_method (binop, ifunc, iscompare, 'int32_t' ) ;
-codegen_binop_method (binop, ifunc, iscompare, 'int64_t' ) ;
-codegen_binop_method (binop, ifunc, iscompare, 'uint8_t' ) ;
-codegen_binop_method (binop, ifunc, iscompare, 'uint16_t') ;
-codegen_binop_method (binop, ifunc, iscompare, 'uint32_t') ;
-codegen_binop_method (binop, ifunc, iscompare, 'uint64_t') ;
-codegen_binop_method (binop, ffunc, iscompare, 'float'   ) ;
-codegen_binop_method (binop, dfunc, iscompare, 'double'  ) ;
+codegen_binop_method (binop, ifunc, iscompare, 'int8_t'  , is_binop_subset) ;
+codegen_binop_method (binop, ifunc, iscompare, 'int16_t' , is_binop_subset) ;
+codegen_binop_method (binop, ifunc, iscompare, 'int32_t' , is_binop_subset) ;
+codegen_binop_method (binop, ifunc, iscompare, 'int64_t' , is_binop_subset) ;
+codegen_binop_method (binop, ifunc, iscompare, 'uint8_t' , is_binop_subset) ;
+codegen_binop_method (binop, ifunc, iscompare, 'uint16_t', is_binop_subset) ;
+codegen_binop_method (binop, ifunc, iscompare, 'uint32_t', is_binop_subset) ;
+codegen_binop_method (binop, ifunc, iscompare, 'uint64_t', is_binop_subset) ;
+codegen_binop_method (binop, ffunc, iscompare, 'float'   , is_binop_subset) ;
+codegen_binop_method (binop, dfunc, iscompare, 'double'  , is_binop_subset) ;
 
 % boolean operators
 if (~isempty (bfunc))
-    codegen_binop_method (binop, bfunc, iscompare, 'bool') ;
+    codegen_binop_method (binop, bfunc, iscompare, 'bool', is_binop_subset) ;
 end
 
diff --git a/Source/codegen_red.m b/Source/codegen_red.m
index 4e8447687d..d6a3f5e04c 100644
--- a/Source/codegen_red.m
+++ b/Source/codegen_red.m
@@ -11,7 +11,7 @@
 fprintf (f, '// GB_red__include.h: definitions for GB_red__*.c\n') ;
 fprintf (f, '//------------------------------------------------------------------------------\n') ;
 fprintf (f, '\n') ;
-fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.\n') ;
+fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.\n') ;
 fprintf (f, '// http://suitesparse.com   See GraphBLAS/Doc/License.txargt for license.\n') ;
 fprintf (f, '\n') ;
 fprintf (f, '// This file has been automatically generated from Generator/GB_red.h') ;
@@ -19,7 +19,7 @@
 fclose (f) ;
 
 %-------------------------------------------------------------------------------
-% the monoid: MIN, MAX, PLUS, TIMES, OR, AND, XOR, EQ
+% the monoid: MIN, MAX, PLUS, TIMES, ANY, OR, AND, XOR, EQ
 %-------------------------------------------------------------------------------
 
 % Note that the min and max monoids are carefully written to obtain the correct
@@ -71,6 +71,21 @@
 codegen_red_method ('max',    op, 'double'  , ...
     '((double) INFINITY)'  , '((double) -INFINITY)' , 16) ;
 
+% ANY: 11 monoids (including boolean)
+fprintf ('\nany    ') ;
+op = 'zarg = yarg' ;
+codegen_red_method ('any',    op, 'int8_t'  , '0') ;
+codegen_red_method ('any',    op, 'int16_t' , '0') ;
+codegen_red_method ('any',    op, 'int32_t' , '0') ;
+codegen_red_method ('any',    op, 'int64_t' , '0') ;
+codegen_red_method ('any',    op, 'uint8_t' , '0') ;
+codegen_red_method ('any',    op, 'uint16_t', '0') ;
+codegen_red_method ('any',    op, 'uint32_t', '0') ;
+codegen_red_method ('any',    op, 'uint64_t', '0') ;
+codegen_red_method ('any',    op, 'float'   , '0') ;
+codegen_red_method ('any',    op, 'double'  , '0') ;
+codegen_red_method ('any' ,   op, 'bool'    , '0') ;
+
 % PLUS: 10 monoids
 fprintf ('\nplus   ') ;
 op = 'zarg += yarg' ;
@@ -108,6 +123,8 @@
 codegen_red_method ('lxor', 'zarg = (zarg != yarg)', 'bool','false', [ ]    ,8);
 fprintf ('\neq     ') ;
 codegen_red_method ('eq'  , 'zarg = (zarg == yarg)', 'bool','true' , [ ]    ,8);
+fprintf ('\nany    ') ;
+codegen_red_method ('any' , 'zarg = (yarg)'        , 'bool','false') ;
 
 %-------------------------------------------------------------------------------
 % FIRST and SECOND (not monoids; used for GB_red_build__[first,second]_[type])
diff --git a/Source/codegen_red_method.m b/Source/codegen_red_method.m
index 5950a05afa..4e71484f70 100644
--- a/Source/codegen_red_method.m
+++ b/Source/codegen_red_method.m
@@ -8,6 +8,7 @@ function codegen_red_method (opname, func, atype, identity, terminal, panel)
 [aname, unsigned, bits] = codegen_type (atype) ;
 
 name = sprintf ('%s_%s', opname, aname) ;
+is_any = isequal (opname, 'any') ;
 
 % function names
 fprintf (f, 'define(`GB_red_build'', `GB_red_build__%s'')\n', name) ;
@@ -34,17 +35,28 @@ function codegen_red_method (opname, func, atype, identity, terminal, panel)
     fprintf (f, 'define(`endif_is_monoid'', `#endif'')\n') ;
 end
 
-if (~isempty (terminal))
+if (is_any)
+    fprintf (f, 'define(`GB_is_any_monoid'', `1'')\n') ;
+    fprintf (f, 'define(`GB_has_terminal'', `1'')\n') ;
+    fprintf (f, 'define(`GB_terminal_value'', `(any value)'')\n') ;
+    fprintf (f, 'define(`GB_terminal'', `break ;'')\n') ;
+elseif (~isempty (terminal))
+    fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ;
     fprintf (f, 'define(`GB_has_terminal'', `1'')\n') ;
     fprintf (f, 'define(`GB_terminal_value'', `%s'')\n', terminal) ;
     fprintf (f, 'define(`GB_terminal'', `if (s == %s) break ;'')\n', terminal) ;
 else
+    fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ;
     fprintf (f, 'define(`GB_has_terminal'', `0'')\n') ;
     fprintf (f, 'define(`GB_terminal_value'', `(none)'')\n') ;
     fprintf (f, 'define(`GB_terminal'', `;'')\n') ;
 end
 
-fprintf (f, 'define(`GB_panel'', `%d'')\n', panel) ;
+if (is_any)
+    fprintf (f, 'define(`GB_panel'', `(no panel)'')\n') ;
+else
+    fprintf (f, 'define(`GB_panel'', `%d'')\n', panel) ;
+end
 
 % create the operator
 func = strrep (func, 'zarg', '`$1''') ;
@@ -61,14 +73,14 @@ function codegen_red_method (opname, func, atype, identity, terminal, panel)
 
 % construct the *.c file
 cmd = sprintf (...
-'cat control.m4 Generator/GB_red.c | m4 | tail -n +16 > Generated/GB_red__%s.c', ...
+'cat control.m4 Generator/GB_red.c | m4 | tail -n +17 > Generated/GB_red__%s.c', ...
 name) ;
 fprintf ('.') ;
 system (cmd) ;
 
 % append to the *.h file
 cmd = sprintf (...
-'cat control.m4 Generator/GB_red.h | m4 | tail -n +16 >> Generated/GB_red__include.h') ;
+'cat control.m4 Generator/GB_red.h | m4 | tail -n +17 >> Generated/GB_red__include.h') ;
 system (cmd) ;
 
 delete ('control.m4') ;
diff --git a/Source/codegen_sel.m b/Source/codegen_sel.m
index bea4cc2634..8f96cc2596 100644
--- a/Source/codegen_sel.m
+++ b/Source/codegen_sel.m
@@ -11,7 +11,7 @@
 fprintf (f, '// GB_sel__include.h: definitions for GB_sel__*.c\n') ;
 fprintf (f, '//------------------------------------------------------------------------------\n') ;
 fprintf (f, '\n') ;
-fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.\n') ;
+fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.\n') ;
 fprintf (f, '// http://suitesparse.com   See GraphBLAS/Doc/License.txargt for license.\n') ;
 fprintf (f, '\n') ;
 fprintf (f, '// This file has been automatically generated from Generator/GB_sel.h') ;
diff --git a/Source/codegen_unop.m b/Source/codegen_unop.m
index 149eb74c20..ce866a8b90 100644
--- a/Source/codegen_unop.m
+++ b/Source/codegen_unop.m
@@ -11,7 +11,7 @@
 fprintf (f, '// GB_unaryop__include.h: definitions for GB_unaryop__*.c\n') ;
 fprintf (f, '//------------------------------------------------------------------------------\n') ;
 fprintf (f, '\n') ;
-fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.\n') ;
+fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.\n') ;
 fprintf (f, '// http://suitesparse.com   See GraphBLAS/Doc/License.txargt for license.\n') ;
 fprintf (f, '\n') ;
 fprintf (f, '// This file has been automatically generated from Generator/GB_unaryop.h') ;
diff --git a/TODO.txt b/TODO.txt
deleted file mode 100644
index 71a0d02753..0000000000
--- a/TODO.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-
-TODO Ewise add and emult: handle dense += sparse case efficiently
-TODO support for dense matrices (A->i and A->p as NULL pointers)
-TODO implement v1.3 of the API
-TODO add matrix I/O in binary format (see draft LAGraph_binread/binwrite)
-
diff --git a/Tcov/GB_cover_util.c b/Tcov/GB_cover_util.c
index ecdd6926b0..8aeeda613c 100644
--- a/Tcov/GB_cover_util.c
+++ b/Tcov/GB_cover_util.c
@@ -2,7 +2,7 @@
 // GB_cover_util.c: utilities for test coverage
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -28,7 +28,8 @@ void GB_cover_get ( )
 
     // get GraphBLAS_grbcov from MATLAB global workspace
     mxArray *GB_cov_matlab = NULL ;
-    GB_cov_matlab = (mxArray *) mexGetVariablePtr ("global", "GraphBLAS_grbcov") ;
+    GB_cov_matlab =
+        (mxArray *) mexGetVariablePtr ("global", "GraphBLAS_grbcov") ;
 
     if (GB_cov_matlab == NULL || mxIsEmpty (GB_cov_matlab))
     {
diff --git a/Tcov/Makefile b/Tcov/Makefile
index 1a1c649435..7ed472c67a 100644
--- a/Tcov/Makefile
+++ b/Tcov/Makefile
@@ -2,8 +2,8 @@
 # GraphBLAS/Tcov/Makefile
 #-------------------------------------------------------------------------------
 
-#  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-#  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+# http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 #-------------------------------------------------------------------------------
 
diff --git a/Tcov/README.txt b/Tcov/README.txt
index 27b397dda4..732399bdea 100644
--- a/Tcov/README.txt
+++ b/Tcov/README.txt
@@ -1,4 +1,4 @@
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 GraphBLAS/Tcov: statement coverage tests
@@ -16,12 +16,7 @@ If you get a linking problem on linux, add this directory to your
 LD_LIBRARY_PATH, so that the libgraphblas_tcov.so constructed by grbmake can be
 found by the mexFunctions.
 
-Statement coverage tests results will be saved in Tcov/log.txt.  Note that full
-coverage requires some or all of the GraphBLAS/User/Examples/*.m4 files to
-first be moved into the GraphBLAS/User/ directory (and then run "make cmake" in
-the shell before doing testcov in MATLAB).  The tests will work without this
-step, but some statements that handle compile-time user-defined semirings in
-the User/*m4 files will not be tested.
+Statement coverage test results will be saved in Tcov/log.txt.
 
 To list the lines covered by the test, do this in MATLAB:
 
diff --git a/Tcov/grbcover.m b/Tcov/grbcover.m
index 9dc7787fdc..761e10ab4d 100644
--- a/Tcov/grbcover.m
+++ b/Tcov/grbcover.m
@@ -6,8 +6,8 @@ function grbcover (what)
 %
 % See also: grbcover_edit, grbmake
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % compile the mexFunctions
 
diff --git a/Tcov/grbcover_edit.m b/Tcov/grbcover_edit.m
index 004c8c3d40..b669d07ab8 100644
--- a/Tcov/grbcover_edit.m
+++ b/Tcov/grbcover_edit.m
@@ -24,8 +24,8 @@
 %       case stuff :  GB_cov[count]++ ; statement
 %       default :     GB_cov[count]++ ; statement
 %
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % infiles can be a struct from dir, or a single string with one filename
 if (~isstruct (infiles))
@@ -67,6 +67,8 @@
             if (enabled)
                 fprintf (f_output, '%s  GB_cov[%d]++ ;\n', cline, count) ;
                 count = count + 1 ;
+            else
+                fprintf (f_output, '%s\n', cline) ;
             end
 
         elseif ((~isempty (strfind (cline, ' case ')) || ...
@@ -80,7 +82,9 @@
                 colon = find (cline == ':', 1) ;
                 fprintf (f_output, '%s : GB_cov[%d]++ ; %s\n', ...
                     cline (1:colon-1), count, cline (colon+1:end)) ;
-                count = count+1 ;
+                count = count + 1 ;
+            else
+                fprintf (f_output, '%s\n', cline) ;
             end
 
         else
@@ -89,10 +93,11 @@
             fprintf (f_output, '%s\n', cline) ;
 
             % determine if the code is commented out
-            if (isequal (cline, '#if 0'))
+            if (isequal (cline, '#if 0') || isequal (cline, '    #if 0'))
                 % code coverage disabled
                 enabled = false ;
-            elseif (isequal (cline, '#endif'))
+            elseif (isequal (cline, '#endif') || isequal (cline, '    #endif'))
+                % code coverage enabled
                 enabled = true ;
             end
 
diff --git a/Tcov/grbmake.m b/Tcov/grbmake.m
index 026704c66f..ed728ea193 100644
--- a/Tcov/grbmake.m
+++ b/Tcov/grbmake.m
@@ -7,8 +7,8 @@
 %
 % See also: grbcover, grbcover_edit
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % copy the GraphBLAS.h file
 copyfile ('../Include/GraphBLAS.h', 'tmp_include/GraphBLAS.h') ;
@@ -17,15 +17,13 @@
 hfiles = [ dir('../Demo/Include') ; ...
            dir('../Source/*.h') ; ...
            dir('../Source/Template') ; ...
-           dir('../Source/Generated/*.h') ; ...
-           dir('../Source/Generator/GB_AxB.*') ; ] ;
+           dir('../Source/Generated/*.h') ; ] ;
 count = grbcover_edit (hfiles, 0, 'tmp_include') ;
 fprintf ('hfile count: %d\n', count) ;
 
 % create the C files and place in tmp_source
 cfiles = [ dir('../Source/*.c') ; ...
            dir('../Source/Generated/*.c') ; ...
-
            dir('GB_cover_finish.c')
            ] ;
 count = grbcover_edit (cfiles, count, 'tmp_source') ;
diff --git a/Tcov/grbshow.m b/Tcov/grbshow.m
index 926c50b83e..e42cfd7c69 100644
--- a/Tcov/grbshow.m
+++ b/Tcov/grbshow.m
@@ -1,8 +1,8 @@
 function grbshow
 %GBSHOW create a test coverage report in tmp_cover/
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 infiles = [ dir('tmp_source/*.*') ; dir('tmp_include/*.*') ] ;
 
diff --git a/Tcov/log_Feb20_2020.txt b/Tcov/log_Feb20_2020.txt
new file mode 100644
index 0000000000..bddbcbef34
--- /dev/null
+++ b/Tcov/log_Feb20_2020.txt
@@ -0,0 +1,97 @@
+
+---------------------------------------------- [malloc] [cover]
+20-Feb-2020 12:11:59 test07b        0.1 sec coverage:   266 :   266 of 11764 (  2.3% rate:  4053.33/sec)
+20-Feb-2020 12:11:59 test01         0.2 sec coverage:  1093 :  1359 of 11764 ( 11.6% rate:  6077.29/sec)
+20-Feb-2020 12:11:59 test01         0.1 sec coverage:    30 :  1389 of 11764 ( 11.8% rate:   491.45/sec)
+20-Feb-2020 12:11:59 test83         0.0 sec coverage:    15 :  1404 of 11764 ( 11.9% rate:  3020.54/sec)
+20-Feb-2020 12:11:59 test136        0.3 sec coverage:    45 :  1449 of 11764 ( 12.3% rate:   141.84/sec)
+20-Feb-2020 12:11:59 test98         0.1 sec coverage:    16 :  1465 of 11764 ( 12.5% rate:   194.40/sec)
+20-Feb-2020 12:11:59 test84         0.1 sec coverage:    16 :  1481 of 11764 ( 12.6% rate:   283.43/sec)
+20-Feb-2020 12:12:00 test85         0.1 sec coverage:     7 :  1488 of 11764 ( 12.6% rate:   104.38/sec)
+20-Feb-2020 12:12:00 test02         0.3 sec coverage:   353 :  1841 of 11764 ( 15.6% rate:  1109.24/sec)
+20-Feb-2020 12:12:00 test148        0.4 sec coverage:    24 :  1865 of 11764 ( 15.9% rate:    57.12/sec)
+20-Feb-2020 12:12:00 test150        0.1 sec coverage:    20 :  1885 of 11764 ( 16.0% rate:   205.88/sec)
+20-Feb-2020 12:12:01 test137        0.2 sec coverage:     5 :  1890 of 11764 ( 16.1% rate:    27.54/sec)
+20-Feb-2020 12:12:01 test138        0.1 sec coverage:     5 :  1895 of 11764 ( 16.1% rate:    36.34/sec)
+20-Feb-2020 12:12:01 test139        0.6 sec coverage:    37 :  1932 of 11764 ( 16.4% rate:    63.76/sec)
+20-Feb-2020 12:12:02 test72         0.2 sec coverage:    18 :  1950 of 11764 ( 16.6% rate:    92.69/sec)
+20-Feb-2020 12:12:02 test72         0.1 sec coverage:     0 :  1950 of 11764 ( 16.6% rate:     0.00/sec)
+20-Feb-2020 12:12:02 test09         0.0 sec coverage:     1 :  1951 of 11764 ( 16.6% rate:   108.81/sec)
+20-Feb-2020 12:12:02 test109        0.1 sec coverage:     5 :  1956 of 11764 ( 16.6% rate:    72.04/sec)
+20-Feb-2020 12:12:02 test109        0.0 sec coverage:     1 :  1957 of 11764 ( 16.6% rate:   859.85/sec)
+20-Feb-2020 12:12:02 test110        0.1 sec coverage:     8 :  1965 of 11764 ( 16.7% rate:    91.83/sec)
+20-Feb-2020 12:12:02 test131        0.1 sec coverage:     2 :  1967 of 11764 ( 16.7% rate:    29.37/sec)
+20-Feb-2020 12:12:02 test132        0.1 sec coverage:    10 :  1977 of 11764 ( 16.8% rate:   129.12/sec)
+20-Feb-2020 12:12:02 test92         0.1 sec coverage:     4 :  1981 of 11764 ( 16.8% rate:    58.46/sec)
+20-Feb-2020 12:12:02 test97         0.2 sec coverage:     9 :  1990 of 11764 ( 16.9% rate:    50.17/sec)
+20-Feb-2020 12:12:02 test04         0.0 sec coverage:    16 :  2006 of 11764 ( 17.1% rate:   366.29/sec)
+20-Feb-2020 12:12:02 test05         0.0 sec coverage:     1 :  2007 of 11764 ( 17.1% rate:   135.61/sec)
+20-Feb-2020 12:12:02 test05         0.0 sec coverage:     2 :  2009 of 11764 ( 17.1% rate:   634.12/sec)
+20-Feb-2020 12:12:02 test15         0.1 sec coverage:    14 :  2023 of 11764 ( 17.2% rate:   104.21/sec)
+20-Feb-2020 12:12:02 test78         0.1 sec coverage:     2 :  2025 of 11764 ( 17.2% rate:    26.83/sec)
+20-Feb-2020 12:12:03 test82         0.1 sec coverage:    14 :  2039 of 11764 ( 17.3% rate:   100.50/sec)
+20-Feb-2020 12:12:03 test94         0.1 sec coverage:    34 :  2073 of 11764 ( 17.6% rate:   284.75/sec)
+20-Feb-2020 12:12:03 test94         0.0 sec coverage:     4 :  2077 of 11764 ( 17.7% rate:   117.27/sec)
+20-Feb-2020 12:12:03 test126        0.2 sec coverage:     7 :  2084 of 11764 ( 17.7% rate:    42.78/sec)
+20-Feb-2020 12:12:03 test03         0.1 sec coverage:     1 :  2085 of 11764 ( 17.7% rate:     8.95/sec)
+20-Feb-2020 12:12:03 test03         0.0 sec coverage:     2 :  2087 of 11764 ( 17.7% rate:    41.45/sec)
+20-Feb-2020 12:12:03 test128        0.2 sec coverage:    53 :  2140 of 11764 ( 18.2% rate:   212.18/sec)
+20-Feb-2020 12:12:04 test17         0.4 sec coverage:     4 :  2144 of 11764 ( 18.2% rate:    11.22/sec)
+20-Feb-2020 12:12:04 test108        0.4 sec coverage:    14 :  2158 of 11764 ( 18.3% rate:    36.64/sec)
+20-Feb-2020 12:12:04 test124        0.3 sec coverage:     3 :  2161 of 11764 ( 18.4% rate:     9.90/sec)
+20-Feb-2020 12:12:05 test101        0.5 sec coverage:    26 :  2187 of 11764 ( 18.6% rate:    49.65/sec)
+20-Feb-2020 12:12:06 test26         0.6 sec coverage:    74 :  2261 of 11764 ( 19.2% rate:   132.40/sec)
+20-Feb-2020 12:12:09 test141        3.6 sec coverage:  1052 :  3313 of 11764 ( 28.2% rate:   293.23/sec)
+20-Feb-2020 12:12:15 test142        6.2 sec coverage:   381 :  3694 of 11764 ( 31.4% rate:    61.76/sec)
+20-Feb-2020 12:12:15 test144        0.2 sec coverage:    11 :  3705 of 11764 ( 31.5% rate:    60.32/sec)
+20-Feb-2020 12:12:16 test145        0.2 sec coverage:    24 :  3729 of 11764 ( 31.7% rate:   122.06/sec)
+20-Feb-2020 12:12:16 test147        0.2 sec coverage:     1 :  3730 of 11764 ( 31.7% rate:     4.39/sec)
+20-Feb-2020 12:12:16 test146        0.1 sec coverage:     2 :  3732 of 11764 ( 31.7% rate:    29.59/sec)
+20-Feb-2020 12:12:17 test149        1.1 sec coverage:    10 :  3742 of 11764 ( 31.8% rate:     9.21/sec)
+20-Feb-2020 12:12:18 test133        0.5 sec coverage:     8 :  3750 of 11764 ( 31.9% rate:    14.96/sec)
+20-Feb-2020 12:12:19 test29         1.1 sec coverage:   126 :  3876 of 11764 ( 32.9% rate:   118.05/sec)
+20-Feb-2020 12:12:19 test90         0.5 sec coverage:    17 :  3893 of 11764 ( 33.1% rate:    35.34/sec)
+20-Feb-2020 12:12:19 testc2(1)      0.3 sec coverage:    32 :  3925 of 11764 ( 33.4% rate:   119.52/sec)
+20-Feb-2020 12:12:20 test80         0.9 sec coverage:    16 :  3941 of 11764 ( 33.5% rate:    17.80/sec)
+20-Feb-2020 12:12:22 test130        2.0 sec coverage:    23 :  3964 of 11764 ( 33.7% rate:    11.48/sec)
+20-Feb-2020 12:12:31 test14         8.6 sec coverage:   142 :  4106 of 11764 ( 34.9% rate:    16.58/sec)
+20-Feb-2020 12:12:34 test129        3.5 sec coverage:     9 :  4115 of 11764 ( 35.0% rate:     2.55/sec)
+20-Feb-2020 12:12:36 test102        1.3 sec coverage:     3 :  4118 of 11764 ( 35.0% rate:     2.24/sec)
+20-Feb-2020 12:12:38 test12         1.8 sec coverage:     2 :  4120 of 11764 ( 35.0% rate:     1.13/sec)
+20-Feb-2020 12:12:38 test28         0.5 sec coverage:     2 :  4122 of 11764 ( 35.0% rate:     3.93/sec)
+20-Feb-2020 12:12:40 test107        1.5 sec coverage:     4 :  4126 of 11764 ( 35.1% rate:     2.62/sec)
+20-Feb-2020 12:12:42 test103        2.5 sec coverage:     1 :  4127 of 11764 ( 35.1% rate:     0.41/sec)
+20-Feb-2020 12:12:45 test93         2.9 sec coverage:     3 :  4130 of 11764 ( 35.1% rate:     1.05/sec)
+20-Feb-2020 12:12:46 test135        1.4 sec coverage:     2 :  4132 of 11764 ( 35.1% rate:     1.42/sec)
+20-Feb-2020 12:12:50 test100        3.3 sec coverage:     1 :  4133 of 11764 ( 35.1% rate:     0.31/sec)
+20-Feb-2020 12:12:53 test11         3.5 sec coverage:     4 :  4137 of 11764 ( 35.2% rate:     1.13/sec)
+20-Feb-2020 12:13:00 test106        6.9 sec coverage:     4 :  4141 of 11764 ( 35.2% rate:     0.58/sec)
+20-Feb-2020 12:13:15 test69        15.2 sec coverage:     7 :  4148 of 11764 ( 35.3% rate:     0.46/sec)
+20-Feb-2020 12:13:27 test77        11.8 sec coverage:    16 :  4164 of 11764 ( 35.4% rate:     1.36/sec)
+20-Feb-2020 12:13:33 test19b        6.4 sec coverage:    88 :  4252 of 11764 ( 36.1% rate:    13.77/sec)
+20-Feb-2020 12:13:38 test19b        4.4 sec coverage:    12 :  4264 of 11764 ( 36.2% rate:     2.72/sec)
+20-Feb-2020 12:13:39 test104        1.5 sec coverage:     1 :  4265 of 11764 ( 36.3% rate:     0.69/sec)
+20-Feb-2020 12:14:18 test125       38.8 sec coverage:   542 :  4807 of 11764 ( 40.9% rate:    13.95/sec)
+20-Feb-2020 12:16:33 test74       134.6 sec coverage:  4090 :  8897 of 11764 ( 75.6% rate:    30.39/sec)
+20-Feb-2020 12:16:51 test54        17.8 sec coverage:    21 :  8918 of 11764 ( 75.8% rate:     1.18/sec)
+20-Feb-2020 12:17:13 test23        22.8 sec coverage:    77 :  8995 of 11764 ( 76.5% rate:     3.37/sec)
+20-Feb-2020 12:17:21 test00         7.8 sec coverage:     9 :  9004 of 11764 ( 76.5% rate:     1.15/sec)
+20-Feb-2020 12:17:34 test76        12.6 sec coverage:    18 :  9022 of 11764 ( 76.7% rate:     1.43/sec)
+20-Feb-2020 12:17:36 test88         1.9 sec coverage:     5 :  9027 of 11764 ( 76.7% rate:     2.60/sec)
+20-Feb-2020 12:18:00 test127       23.8 sec coverage:     3 :  9030 of 11764 ( 76.8% rate:     0.13/sec)
+20-Feb-2020 12:18:17 test143       17.4 sec coverage:    11 :  9041 of 11764 ( 76.9% rate:     0.63/sec)
+20-Feb-2020 12:18:37 test99        20.0 sec coverage:     3 :  9044 of 11764 ( 76.9% rate:     0.15/sec)
+20-Feb-2020 12:19:08 test19        31.0 sec coverage:    12 :  9056 of 11764 ( 77.0% rate:     0.39/sec)
+20-Feb-2020 12:19:47 test53        38.9 sec coverage:     4 :  9060 of 11764 ( 77.0% rate:     0.10/sec)
+20-Feb-2020 12:20:06 test27        19.1 sec coverage:     1 :  9061 of 11764 ( 77.0% rate:     0.05/sec)
+[malloc debugging turned off]
+20-Feb-2020 12:21:37 test10        91.2 sec coverage:  1005 : 10066 of 11764 ( 85.6% rate:    11.02/sec)
+20-Feb-2020 12:22:58 test134       81.3 sec coverage:   303 : 10369 of 11764 ( 88.1% rate:     3.73/sec)
+20-Feb-2020 12:29:07 test75b      368.2 sec coverage:  1346 : 11715 of 11764 ( 99.6% rate:     3.66/sec)
+20-Feb-2020 12:30:20 test21        73.4 sec coverage:    15 : 11730 of 11764 ( 99.7% rate:     0.20/sec)
+20-Feb-2020 12:32:23 test16       122.6 sec coverage:     7 : 11737 of 11764 ( 99.8% rate:     0.06/sec)
+20-Feb-2020 12:34:12 test81       109.4 sec coverage:     6 : 11743 of 11764 ( 99.8% rate:     0.05/sec)
+20-Feb-2020 12:36:09 test21b      116.5 sec coverage:     3 : 11746 of 11764 ( 99.8% rate:     0.03/sec)
+20-Feb-2020 12:40:11 test18       242.0 sec coverage:     7 : 11753 of 11764 ( 99.9% rate:     0.03/sec)
+20-Feb-2020 12:48:14 test20       483.3 sec coverage:    11 :   all 11764 (full 100% rate:     0.02/sec)
+[malloc debugging turned back on]
diff --git a/Tcov/log_Oct2_2019.txt b/Tcov/log_Oct2_2019.txt
deleted file mode 100644
index 4e3d896e55..0000000000
--- a/Tcov/log_Oct2_2019.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-
----------------------------------------------- [malloc] [cover]
-
----------------------------------------------- [malloc] [cover]
-02-Oct-2019 14:15:44 test136        0.3 sec coverage:   326 of  9558 (  3.4%)
-02-Oct-2019 14:15:45 test137        0.2 sec coverage:   359 of  9558 (  3.8%)
-02-Oct-2019 14:15:45 test138        0.2 sec coverage:   383 of  9558 (  4.0%)
-02-Oct-2019 14:15:45 test139        0.2 sec coverage:   391 of  9558 (  4.1%)
-02-Oct-2019 14:15:46 test72         1.2 sec coverage:   616 of  9558 (  6.4%)
-02-Oct-2019 14:15:46 test72         0.1 sec coverage:   635 of  9558 (  6.6%)
-02-Oct-2019 14:15:46 test07b        0.0 sec coverage:   671 of  9558 (  7.0%)
-02-Oct-2019 14:15:46 test09         0.0 sec coverage:   673 of  9558 (  7.0%)
-02-Oct-2019 14:15:46 test83         0.0 sec coverage:   689 of  9558 (  7.2%)
-02-Oct-2019 14:15:46 test84         0.1 sec coverage:   716 of  9558 (  7.5%)
-02-Oct-2019 14:15:47 test84         0.0 sec coverage:   718 of  9558 (  7.5%)
-02-Oct-2019 14:15:47 test85         0.1 sec coverage:   733 of  9558 (  7.7%)
-02-Oct-2019 14:15:47 test85         0.0 sec coverage:   734 of  9558 (  7.7%)
-02-Oct-2019 14:15:47 test109        0.1 sec coverage:   743 of  9558 (  7.8%)
-02-Oct-2019 14:15:47 test109        0.0 sec coverage:   744 of  9558 (  7.8%)
-02-Oct-2019 14:15:47 test110        0.1 sec coverage:   763 of  9558 (  8.0%)
-02-Oct-2019 14:15:47 test131        0.1 sec coverage:   786 of  9558 (  8.2%)
-02-Oct-2019 14:15:47 test132        0.1 sec coverage:   814 of  9558 (  8.5%)
-02-Oct-2019 14:15:47 test98         0.0 sec coverage:   823 of  9558 (  8.6%)
-02-Oct-2019 14:15:47 test98         0.0 sec coverage:   825 of  9558 (  8.6%)
-02-Oct-2019 14:15:47 test92         0.1 sec coverage:   834 of  9558 (  8.7%)
-02-Oct-2019 14:15:47 test97         0.3 sec coverage:   847 of  9558 (  8.9%)
-02-Oct-2019 14:15:48 test01         0.2 sec coverage:  1574 of  9558 ( 16.5%)
-02-Oct-2019 14:15:48 test01         0.1 sec coverage:  1581 of  9558 ( 16.5%)
-02-Oct-2019 14:15:48 test04         0.1 sec coverage:  1589 of  9558 ( 16.6%)
-02-Oct-2019 14:15:48 test05         0.0 sec coverage:  1592 of  9558 ( 16.7%)
-02-Oct-2019 14:15:48 test05         0.0 sec coverage:  1594 of  9558 ( 16.7%)
-02-Oct-2019 14:15:48 test15         0.2 sec coverage:  1608 of  9558 ( 16.8%)
-02-Oct-2019 14:15:48 test15         0.0 sec coverage:  1609 of  9558 ( 16.8%)
-02-Oct-2019 14:15:48 test78         0.1 sec coverage:  1611 of  9558 ( 16.9%)
-02-Oct-2019 14:15:48 test82         0.2 sec coverage:  1622 of  9558 ( 17.0%)
-02-Oct-2019 14:15:48 test94         0.2 sec coverage:  1663 of  9558 ( 17.4%)
-02-Oct-2019 14:15:49 test94         0.0 sec coverage:  1666 of  9558 ( 17.4%)
-02-Oct-2019 14:15:49 test126        0.2 sec coverage:  1676 of  9558 ( 17.5%)
-02-Oct-2019 14:15:49 test03         0.2 sec coverage:  1680 of  9558 ( 17.6%)
-02-Oct-2019 14:15:49 test03         0.1 sec coverage:  1682 of  9558 ( 17.6%)
-02-Oct-2019 14:15:49 test128        0.3 sec coverage:  1744 of  9558 ( 18.2%)
-02-Oct-2019 14:15:50 test02         0.3 sec coverage:  2077 of  9558 ( 21.7%)
-02-Oct-2019 14:15:50 test17         0.5 sec coverage:  2081 of  9558 ( 21.8%)
-02-Oct-2019 14:15:51 test108        0.4 sec coverage:  2095 of  9558 ( 21.9%)
-02-Oct-2019 14:15:51 test124        0.4 sec coverage:  2098 of  9558 ( 22.0%)
-02-Oct-2019 14:15:52 test101        0.9 sec coverage:  2124 of  9558 ( 22.2%)
-02-Oct-2019 14:15:53 test26         0.9 sec coverage:  2196 of  9558 ( 23.0%)
-02-Oct-2019 14:15:54 test133        0.8 sec coverage:  2205 of  9558 ( 23.1%)
-02-Oct-2019 14:15:54 testc2(1)      0.8 sec coverage:  2245 of  9558 ( 23.5%)
-02-Oct-2019 14:15:56 test104        1.6 sec coverage:  2246 of  9558 ( 23.5%)
-02-Oct-2019 14:15:58 test80         1.7 sec coverage:  2257 of  9558 ( 23.6%)
-02-Oct-2019 14:16:00 test130        2.6 sec coverage:  2279 of  9558 ( 23.8%)
-02-Oct-2019 14:16:05 test129        4.4 sec coverage:  2290 of  9558 ( 24.0%)
-02-Oct-2019 14:16:07 test102        2.1 sec coverage:  2293 of  9558 ( 24.0%)
-02-Oct-2019 14:16:09 test12         2.2 sec coverage:  2295 of  9558 ( 24.0%)
-02-Oct-2019 14:16:11 test28         2.1 sec coverage:  2296 of  9558 ( 24.0%)
-02-Oct-2019 14:16:14 test29         2.9 sec coverage:  2441 of  9558 ( 25.5%)
-02-Oct-2019 14:16:16 test107        2.0 sec coverage:  2445 of  9558 ( 25.6%)
-02-Oct-2019 14:16:19 test103        3.0 sec coverage:  2446 of  9558 ( 25.6%)
-02-Oct-2019 14:16:24 test93         4.6 sec coverage:  2461 of  9558 ( 25.7%)
-02-Oct-2019 14:16:26 test135        2.2 sec coverage:  2463 of  9558 ( 25.8%)
-02-Oct-2019 14:16:29 test100        3.1 sec coverage:  2485 of  9558 ( 26.0%)
-02-Oct-2019 14:16:34 test11         4.9 sec coverage:  2488 of  9558 ( 26.0%)
-02-Oct-2019 14:16:42 test106        8.1 sec coverage:  2491 of  9558 ( 26.1%)
-02-Oct-2019 14:16:50 test14         8.1 sec coverage:  2597 of  9558 ( 27.2%)
-02-Oct-2019 14:16:59 test69         9.2 sec coverage:  2599 of  9558 ( 27.2%)
-02-Oct-2019 14:17:08 test77         8.9 sec coverage:  2615 of  9558 ( 27.4%)
-02-Oct-2019 14:17:14 test19b        6.1 sec coverage:  2699 of  9558 ( 28.2%)
-02-Oct-2019 14:17:19 test19b        4.9 sec coverage:  2711 of  9558 ( 28.4%)
-02-Oct-2019 14:17:25 test00         5.8 sec coverage:  2729 of  9558 ( 28.6%)
-02-Oct-2019 14:17:40 test76        14.6 sec coverage:  2746 of  9558 ( 28.7%)
-02-Oct-2019 14:17:53 test88        13.2 sec coverage:  2751 of  9558 ( 28.8%)
-02-Oct-2019 14:18:14 test54        21.0 sec coverage:  2772 of  9558 ( 29.0%)
-02-Oct-2019 14:18:37 test127       23.6 sec coverage:  3299 of  9558 ( 34.5%)
-02-Oct-2019 14:18:59 test99        22.0 sec coverage:  3307 of  9558 ( 34.6%)
-02-Oct-2019 14:19:28 test19        28.5 sec coverage:  3315 of  9558 ( 34.7%)
-02-Oct-2019 14:19:54 test23        25.7 sec coverage:  3381 of  9558 ( 35.4%)
-02-Oct-2019 14:20:19 test125       24.8 sec coverage:  3901 of  9558 ( 40.8%)
-02-Oct-2019 14:21:02 test53        43.4 sec coverage:  3905 of  9558 ( 40.9%)
-02-Oct-2019 14:21:46 test27        44.4 sec coverage:  3906 of  9558 ( 40.9%)
-02-Oct-2019 14:24:30 test74       164.1 sec coverage:  8169 of  9558 ( 85.5%)
-02-Oct-2019 14:30:14 test90       343.3 sec coverage:  8183 of  9558 ( 85.6%)
-[malloc debugging turned off]
-02-Oct-2019 14:31:40 test134       86.4 sec coverage:  8486 of  9558 ( 88.8%)
-02-Oct-2019 14:33:09 test10        88.4 sec coverage:  9491 of  9558 ( 99.3%)
-02-Oct-2019 14:34:40 test16        91.2 sec coverage:  9511 of  9558 ( 99.5%)
-02-Oct-2019 14:35:51 test21        70.8 sec coverage:  9521 of  9558 ( 99.6%)
-02-Oct-2019 14:37:36 test81       105.1 sec coverage:  9527 of  9558 ( 99.7%)
-02-Oct-2019 14:39:29 test21b      113.3 sec coverage:  9529 of  9558 ( 99.7%)
-02-Oct-2019 14:43:00 test18       210.8 sec coverage:  9537 of  9558 ( 99.8%)
-02-Oct-2019 14:46:42 test75       221.7 sec coverage:  9552 of  9558 ( 99.9%)
-02-Oct-2019 14:53:56 test20       434.3 sec coverage:  9558 of  9558 (100.0%)
-[malloc debugging turned back on]
diff --git a/Tcov/testcov.m b/Tcov/testcov.m
index 653096aebe..555e8f6993 100644
--- a/Tcov/testcov.m
+++ b/Tcov/testcov.m
@@ -1,7 +1,7 @@
 %TESTCOV run all GraphBLAS tests, with statement coverage
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 all_tcov_time = tic ;
 try
diff --git a/Test/Contents.m b/Test/Contents.m
index 180c45da63..5af54326ae 100644
--- a/Test/Contents.m
+++ b/Test/Contents.m
@@ -47,7 +47,7 @@
 %   GB_spec_subassign             - a MATLAB mimic of GxB_subassign
 %   GB_spec_transpose             - a MATLAB mimic of GrB_transpose
 %   GB_spec_vxm                   - a MATLAB mimic of GrB_vxm
-%   GB_user_compare               - compare GraphBLAS results for complex types
+%   GB_complex_compare            - compare GraphBLAS results for complex types
 %   GB_user_op                    - apply a complex binary and unary operator
 %   GB_user_opsall                - return list of complex operators
 %   accum_mask                    - apply the mask
@@ -140,7 +140,8 @@
 %   test72   - special cases for mxm, ewise, ...
 %   test73   - performance of C = A*B, with mask
 %   test74   - test GrB_mxm: all built-in semirings
-%   test75   - test GrB_mxm and GrB_vxm on all semirings (A'B dot product)
+%   test75   - test GrB_mxm and GrB_vxm on all semirings
+%   test75b  - GrB_mxm and GrB_vxm on all semirings (shorter test than test75)
 %   test76   - test GxB_resize
 %   test77   - test GxB_kron
 %   test78   - test subref
@@ -155,7 +156,7 @@
 %   test87   - performance test of GrB_mxm
 %   test88   - test hypersparse matrices with heap-based method
 %   test89   - performance test of complex A*B
-%   test90   - test AxB with pre-compiled semirings: plus_rdiv and plus_rdiv2
+%   test90   - test AxB with user-defined semirings: plus_rdiv and plus_rdiv2
 %   test91   - test subref performance on dense vectors
 %   test92   - test GB_subref (symbolic case)
 %   test93   - test dpagerank and ipagerank
@@ -206,6 +207,17 @@
 %   test137  - GrB_eWiseMult with FIRST and SECOND operators
 %   test138  - test assign, with coarse-only tasks in IxJ slice
 %   test139  - merge sort, special cases
+%   test140  - test assign with duplicates
+%   test141  - test GrB_eWiseAdd (all types and operators) for dense matrices
+%   test142  - test GrB_assign for dense matrices
+%   test143  - test special cases for C<!M>=A*B and C<M>=A*B
+%   test144  - test GB_cumsum
+%   test145  - test dot4
+%   test146  - test C<M,struct> = scalar
+%   test147  - test C<M>=A*B with very sparse M
+%   test148  - eWiseAdd with aliases
+%   test149  - test fine hash method for C<!M>=A*B
+%   test150  - test GrB_mxm with typecasting and zombies (dot3)
 
 %   testc1   - test complex operators
 %   testc2   - test complex A*B, A'*B, A*B', A'*B', A+B
@@ -222,6 +234,7 @@
 
 % Other tests:
 
+%   t74       - run test20 and test74
 %   testperf  - run all performance tests
 %   atest     - test GrB_assign and GxB_subassign
 %   atest11   - test GrB_assign and GxB_subassign
@@ -231,7 +244,7 @@
 %   grbinfo   - print info about the GraphBLAS version
 %   mtest     - test mxm
 %   longtests - very long tests
-%   gunk      - placeholder for working on test failures
+
 %   rtest     - test GrB_reduce to vector and scalar
 %   ss        - test GxB_select
 %   stest     - test GxB_select
@@ -254,16 +267,20 @@
 %   runtest          - run a single GraphBLAS test
 %   stat             - report status of statement coverage and malloc debugging
 %   GB_define        - create C source code for GraphBLAS.h
-%   GB_define2       - construct part of the GB.h file, to allow user-defined objects
+
 %   grbresults       - return time taken by last GraphBLAS function, and AxB method
 %   isequal_roundoff - compare two matrices, allowing for roundoff errors
-%   startup          - setup the path for tests in GraphBLAS/Test
+
 %   test_other       - installs all packages needed for extensive tests
 
-%   bfs_book         - graph on the cover of the book, 'Graph Algorithms in the language
+%   grb_clear_coverage - clear current statement coverage
+%   gbclear            - clear and reload GraphBLAS
+%   grb_get_coverage   - return current statement coverage
+
+%   bfs_book         - run BFS on a small graph
 %   bfs_matlab       - a simple breadth-first-search in MATLAB
 %   bfs_test         - compares bfs_matlab and GB_mex_bfs
-%   flopcount        - returns cumulative sum of flop counts for A*B or C<M>=A*B
+%   flopcount        - cumulative sum of flop counts for A*B, C<M>=A*B, C<!M>=A*B
 %   floptest         - compare flopcount with GB_mex_mxm_flops
 
 % Triangle counting:
@@ -284,3 +301,5 @@
 %   ../Demo/MATLAB/kron_demo      - test Program/kron_demo.c and compare with MATLAB kron
 %   ../Demo/MATLAB/kron_test      - test kron_demo.m
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
diff --git a/Test/GB_user_compare.m b/Test/GB_complex_compare.m
similarity index 76%
rename from Test/GB_user_compare.m
rename to Test/GB_complex_compare.m
index e25f83c6cb..a3a3d04864 100644
--- a/Test/GB_user_compare.m
+++ b/Test/GB_complex_compare.m
@@ -1,10 +1,9 @@
-function GB_user_compare (C1, C2, tol)
-%
-%GB_USER_COMPARE compare GraphBLAS results for complex types
+function GB_complex_compare (C1, C2, tol)
+%GB_COMPLEX_COMPARE compare GraphBLAS results for complex types
 %
 % compare two complex results, from GB_mex_op and GB_user_op
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (tol)
diff --git a/Test/GB_define.m b/Test/GB_define.m
index 10b51f54b9..0136bf613e 100644
--- a/Test/GB_define.m
+++ b/Test/GB_define.m
@@ -1,19 +1,19 @@
 function GB_define 
 %GB_DEFINE create C source code for GraphBLAS.h
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 ints = {'int8','uint8', 'int16','uint16', 'int32','uint32', 'int64','uint64' } ;
 floats = { 'fp32', 'fp64' } ;
 
-nonbool = [ints floats] ; 
+nonbool = [ints floats] ;
 
-numeric     = { 'min', 'max', 'plus', 'times' } ;
-numeric_ids = { inf,   -inf ,  0,     1,      } ;
+numeric     = { 'min', 'max', 'plus', 'times', 'any' } ;
+numeric_ids = {  inf , -inf ,  0    ,  1     , 999   } ;
 
-boolean     = { 'lor', 'land', 'lxor', 'eq' } ;
-boolean_ids = { false, true, false, true } ;
+boolean     = { 'lor', 'land', 'lxor', 'eq', 'any' } ;
+boolean_ids = {  false, true,   false,  true, inf  } ;
 
 %-------------------------------------------------------------------------------
 % construct all built-in monoids for GraphBLAS.h
@@ -57,6 +57,8 @@
                 s = 'INFINITY' ;
             case -inf
                 s = '-INFINITY' ;
+            case 999
+                s = '(any value)' ;
             otherwise
                 s = sprintf ('%d', id) ;
         end
@@ -71,9 +73,9 @@
     bb = boolean {k} ;
     id = boolean_ids {k} ;
     if (isequal (bb, 'eq')) 
-        op = sprintf ('GrB_%s_BOOL', upper (bb)) ;
+        op = sprintf ('GxB_%s_BOOL', upper (bb)) ;
     else
-        op = sprintf ('GrB_%s', upper (bb)) ;
+        op = sprintf ('GxB_%s', upper (bb)) ;
     end
 
     if (k == length (boolean))
@@ -83,10 +85,12 @@
     end
     fprintf ('    %-25s     // identity: ', name) ;
 
-    if (id)
+    if (id == true)
         fprintf ('true ') ;
-    else
+    elseif (id == false)
         fprintf ('false') ;
+    else
+        fprintf ('(any value)') ;
     end
 
     fprintf ('\n') ;
@@ -99,61 +103,70 @@
 fprintf ('\n') ;
 fprintf ('\n') ;
 
-% 680: x,y,z all nonboolean:  (8+6+3)*4*10
+% 1000: x,y,z all nonboolean
 n = 0 ;
-for mult = {'first', 'second', 'min', 'max', ...
-            'plus', 'minus', 'times', 'div',...
-            'iseq', 'isne', 'isgt', 'islt', 'isge', 'isle', ...
-            'lor', 'land', 'lxor', }
+for mult = {'first', 'second', 'pair', 'min', 'max', ...            % 5
+            'plus', 'minus', 'times', 'div', 'rdiv', 'rminus', ...  % 6
+            'iseq', 'isne', 'isgt', 'islt', 'isge', 'isle', ...     % 6
+            'lor', 'land', 'lxor', }                                % 3
     fmult = upper (mult {1}) ;
     fprintf ('\n') ;
     fprintf ('    // semirings with multiply op: z = %s (x,y), all types x,y,z the same:\n', fmult) ;
-    for c = nonbool
+    for c = nonbool 
         fprintf ('    ') ;
-        for add = { 'min', 'max', 'plus', 'times' }
+        for add = { 'min', 'max', 'plus', 'times', 'any' }
             ad = upper (add {1}) ;
             n = n + 1 ;
-            s = sprintf ('GrB_%s_%s_%s', ad, fmult, upper (c{1})) ;
+            s = sprintf ('GxB_%s_%s_%s', ad, fmult, upper (c{1})) ;
             fprintf ('%-23s, ', s) ;
         end
         fprintf ('\n') ;
     end
 end
 
-% 240: x,y nonboolean, z boolean: 6 * 4 * 10
+% fprintf ('\n so far %d\n', n) ;
+assert (n == 1000) ;
+% pause
+
+
+% 300: x,y nonboolean, z boolean: 6 * 5 * 10
 for mult = { 'eq', 'ne', 'gt', 'lt', 'ge', 'le' }
     fmult = upper (mult {1}) ;
     fprintf ('\n') ;
     fprintf ('    // semirings with multiply op: z = %s (x,y), where z is boolean and x,y are given by the suffix:\n', fmult) ;
     for c = nonbool
         fprintf ('    ') ;
-        for add = { 'Lor', 'Land', 'Lxor', 'eq' }
+        for add = { 'Lor', 'Land', 'Lxor', 'eq', 'any' }
             ad = upper (add {1}) ;
             n = n + 1 ;
-            s = sprintf ('GrB_%s_%s_%s', ad, fmult, upper (c{1})) ;
+            s = sprintf ('GxB_%s_%s_%s', ad, fmult, upper (c{1})) ;
             fprintf ('%-23s, ', s) ;
         end
         fprintf ('\n') ;
     end
 end
 
-% 40: x,y,z all boolean: 10 * 4
+assert (n == 1300) ;
+
+% 55: x,y,z all boolean: 11 * 5
 fprintf ('\n') ;
-fprintf ('    // purely boolean semirings (in the form GrB_(add monoid)_(multipy operator)_BOOL:\n') ;
-for mult = { 'first', 'second', 'Lor', 'Land', 'Lxor', ...
+fprintf ('    // purely boolean semirings (in the form GxB_(add monoid)_(multipy operator)_BOOL:\n') ;
+for mult = { 'first', 'second', 'pair', 'Lor', 'Land', 'Lxor', ...
              'eq', 'gt', 'lt', 'ge', 'le' }
     fmult = upper (mult {1}) ;
     fprintf ('    ') ;
-    for add = { 'Lor', 'Land', 'Lxor', 'eq' }
+    for add = { 'Lor', 'Land', 'Lxor', 'eq', 'any' }
         ad = upper (add {1}) ;
         n = n + 1 ;
-        s = sprintf ('GrB_%s_%s_BOOL', ad, fmult) ;
+        s = sprintf ('GxB_%s_%s_BOOL', ad, fmult) ;
         fprintf ('%-23s, ', s) ;
     end
     fprintf ('\n') ;
 end
 
 fprintf ('semirings: %d\n', n) ;
+assert (n == 1355) ;
+pause
 
 
 %-------------------------------------------------------------------------------
@@ -162,22 +175,22 @@
 
 n = 0 ;
 
-% 680: x,y,z all nonboolean:  (8+6+3)*4*10
+% 1000: x,y,z all nonboolean:  (11+6+3)*5*10
 
 fprintf ('    if (zcode != GB_BOOL_code)\n') ;
 fprintf ('    {\n') ;
-fprintf ('        switch (multcode)\n') ;
+fprintf ('        switch (mult_opcode)\n') ;
 fprintf ('        {\n') ;
-for mult = {'first', 'second', 'min', 'max', ...
-    'plus', 'minus', 'times', 'div',...
+for mult = {'first', 'second', 'pair', 'min', 'max', ...
+    'plus', 'minus', 'rminus', 'times', 'div', 'rdiv', ...
     'iseq', 'isne', 'isgt', 'islt', 'isge', 'isle', ...
     'lor', 'land', 'lxor', }
     fmult = upper (mult {1}) ;
     fprintf ('\n') ;
-    fprintf ('            case GB_%s_opcode : // with (4 monoids) x (10 nonboolean types)\n\n', fmult) ;
-    fprintf ('                switch (addcode)\n') ;
+    fprintf ('            case GB_%s_opcode : // with (5 monoids) x (10 nonboolean types)\n\n', fmult) ;
+    fprintf ('                switch (add_opcode)\n') ;
     fprintf ('                {\n') ;
-    for add = { 'min', 'max', 'plus', 'times' }
+    for add = { 'min', 'max', 'plus', 'times', 'any' }
         ad = upper (add {1}) ;
         fprintf ('\n') ;
         fprintf ('                    case GB_%s_opcode :\n', upper (ad)) ;
@@ -189,12 +202,12 @@
             T = upper (c {1}) ;
             s = sprintf ('GB_%s_code', T) ;
             fprintf ('                            case %-14s: ', s) ;
-            s = sprintf ('GrB_%s_%s_%s', ad, fmult, T) ;
-            fprintf ('s = %-22s ;\n', s) ;
+            s = sprintf ('GxB_%s_%s_%s', ad, fmult, T) ;
+            fprintf ('return (%-23s) ;\n', s) ;
         end
-        fprintf ('                            default : ; \n') ;
+        fprintf ('                            default : ;\n') ;
         fprintf ('                        }\n') ;
-        fprintf ('                        break; \n') ;
+        fprintf ('                        break ;\n') ;
     end
     fprintf ('\n') ;
     fprintf ('                    default : ;\n') ;
@@ -205,36 +218,38 @@
 fprintf ('        }\n') ;
 fprintf ('    }\n') ;
 
-% 240: x,y nonboolean, z boolean: 6 * 4 * 10
+assert (n == 1000) ;
+
+% 300: x,y nonboolean, z boolean: 6 * 5 * 10
 
-fprintf ('    else if (xcode != GB_BOOL_code)\n') ;
+fprintf ('    else if (xycode != GB_BOOL_code)\n') ;
 fprintf ('    {\n') ;
-fprintf ('        switch (multcode)\n') ;
+fprintf ('        switch (mult_opcode)\n') ;
 fprintf ('        {\n') ;
 for mult = { 'eq', 'ne', 'gt', 'lt', 'ge', 'le' }
     fmult = upper (mult {1}) ;
     fprintf ('\n') ;
-    fprintf ('            case GB_%s_opcode : // with (4 bool monoids) x (10 nonboolean types)\n\n', fmult) ;
-    fprintf ('                switch (addcode)\n') ;
+    fprintf ('            case GB_%s_opcode : // with (5 bool monoids) x (10 nonboolean types)\n\n', fmult) ;
+    fprintf ('                switch (add_opcode)\n') ;
     fprintf ('                {\n') ;
-    for add = { 'Lor', 'Land', 'Lxor', 'eq' }
+    for add = { 'Lor', 'Land', 'Lxor', 'eq', 'any' }
         ad = upper (add {1}) ;
         fprintf ('\n') ;
         fprintf ('                    case GB_%s_opcode :\n', upper (ad)) ;
         fprintf ('\n') ;
-        fprintf ('                        switch (zcode)\n') ;
+        fprintf ('                        switch (xycode)\n') ;
         fprintf ('                        {\n') ;
         for c = nonbool
             n = n + 1 ;
             T = upper (c {1}) ;
             s = sprintf ('GB_%s_code', T) ;
             fprintf ('                            case %-14s: ', s) ;
-            s = sprintf ('GrB_%s_%s_%s', ad, fmult, T) ;
-            fprintf (' s = %-22s ;\n', s) ;
+            s = sprintf ('GxB_%s_%s_%s', ad, fmult, T) ;
+            fprintf ('return (%-23s) ;\n', s) ;
         end
-        fprintf ('                            default : ; \n') ;
+        fprintf ('                            default : ;\n') ;
         fprintf ('                        }\n') ;
-        fprintf ('                        break; \n') ;
+        fprintf ('                        break ;\n') ;
     end
     fprintf ('\n') ;
     fprintf ('                    default : ;\n') ;
@@ -245,26 +260,28 @@
 fprintf ('        }\n') ;
 fprintf ('    }\n') ;
 
-% 40: x,y,z all boolean: 10 * 4
+assert (n == 1300) ;
+
+% 55: x,y,z all boolean: 11 * 5
 fprintf ('    else // purely boolean semirings\n') ;
 
 fprintf ('    {\n') ;
-fprintf ('        switch (multcode)\n') ;
+fprintf ('        switch (mult_opcode)\n') ;
 fprintf ('        {\n') ;
-for mult = { 'first', 'second', 'Lor', 'Land', 'Lxor', ...
+for mult = { 'first', 'second', 'pair', 'Lor', 'Land', 'Lxor', ...
              'eq', 'gt', 'lt', 'ge', 'le' }
     fmult = upper (mult {1}) ;
     fprintf ('\n') ;
     fprintf ('            case GB_%s_opcode :\n\n', fmult) ;
-    fprintf ('                switch (addcode)\n') ;
+    fprintf ('                switch (add_opcode)\n') ;
     fprintf ('                {\n') ;
-    for add = { 'Lor', 'Land', 'Lxor', 'eq' }
+    for add = { 'Lor', 'Land', 'Lxor', 'eq', 'any' }
         n = n + 1 ;
         ad = upper (add {1}) ;
         s = sprintf ('GB_%s_opcode', ad) ;
         fprintf ('                    case %-20s : ', s) ;
-        s = sprintf ('GrB_%s_%s_BOOL', ad, fmult) ;
-        fprintf (' s = %-22s ;\n', s) ;
+        s = sprintf ('GxB_%s_%s_BOOL', ad, fmult) ;
+        fprintf ('return (%-23s) ;\n', s) ;
     end
     fprintf ('                    default : ;\n') ;
     fprintf ('                }\n') ;
diff --git a/Test/GB_define2.m b/Test/GB_define2.m
deleted file mode 100644
index f4b00adbfc..0000000000
--- a/Test/GB_define2.m
+++ /dev/null
@@ -1,242 +0,0 @@
-function GBdefine
-%GB_DEFINE2 construct part of the GB.h file, to allow user-defined objects
-% defined at compile time to access built-in objects
-
-types = {
-'BOOL',
-'INT8',
-'UINT8',
-'INT16',
-'UINT16',
-'INT32',
-'UINT32',
-'INT64',
-'UINT64',
-'FP32',
-'FP64' } ;
-
-ctypes = {
-'bool',
-'int8_t',
-'uint8_t',
-'int16_t',
-'uint16_t',
-'int32_t',
-'uint32_t',
-'int64_t',
-'uint64_t',
-'float',
-'double' } ;
-
-fprintf ('\n//------------------------------------------------------\n') ;
-fprintf ('// built-in types\n') ;
-fprintf ('//------------------------------------------------------\n\n') ;
-
-    for t = 1:length (types)
-        gt = types  {t} ;
-        ct = ctypes {t} ;
-        fprintf ('#define GB_DEF_GrB_%s_type %s\n', gt, ct) ;
-    end
-
-fprintf ('\n//------------------------------------------------------\n') ;
-fprintf ('// built-in unary operators\n') ;
-fprintf ('//------------------------------------------------------\n\n') ;
-
-uops = {
-'IDENTITY',
-'AINV'
-'MINV',
-'LNOT',     % lnot_type and following are extensions
-'ONE',
-'ABS' } ;
-
-for k = 1:length (uops)
-    op = uops {k} ;
-    fprintf ('// op: %s\n', op) ;
-    for t = 1:length (types)
-        gt = types  {t} ;
-        ct = ctypes {t} ;
-        if (k <= 3)
-            kind = 'r' ;
-        else
-            kind = 'x' ;
-        end
-        fprintf ('#define GB_DEF_G%sB_%s_%s_function GB_%s_f_%s\n', ...
-            kind, op, gt, op, gt) ;
-        fprintf ('#define GB_DEF_G%sB_%s_%s_ztype %s\n', kind, op, gt, ct) ;
-        fprintf ('#define GB_DEF_G%sB_%s_%s_xtype %s\n', kind, op, gt, ct) ;
-        fprintf ('\n') ;
-    end
-end
-
-% GrB_LNOT
-fprintf ('#define GB_DEF_GrB_LNOT_function GB_LNOT_f_BOOL\n') ;
-fprintf ('#define GB_DEF_GrB_LNOT_ztype bool\n') ;
-fprintf ('#define GB_DEF_GrB_LNOT_xtype bool\n') ;
-
-fprintf ('\n//------------------------------------------------------\n') ;
-fprintf ('// binary operators of the form z=f(x,y): TxT -> T\n') ;
-fprintf ('//------------------------------------------------------\n\n') ;
-
-ops1 = {
-'FIRST',
-'SECOND',
-'MIN',
-'MAX',
-'PLUS',
-'MINUS',
-'RMINUS',
-'TIMES',
-'DIV',
-'RDIV',
-'ISEQ',     % iseq and following are extensions
-'ISNE',
-'ISGT',
-'ISLT',
-'ISGE',
-'ISLE',
-'LOR',
-'LAND',
-'LXOR' } ;
-
-for k = 1:length (ops1)
-    op = ops1 {k} ;
-    fprintf ('// op: %s\n', op) ;
-    for t = 1:length (types)
-        gt = types  {t} ;
-        ct = ctypes {t} ;
-        if (k <= 8)
-            kind = 'r' ;
-        else
-            kind = 'x' ;
-        end
-        fprintf ('#define GB_DEF_G%sB_%s_%s_function GB_%s_f_%s\n', ...
-            kind, op, gt, op, gt) ;
-        fprintf ('#define GB_DEF_G%sB_%s_%s_ztype %s\n', kind, op, gt, ct) ;
-        fprintf ('#define GB_DEF_G%sB_%s_%s_xtype %s\n', kind, op, gt, ct) ;
-        fprintf ('#define GB_DEF_G%sB_%s_%s_ytype %s\n', kind, op, gt, ct) ;
-        fprintf ('\n') ;
-    end
-end
-
-fprintf ('\n//------------------------------------------------------\n') ;
-fprintf ('// binary operators of the form z=f(x,y): TxT -> bool\n') ;
-fprintf ('//------------------------------------------------------\n\n') ;
-
-ops2 = {
-'EQ',
-'NE',
-'GT',
-'LT',
-'GE',
-'LE' } ;
-
-for k = 1:length (ops2)
-    op = ops2 {k} ;
-    fprintf ('// op: %s\n', op) ;
-    for t = 1:length (types)
-        gt = types  {t} ;
-        ct = ctypes {t} ;
-        fprintf ('#define GB_DEF_GrB_%s_%s_function GB_%s_f_%s\n', ...
-            op, gt, op, gt) ;
-        fprintf ('#define GB_DEF_GrB_%s_%s_ztype bool\n', op, gt) ;
-        fprintf ('#define GB_DEF_GrB_%s_%s_xtype %s\n', op, gt, ct) ;
-        fprintf ('#define GB_DEF_GrB_%s_%s_ytype %s\n', op, gt, ct) ;
-        fprintf ('\n') ;
-    end
-end
-
-fprintf ('\n//------------------------------------------------------\n') ;
-fprintf ('// binary operators of the form z=f(x,y): bool x bool -> bool\n') ;
-fprintf ('//------------------------------------------------------\n\n') ;
-
-ops3 = {
-'LOR',
-'LAND',
-'LXOR' } ;
-
-for k = 1:length (ops3)
-        op = ops3 {k} ;
-        fprintf ('#define GB_DEF_GrB_%s_function GB_%s_f_BOOL\n', op, op) ;
-        fprintf ('#define GB_DEF_GrB_%s_ztype bool\n', op) ;
-        fprintf ('#define GB_DEF_GrB_%s_xtype bool\n', op) ;
-        fprintf ('#define GB_DEF_GrB_%s_ytype bool\n', op) ;
-        fprintf ('\n') ;
-end
-
-
-fprintf ('\n//------------------------------------------------------\n') ;
-fprintf ('// built-in monoids\n') ;
-fprintf ('//------------------------------------------------------\n\n') ;
-
-mons = {
-'MIN',
-'MAX',
-'PLUS',
-'TIMES' } ;
-
-for k = 1:length (mons)
-    op = mons {k} ;
-    fprintf ('// op: %s\n', op) ;
-    for t = 1:length (types)
-        gt = types  {t} ;
-        ct = ctypes {t} ;
-        fprintf ('#define GB_DEF_GxB_%s_%s_add GB_%s_f_%s\n', ...
-            op, gt, op, gt) ;
-    end
-end
-
-fprintf ('\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_INT8_MONOID_identity   INT8_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_UINT8_MONOID_identity  UINT8_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_INT16_MONOID_identity  INT16_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_UINT16_MONOID_identity UINT16_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_INT32_MONOID_identity  INT32_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_UINT32_MONOID_identity UINT32_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_INT64_MONOID_identity  INT64_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_UINT64_MONOID_identity UINT64_MAX\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_FP32_MONOID_identity   INFINITY\n') ;
-fprintf ('#define GB_DEF_GxB_MIN_FP64_MONOID_identity   INFINITY\n') ;
-
-fprintf ('\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_INT8_MONOID_identity   INT8_MIN\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_UINT8_MONOID_identity  0\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_INT16_MONOID_identity  INT16_MIN\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_UINT16_MONOID_identity 0\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_INT32_MONOID_identity  INT32_MIN\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_UINT32_MONOID_identity 0\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_INT64_MONOID_identity  INT64_MIN\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_UINT64_MONOID_identity 0\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_FP32_MONOID_identity   (-INFINITY)\n') ;
-fprintf ('#define GB_DEF_GxB_MAX_FP64_MONOID_identity   (-INFINITY)\n') ;
-
-fprintf ('\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_INT8_MONOID_identity   0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_UINT8_MONOID_identity  0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_INT16_MONOID_identity  0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_UINT16_MONOID_identity 0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_INT32_MONOID_identity  0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_UINT32_MONOID_identity 0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_INT64_MONOID_identity  0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_UINT64_MONOID_identity 0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_FP32_MONOID_identity   0\n') ;
-fprintf ('#define GB_DEF_GxB_PLUS_FP64_MONOID_identity   0\n') ;
-
-fprintf ('\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_INT8_MONOID_identity   1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_UINT8_MONOID_identity  1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_INT16_MONOID_identity  1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_UINT16_MONOID_identity 1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_INT32_MONOID_identity  1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_UINT32_MONOID_identity 1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_INT64_MONOID_identity  1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_UINT64_MONOID_identity 1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_FP32_MONOID_identity   1\n') ;
-fprintf ('#define GB_DEF_GxB_TIMES_FP64_MONOID_identity   1\n') ;
-
-fprintf ('\n') ;
-fprintf ('#define GB_DEF_GxB_LOR_BOOL_MONOID_identity    false\n') ;
-fprintf ('#define GB_DEF_GxB_LAND_BOOL_MONOID_identity   true\n') ;
-fprintf ('#define GB_DEF_GxB_LXOR_BOOL_MONOID_identity   false\n') ;
-fprintf ('#define GB_DEF_GxB_EQ_BOOL_MONOID_identity     true\n') ;
-
diff --git a/Test/GB_mex.h b/Test/GB_mex.h
index 608135c9ef..7e864daa24 100644
--- a/Test/GB_mex.h
+++ b/Test/GB_mex.h
@@ -2,7 +2,7 @@
 // GB_mex.h: definitions for the MATLAB interface to GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -18,7 +18,6 @@
 // #include "GB.h"
 #include "GB_mxm.h"
 #include "GB_Pending.h"
-#include "GB_Sauna.h"
 #include "GB_add.h"
 #include "GB_subref.h"
 #include "GB_transpose.h"
@@ -278,14 +277,33 @@ bool GB_mx_xsame    // true if arrays X and Y are the same (ignoring zombies)
     int64_t *I      // row indices (for zombies), same length as X and Y
 ) ;
 
+bool GB_mx_xsame32  // true if arrays X and Y are the same (ignoring zombies)
+(
+    float *X,
+    float *Y,
+    int64_t len,    // length of X and Y
+    int64_t *I,     // row indices (for zombies), same length as X and Y
+    float eps       // error tolerance allowed (eps > 0)
+) ;
+
+bool GB_mx_xsame64  // true if arrays X and Y are the same (ignoring zombies)
+(
+    double *X,
+    double *Y,
+    int64_t len,    // length of X and Y
+    int64_t *I,     // row indices (for zombies), same length as X and Y
+    double eps      // error tolerance allowed (eps > 0)
+) ;
+
 bool GB_mx_isequal  // true if A and B are exactly the same
 (
     GrB_Matrix A,
-    GrB_Matrix B
+    GrB_Matrix B,
+    double eps      // if A and B are both FP32 or FP64, and if eps > 0,
+                    // then the values are considered equal if their relative
+                    // difference is less than or equal to eps.
 ) ;
 
-int GB_mx_Sauna_nmalloc (void) ;  // return # of mallocs in Saunas in use
-
 GrB_Matrix GB_mx_alias      // output matrix (NULL if no match found)
 (
     char *arg_name,         // name of the output matrix
@@ -368,7 +386,6 @@ GrB_Matrix GB_mx_alias      // output matrix (NULL if no match found)
     {                                                                       \
         /* brutal malloc debug */                                           \
         int nmalloc_start = (int) GB_Global_nmalloc_get ( ) ;               \
-        int nmalloc_Sauna_start = GB_mx_Sauna_nmalloc ( ) ;                 \
         for (int tries = 0 ; ; tries++)                                     \
         {                                                                   \
             /* give GraphBLAS the ability to do a # of mallocs, */          \
@@ -397,20 +414,15 @@ GrB_Matrix GB_mx_alias      // output matrix (NULL if no match found)
                 FREE_DEEP_COPY ;                                            \
                 GET_DEEP_COPY ;                                             \
                 int nmalloc_end = (int) GB_Global_nmalloc_get ( ) ;         \
-                int nmalloc_Sauna_end = GB_mx_Sauna_nmalloc ( ) ;           \
-                int nleak = ((nmalloc_end   - nmalloc_Sauna_end  ) -        \
-                             (nmalloc_start - nmalloc_Sauna_start)) ;       \
+                int nleak = nmalloc_end - nmalloc_start ;                   \
                 if (nleak > 0)                                              \
                 {                                                           \
                     /* memory leak */                                       \
                     printf ("Leak! tries %d : nleak %d\n"                   \
                         "nmalloc_end:        %d\n"                          \
-                        "nmalloc_Sauna_end   %d\n"                          \
                         "nmalloc_start:      %d\n"                          \
-                        "nmalloc_Sauna_start %d\n"                          \
                         "method [%s]\n",                                    \
-                        tries, nleak, nmalloc_end, nmalloc_Sauna_end,       \
-                        nmalloc_start, nmalloc_Sauna_start,                 \
+                        tries, nleak, nmalloc_end, nmalloc_start,           \
                         GB_STR (GRAPHBLAS_OPERATION)) ;                     \
                     mexWarnMsgIdAndTxt ("GB:leak", GrB_error ( )) ;         \
                     FREE_ALL ;                                              \
diff --git a/Test/GB_mex_AdotB.c b/Test/GB_mex_AdotB.c
index f97b7f8c1e..1f8273a840 100644
--- a/Test/GB_mex_AdotB.c
+++ b/Test/GB_mex_AdotB.c
@@ -2,7 +2,7 @@
 // GB_mex_AdotB: compute C=spones(Mask).*(A'*B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -48,7 +48,7 @@ GrB_Info adotb_complex (GB_Context Context)
         return (info) ;
     }
 
-    // force completion, since GB_AxB_meta expects its inputs to be finished
+    // force completion
     info = GrB_wait ( ) ;
     if (info != GrB_SUCCESS)
     {
@@ -56,22 +56,9 @@ GrB_Info adotb_complex (GB_Context Context)
         return (info) ;
     }
 
-    #ifdef MY_COMPLEX
-    // use the precompiled complex type
-    if (Aconj != NULL) Aconj->type = My_Complex ;
-    if (B     != NULL) B->type     = My_Complex ;
-    #endif
-
     bool mask_applied = false ;
 
-    GrB_Semiring semiring =
-        #ifdef MY_COMPLEX
-            My_Complex_plus_times ;
-        #else
-            Complex_plus_times ;
-        #endif
-
-    // GxB_print (semiring,3) ;
+    GrB_Semiring semiring = Complex_plus_times ;
 
     GrB_Matrix Aslice [1] ;
     Aslice [0] = Aconj ;
@@ -79,25 +66,18 @@ GrB_Info adotb_complex (GB_Context Context)
     if (Mask != NULL)
     {
         // C<M> = A'*B using dot product method
-        info = GB_AxB_dot3 (&C, Mask, Aconj, B, semiring, flipxy, Context) ;
+        info = GB_AxB_dot3 (&C, Mask, false, Aconj, B, semiring, flipxy, Context);
         mask_applied = true ;
     }
     else
     {
         // C = A'*B using dot product method
-        info = GB_AxB_dot2 (&C, NULL, Aslice, B, semiring, flipxy,
+        info = GB_AxB_dot2 (&C, NULL, false, Aslice, B, semiring, flipxy,
             &mask_applied,
             /* single thread: */
             1, 1, 1, Context) ;
     }
 
-    #ifdef MY_COMPLEX
-    // convert back to run-time complex type
-    if (C     != NULL) C->type     = Complex ;
-    if (B     != NULL) B->type     = Complex ;
-    if (Aconj != NULL) Aconj->type = Complex ;
-    #endif
-
     GrB_free (&Aconj) ;
     return (info) ;
 }
@@ -123,14 +103,14 @@ GrB_Info adotb (GB_Context Context)
     if (Mask != NULL)
     {
         // C<M> = A'*B using dot product method
-        info = GB_AxB_dot3 (&C, Mask, A, B,
+        info = GB_AxB_dot3 (&C, Mask, false, A, B,
             semiring /* GxB_PLUS_TIMES_FP64 */,
             flipxy, Context) ;
         mask_applied = true ;
     }
     else
     {
-        info = GB_AxB_dot2 (&C, NULL, Aslice, B,
+        info = GB_AxB_dot2 (&C, NULL, false, Aslice, B,
             semiring /* GxB_PLUS_TIMES_FP64 */,
             flipxy, &mask_applied,
             // single thread:
@@ -200,7 +180,6 @@ void mexFunction
 
         GrB_Matrix_nrows (&mnrows, Mask) ;
         GrB_Matrix_ncols (&mncols, Mask) ;
-        // GxB_print (Mask, 3) ;
 
         if (!Mask->is_csc)
         {
diff --git a/Test/GB_mex_AplusB.c b/Test/GB_mex_AplusB.c
index 3da858e615..904a0412fa 100644
--- a/Test/GB_mex_AplusB.c
+++ b/Test/GB_mex_AplusB.c
@@ -2,7 +2,7 @@
 // GB_mex_AplusB: compute C=A+B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -73,7 +73,7 @@ void mexFunction
     // simple_tic (tic2) ;
 
     // C = A+B using the op.  No mask
-    METHOD (GB_add (&C, A->type, true, NULL, A, B, op, Context)) ;
+    METHOD (GB_add (&C, A->type, true, NULL, false, A, B, op, Context)) ;
 
     // return C to MATLAB as a plain sparse matrix
     pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C AplusB result", false) ;
diff --git a/Test/GB_mex_AxB.c b/Test/GB_mex_AxB.c
index 2fcfdc989b..939515348e 100644
--- a/Test/GB_mex_AxB.c
+++ b/Test/GB_mex_AxB.c
@@ -2,7 +2,7 @@
 // GB_mex_AxB: compute C=A*B, A'*B, A*B', or A'*B'
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -31,7 +31,7 @@
 
 GrB_Info info ;
 bool malloc_debug = false ;
-bool ignore = false ;
+bool ignore = false, ignore2 = false ;
 bool atranspose = false ;
 bool btranspose = false ;
 GrB_Matrix A = NULL, B = NULL, C = NULL, Aconj = NULL, Bconj = NULL,
@@ -67,16 +67,21 @@ GrB_Info axb (GB_Context Context)
 
     // C = A*B, A'*B, A*B', or A'*B'
     info = GB_AxB_meta (&C,
+        NULL,       // not in place
+        false,      // C_replace
         true,       // CSC
         NULL,       // no MT returned
         NULL,       // no Mask
         false,      // mask not complemented
+        false,      // mask not structural
+        NULL,       // no accum
         A, B,
         semiring,   // GrB_PLUS_TIMES_FP64
         atranspose,
         btranspose,
         false,      // flipxy
         &ignore,    // mask_applied
+        &ignore2,   // done_in_place
         AxB_method, &AxB_method_used, Context) ;
 
     GrB_free (&add) ;
@@ -128,7 +133,7 @@ GrB_Info axb_complex (GB_Context Context)
 
     }
 
-    // force completion, since GB_AxB_meta expects its inputs to be finished
+    // force completion
     info = GrB_wait ( ) ;
     if (info != GrB_SUCCESS)
     {
@@ -137,41 +142,25 @@ GrB_Info axb_complex (GB_Context Context)
         return (info) ;
     }
 
-    #ifdef MY_COMPLEX
-    // use the precompiled complex type
-    if (Aconj != NULL) Aconj->type = My_Complex ;
-    if (Bconj != NULL) Bconj->type = My_Complex ;
-    if (A     != NULL) A->type     = My_Complex ;
-    if (B     != NULL) B->type     = My_Complex ;
-    #endif
-
     info = GB_AxB_meta (&C,
+        NULL,       // not in place
+        false,      // C_replace
         true,       //CSC
         NULL,       // no MT returned
         NULL,       // no Mask
         false,      // mask not complemented
+        false,      // mask not structural
+        NULL,       // no accum
         (atranspose) ? Aconj : A,
         (btranspose) ? Bconj : B,
-        #ifdef MY_COMPLEX
-            My_Complex_plus_times,
-        #else
-            Complex_plus_times,
-        #endif
+        Complex_plus_times,
         atranspose,
         btranspose,
         false,      // flipxy
         &ignore,    // mask_applied
+        &ignore2,   // done_in_place
         AxB_method, &AxB_method_used, Context) ;
 
-    #ifdef MY_COMPLEX
-    // convert back to run-time complex type
-    if (C     != NULL) C->type     = Complex ;
-    if (Aconj != NULL) Aconj->type = Complex ;
-    if (Bconj != NULL) Bconj->type = Complex ;
-    if (A     != NULL) A->type     = Complex ;
-    if (B     != NULL) B->type     = Complex ;
-    #endif
-
     GrB_free (&Bconj) ;
     GrB_free (&Aconj) ;
 
@@ -192,6 +181,7 @@ void mexFunction
     info = GrB_SUCCESS ;
     malloc_debug = GB_mx_get_global (true) ;
     ignore = false ;
+    ignore2 = false ;
     A = NULL ;
     B = NULL ;
     C = NULL ;
@@ -237,6 +227,8 @@ void mexFunction
     // 1001: Gustavson
     // 1002: heap
     // 1003: dot
+    // 1004: hash
+    // 1005: saxpy
     GET_SCALAR (4, GrB_Desc_Value, AxB_method, GxB_DEFAULT) ;
 
     if (! ((AxB_method == GxB_DEFAULT) ||
diff --git a/Test/GB_mex_Col_assign.c b/Test/GB_mex_Col_assign.c
index c5d08737ce..88cf30b513 100644
--- a/Test/GB_mex_Col_assign.c
+++ b/Test/GB_mex_Col_assign.c
@@ -2,7 +2,7 @@
 // GB_mex_assign: C<Mask>(I,J) = accum (C (I,J), A)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // This function is a wrapper for GrB_Matrix_assign, GrB_Matrix_assign_T
@@ -144,7 +144,6 @@ GrB_Info assign ( )
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 default:
                     FREE_ALL ;
@@ -180,7 +179,6 @@ GrB_Info assign ( )
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 {
                     OK (GrB_assign ((GrB_Vector) C, (GrB_Vector) Mask,
@@ -218,7 +216,6 @@ GrB_Info assign ( )
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 {
                     OK (GrB_assign (C, Mask, accum, Ax, I, ni, J, nj, desc)) ;
diff --git a/Test/GB_mex_Col_extract.c b/Test/GB_mex_Col_extract.c
index 12e247c83f..b42abb0ff8 100644
--- a/Test/GB_mex_Col_extract.c
+++ b/Test/GB_mex_Col_extract.c
@@ -2,7 +2,7 @@
 // GB_mex_Col_extract: MATLAB interface for w<mask> = accum (w,A(I,j))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_Matrix_build.c b/Test/GB_mex_Matrix_build.c
index 301c69a839..4b7f23815e 100644
--- a/Test/GB_mex_Matrix_build.c
+++ b/Test/GB_mex_Matrix_build.c
@@ -2,7 +2,7 @@
 // GB_mex_Matrix_build.c: MATLAB interface to GrB_Matrix_build
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_Matrix_extract.c b/Test/GB_mex_Matrix_extract.c
index 136b557712..6753fa423b 100644
--- a/Test/GB_mex_Matrix_extract.c
+++ b/Test/GB_mex_Matrix_extract.c
@@ -2,7 +2,7 @@
 // GB_mex_Matrix_extract: MATLAB interface for C<Mask> = accum (C,A(I,J))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_Matrix_extractElement.c b/Test/GB_mex_Matrix_extractElement.c
index 7b434eee18..d6d070c6b4 100644
--- a/Test/GB_mex_Matrix_extractElement.c
+++ b/Test/GB_mex_Matrix_extractElement.c
@@ -2,7 +2,7 @@
 // GB_mex_Matrix_extractElement: MATLAB interface for x = A(i,j)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -217,7 +217,6 @@ void mexFunction
             }
             break;
 
-        case GB_UCT_code   :
         case GB_UDT_code   :
             {
                 // user-defined complex type
diff --git a/Test/GB_mex_Matrix_subref.c b/Test/GB_mex_Matrix_subref.c
index 4a35a672aa..b01354765d 100644
--- a/Test/GB_mex_Matrix_subref.c
+++ b/Test/GB_mex_Matrix_subref.c
@@ -2,7 +2,7 @@
 // GB_mex_Matrix_subref: C=A(I,J)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_Vector_build.c b/Test/GB_mex_Vector_build.c
index 76cecefe95..dc064c195e 100644
--- a/Test/GB_mex_Vector_build.c
+++ b/Test/GB_mex_Vector_build.c
@@ -2,7 +2,7 @@
 // GB_mex_Vector_build.c: MATLAB interface to GrB_Vector_build
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_Vector_extract.c b/Test/GB_mex_Vector_extract.c
index 29dd7b7739..7de80e20ef 100644
--- a/Test/GB_mex_Vector_extract.c
+++ b/Test/GB_mex_Vector_extract.c
@@ -2,7 +2,7 @@
 // GB_mex_Vector_extract: MATLAB interface for w<mask> = accum (w,u(I))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_Vector_extractElement.c b/Test/GB_mex_Vector_extractElement.c
index 423a839fcc..8a7e2b1d8c 100644
--- a/Test/GB_mex_Vector_extractElement.c
+++ b/Test/GB_mex_Vector_extractElement.c
@@ -2,7 +2,7 @@
 // GB_mex_Vector_extractElement: MATLAB interface for x = v(i)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -193,7 +193,6 @@ void mexFunction
             }
             break;
 
-        case GB_UCT_code   :
         case GB_UDT_code   :
             {
                 // user-defined complex type
diff --git a/Test/GB_mex_about.c b/Test/GB_mex_about.c
index b886ea6ae8..2a07ea42c7 100644
--- a/Test/GB_mex_about.c
+++ b/Test/GB_mex_about.c
@@ -2,7 +2,7 @@
 // GB_mex_about: print the 'about' information
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -238,7 +238,7 @@ void mexFunction
     GxB_get (Duh, GrB_INP0, &val) ; printf ("got inp0 %d\n", val) ;
     GxB_get (Duh, GrB_INP1, &val) ; printf ("got inp1 %d\n", val) ;
 
-    GxB_set (Duh, GrB_MASK, GrB_SCMP) ;
+    GxB_set (Duh, GrB_MASK, GrB_COMP) ;
     GB_Descriptor_check (Duh, "\n-----Duh set mask", GxB_COMPLETE, stdout, Context) ;
     GxB_get (Duh, GrB_OUTP, &val) ; printf ("got outp %d\n", val) ;
     GxB_get (Duh, GrB_MASK, &val) ; printf ("got mask %d\n", val) ;
@@ -372,7 +372,6 @@ void mexFunction
     GB_code_check (GB_FP32_code,   &f32, stdout, Context) ; printf ("\n");
     GB_code_check (GB_FP64_code,   &f64, stdout, Context) ; printf ("\n");
     GB_code_check (GB_UDT_code,    &f64, stdout, Context) ; printf ("\n");
-    GB_code_check (GB_UCT_code,    &f64, stdout, Context) ; printf ("\n");
 
     for (int i = 0 ; i <= GrB_PANIC + 1 ; i++)
     {
@@ -508,8 +507,6 @@ void mexFunction
     CHECK (GB_Global_hack_get ( ) == 90123) ;
 
     GrB_Info expected = GrB_INVALID_VALUE ;
-    ERR (GxB_set (GxB_NTHREADS, 7777777)) ;
-    printf ("Expected error: info: %d\n%s\n", info, GrB_error ( )) ;
 
     //--------------------------------------------------------------------------
     // GB_pslice
@@ -583,9 +580,9 @@ void mexFunction
     OK (GxB_print (A, GxB_COMPLETE)) ;
     OK (GxB_print (B, GxB_COMPLETE)) ;
 
-    expected = GrB_DIMENSION_MISMATCH ;
-    ERR (GxB_select (A, NULL, NULL, GxB_NE_THUNK, A, A, NULL)) ;
-    printf ("Expected error: info: %d\n%s\n", info, GrB_error ( )) ;
+    // expected = GrB_DIMENSION_MISMATCH ;
+    // ERR (GxB_select (A, NULL, NULL, GxB_NE_THUNK, A, A, NULL)) ;
+    // printf ("Expected error: info: %d\n%s\n", info, GrB_error ( )) ;
 
     GxB_Scalar thunk = NULL ;
     OK (GxB_Scalar_new (&thunk, user_type)) ;
@@ -673,59 +670,6 @@ void mexFunction
     GrB_free (&A) ;
     GrB_free (&thunk) ;
 
-    //--------------------------------------------------------------------------
-    // print user-defined objects
-    //--------------------------------------------------------------------------
-
-    #ifdef MY_BAND
-    GxB_print (My_band, GxB_COMPLETE) ;
-    #endif
-
-    #ifdef MY_BOOL
-    GxB_print (My_LOR, GxB_COMPLETE) ;
-    GxB_print (My_LOR_LAND, GxB_COMPLETE) ;
-    #endif
-
-    #ifdef MY_COMPLEX
-    GxB_print (My_Complex, GxB_COMPLETE) ;
-    GxB_print (My_Complex_plus, GxB_COMPLETE) ;
-    GxB_print (My_Complex_times, GxB_COMPLETE) ;
-    GxB_print (My_Complex_plus_monoid, GxB_COMPLETE) ;
-    GxB_print (My_Complex_plus_times, GxB_COMPLETE) ;
-    #endif
-
-    #ifdef MY_MAX
-    GxB_print (My_Max, GxB_COMPLETE) ;
-    GxB_print (My_Max_Terminal1, GxB_COMPLETE) ;
-    #endif
-
-    #ifdef PAGERANK_PREDEFINED
-    GxB_print (PageRank_type, GxB_COMPLETE) ;
-    GxB_print (PageRank_init, GxB_COMPLETE) ;
-    GxB_print (PageRank_accum, GxB_COMPLETE) ;
-    GxB_print (PageRank_add, GxB_COMPLETE) ;
-    GxB_print (PageRank_monoid, GxB_COMPLETE) ;
-    GxB_print (PageRank_multiply, GxB_COMPLETE) ;
-    GxB_print (PageRank_semiring, GxB_COMPLETE) ;
-    GxB_print (PageRank_get, GxB_COMPLETE) ;
-    GxB_print (PageRank_div, GxB_COMPLETE) ;
-    GxB_print (PageRank_diff, GxB_COMPLETE) ;
-    #endif
-
-    #ifdef MY_RDIV
-    GxB_print (My_rdiv, GxB_COMPLETE) ;
-    GxB_print (My_plus_rdiv, GxB_COMPLETE) ;
-    #endif
-
-    #ifdef MY_RDIV2
-    GxB_print (My_rdiv2, GxB_COMPLETE) ;
-    GxB_print (My_plus_rdiv2, GxB_COMPLETE) ;
-    #endif
-
-    #ifdef MY_SCALE
-    GxB_print (My_scale, GxB_COMPLETE) ;
-    #endif
-
     //--------------------------------------------------------------------------
     // GxB_print for a slice or hyperslice
     //--------------------------------------------------------------------------
@@ -785,21 +729,6 @@ void mexFunction
         GrB_free (&Aslice [1]) ;
     }
 
-    //--------------------------------------------------------------------------
-    // Sauna
-    //--------------------------------------------------------------------------
-
-    GrB_Desc_Value method = GxB_AxB_GUSTAVSON ;
-    info = GrB_SUCCESS ;
-    while (info == GrB_SUCCESS)
-    {
-        info = GB_Sauna_acquire (1, &id, &method, Context) ;
-    }
-
-    expected = GrB_INVALID_VALUE ;
-    ERR (info) ;
-    printf ("Error expected: %d\n%s\n", info, GrB_error ( )) ;
-
     //--------------------------------------------------------------------------
     // pending tuples
     //--------------------------------------------------------------------------
@@ -973,6 +902,104 @@ void mexFunction
     GrB_free (&scalar) ;
     GrB_free (&scalar2) ;
 
+    //--------------------------------------------------------------------------
+    // predefined descriptors
+    //--------------------------------------------------------------------------
+
+    OK (GxB_print (GrB_DESC_T1      , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_T0      , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_T0T1    , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_C       , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_CT1     , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_CT0     , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_CT0T1   , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_S       , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_ST1     , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_ST0     , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_ST0T1   , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_SC      , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_SCT1    , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_SCT0    , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_SCT0T1  , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_R       , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RT1     , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RT0     , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RT0T1   , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RC      , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RCT1    , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RCT0    , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RCT0T1  , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RS      , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RST1    , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RST0    , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RST0T1  , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RSC     , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RSCT1   , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RSCT0   , GxB_COMPLETE)) ;
+    OK (GxB_print (GrB_DESC_RSCT0T1 , GxB_COMPLETE)) ;
+
+    GrB_Descriptor_new (&Duh) ;
+    OK (GxB_set (Duh, GxB_AxB_METHOD, GxB_AxB_SAXPY)) ;
+    OK (GxB_print (Duh, GxB_COMPLETE)) ;
+    OK (GxB_set (Duh, GxB_AxB_METHOD, GxB_AxB_HASH)) ;
+    OK (GxB_print (Duh, GxB_COMPLETE)) ;
+    OK (GxB_set (Duh, GxB_AxB_METHOD, GxB_AxB_HEAP)) ;
+    OK (GxB_print (Duh, GxB_COMPLETE)) ;
+    OK (GxB_set (Duh, GxB_AxB_METHOD, GxB_AxB_GUSTAVSON)) ;
+    OK (GxB_print (Duh, GxB_COMPLETE)) ;
+    OK (GxB_set (Duh, GxB_AxB_METHOD, GxB_AxB_DOT)) ;
+    OK (GxB_print (Duh, GxB_COMPLETE)) ;
+    GrB_free (&Duh) ;
+
+    expected = GrB_INVALID_VALUE ;
+    ERR (GxB_set (GrB_DESC_S, GrB_INP0, GrB_TRAN)) ;
+    printf ("\nExpected error: %s\n", GrB_error ( )) ;
+
+    ERR (GrB_Descriptor_set (GrB_DESC_S, GrB_INP0, GrB_TRAN)) ;
+    printf ("\nExpected error: %s\n", GrB_error ( )) ;
+
+    //--------------------------------------------------------------------------
+    // burble
+    //--------------------------------------------------------------------------
+
+    bool burble ;
+    OK (GxB_get (GxB_BURBLE, &burble)) ;
+    printf ("burble: %d\n", burble) ;
+
+    //--------------------------------------------------------------------------
+    // select ops
+    //--------------------------------------------------------------------------
+
+    OK (GxB_print (GxB_TRIL, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_TRIU, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_DIAG, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_OFFDIAG, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_NONZERO, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_EQ_ZERO, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_GT_ZERO, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_GE_ZERO, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_LT_ZERO, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_LE_ZERO, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_NE_THUNK, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_EQ_THUNK, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_GT_THUNK, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_GE_THUNK, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_LT_THUNK, GxB_COMPLETE)) ;
+    OK (GxB_print (GxB_LE_THUNK, GxB_COMPLETE)) ;
+
+    //--------------------------------------------------------------------------
+    // assign scalar into hypersparse
+    //--------------------------------------------------------------------------
+
+    GrB_Index n = INT32_MAX ;
+    n = n * 1024 ;
+    OK (GrB_Matrix_new (&A, GrB_FP64, n, n)) ;
+    expected = GrB_OUT_OF_MEMORY ;
+    ERR (GrB_Matrix_assign_FP64 (A, NULL, NULL, (double) 1,
+        GrB_ALL, n, GrB_ALL, n, NULL)) ;
+    printf ("\nproblem too large, expected error: %s\n", GrB_error ( )) ;
+    OK (GrB_free (&A)) ;
+
     //--------------------------------------------------------------------------
     // wrapup
     //--------------------------------------------------------------------------
diff --git a/Test/GB_mex_apply.c b/Test/GB_mex_apply.c
index e81c3fd11a..ee910875f2 100644
--- a/Test/GB_mex_apply.c
+++ b/Test/GB_mex_apply.c
@@ -2,7 +2,7 @@
 // GB_mex_apply: C<Mask> = accum(C,op(A)) or op(A')
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_apply2.c b/Test/GB_mex_apply2.c
index 07c01f8c09..b030c1961c 100644
--- a/Test/GB_mex_apply2.c
+++ b/Test/GB_mex_apply2.c
@@ -2,7 +2,7 @@
 // GB_mex_apply2: C<C> = accum(C,op(A)) or op(A')
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_assign.c b/Test/GB_mex_assign.c
index 08968a4a78..e7133b1204 100644
--- a/Test/GB_mex_assign.c
+++ b/Test/GB_mex_assign.c
@@ -2,7 +2,7 @@
 // GB_mex_assign: C<Mask>(I,J) = accum (C (I,J), A)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // This function is a wrapper for GrB_Matrix_assign, GrB_Matrix_assign_T
@@ -174,7 +174,6 @@ GrB_Info assign ( )
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 default:
                     FREE_ALL ;
@@ -209,7 +208,6 @@ GrB_Info assign ( )
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 {
                     OK (GrB_assign ((GrB_Vector) C, (GrB_Vector) Mask,
@@ -246,7 +244,6 @@ GrB_Info assign ( )
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 {
                     OK (GrB_assign (C, Mask, accum, Ax, I, ni, J, nj, desc)) ;
diff --git a/Test/GB_mex_assign_alias.c b/Test/GB_mex_assign_alias.c
index ee6756deb7..697b57d8f4 100644
--- a/Test/GB_mex_assign_alias.c
+++ b/Test/GB_mex_assign_alias.c
@@ -2,7 +2,7 @@
 // GB_mex_assign_alias: C(I,J) = accum(C(I,J),C)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_assign_alias_mask.c b/Test/GB_mex_assign_alias_mask.c
new file mode 100644
index 0000000000..49904bfb4b
--- /dev/null
+++ b/Test/GB_mex_assign_alias_mask.c
@@ -0,0 +1,81 @@
+//------------------------------------------------------------------------------
+// GB_mex_assign_alias_mask: C<A> = A
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "C = GB_mex_assign_alias_mask (C, A, desc)"
+
+#define FREE_ALL                            \
+{                                           \
+    GB_MATRIX_FREE (&C) ;                   \
+    GB_MATRIX_FREE (&A) ;                   \
+    GrB_free (&desc) ;                      \
+    GB_mx_put_global (true, 0) ;            \
+}
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+    // gateway for C = GB_mex_assign_alias_mask (C, A, desc): computes C<A> = A
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, A = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs: C and A are required, a 3rd argument (desc) may be given
+    GB_WHERE (USAGE) ;
+    if (nargout > 1 || nargin < 2 || nargin > 3)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get C (make a deep copy, since C is the output of the assignment)
+    #define GET_DEEP_COPY \
+        C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get A (shallow copy); A is used below as both the mask and the input
+    A = GB_mx_mxArray_to_Matrix (pargin [1], "A input", false, true) ;
+    if (A == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("A failed") ;
+    }
+
+    // get desc (NOTE(review): PARGIN (2) presumably copes with nargin == 2)
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (2), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    // dimensions of C (not its nvals): the index counts passed with GrB_ALL
+    GrB_Index nrows, ncols ;
+    GrB_Matrix_nrows (&nrows, C) ;
+    GrB_Matrix_ncols (&ncols, C) ;
+    // C<A> = A
+    METHOD (GrB_assign (C, A, NULL, A, GrB_ALL, nrows, GrB_ALL, ncols, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_band.c b/Test/GB_mex_band.c
index efc9c131f1..393a0de26f 100644
--- a/Test/GB_mex_band.c
+++ b/Test/GB_mex_band.c
@@ -2,7 +2,7 @@
 // GB_mex_band: C = tril (triu (A,lo), hi), or with A'
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,7 +11,7 @@
 
 #include "GB_mex.h"
 
-#define USAGE "C = GB_mex_band (A, lo, hi, atranspose, pre)"
+#define USAGE "C = GB_mex_band (A, lo, hi, atranspose)"
 
 #define FREE_ALL                        \
 {                                       \
@@ -77,7 +77,7 @@ void mexFunction
 
     // check inputs
     GB_WHERE (USAGE) ;
-    if (nargout > 1 || nargin < 3 || nargin > 5)
+    if (nargout > 1 || nargin < 3 || nargin > 4)
     {
         mexErrMsgTxt ("Usage: " USAGE) ;
     }
@@ -91,13 +91,8 @@ void mexFunction
     }
 
     // create the Thunk
-    #ifdef MY_BAND
-    Thunk_type = My_bandwidth_type ;
-    my_bandwidth_type bandwidth ;
-    #else
     LoHi_type bandwidth  ;
     OK (GrB_Type_new (&Thunk_type, sizeof (LoHi_type))) ;
-    #endif
 
     // get lo and hi
     bandwidth.lo = (int64_t) mxGetScalar (pargin [1]) ;
@@ -107,7 +102,6 @@ void mexFunction
     OK (GxB_Scalar_setElement_UDT (Thunk, (void *) &bandwidth)) ;
     GrB_Index ignore ;
     OK (GxB_Scalar_nvals (&ignore, Thunk)) ;
-    // GxB_print (Thunk, 3) ;
 
     // get atranspose
     bool atranspose = false ;
@@ -118,25 +112,11 @@ void mexFunction
         OK (GxB_set (desc, GrB_INP0, GrB_TRAN)) ;
     }
 
-    // get the pre/run-time option
-    int GET_SCALAR (4, int, pre, 0) ;
-
     GB_MEX_TIC ;
 
     // create operator
-    op = NULL ;
-    if (pre)
-    {
-        // use the compile-time defined operator, My_band
-        #ifdef MY_BAND
-        op = My_band ;
-        #endif
-    }
-    if (op == NULL)
-    {
-        // use the run-time defined operator, from the band function
-        METHOD (GxB_SelectOp_new (&op, band, NULL, Thunk_type)) ;
-    }
+    // use the user-defined operator, from the band function
+    METHOD (GxB_SelectOp_new (&op, band, NULL, Thunk_type)) ;
 
     // create result matrix C
     if (atranspose)
diff --git a/Test/GB_mex_bfs.c b/Test/GB_mex_bfs.c
index 66b399aeae..c60e73fda1 100644
--- a/Test/GB_mex_bfs.c
+++ b/Test/GB_mex_bfs.c
@@ -2,7 +2,7 @@
 // GB_mex_bfs: v = bfs (A,s)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_binaryop.c b/Test/GB_mex_binaryop.c
index a78022c68b..79d0888ba2 100644
--- a/Test/GB_mex_binaryop.c
+++ b/Test/GB_mex_binaryop.c
@@ -2,7 +2,7 @@
 // GB_mex_binaryop: parse a binaryop, for testing
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_cast.c b/Test/GB_mex_cast.c
index d8cc706282..4a03e12df1 100644
--- a/Test/GB_mex_cast.c
+++ b/Test/GB_mex_cast.c
@@ -2,7 +2,7 @@
 // GB_mex_cast: cast a MATLAB array using C-style casting rules
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_clear.c b/Test/GB_mex_clear.c
index 5d18b8bdbb..9000ef1d4e 100644
--- a/Test/GB_mex_clear.c
+++ b/Test/GB_mex_clear.c
@@ -2,7 +2,7 @@
 // GB_mex_clear: clear a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -51,14 +51,11 @@ void mexFunction
     }
     mxClassID aclass = GB_mx_Type_to_classID (A->type) ;
 
-    GxB_print (A,3) ;
-
     // output matrix has same type as input matrix
     GrB_Type ctype = A->type ;
 
     // copy A into C
     GrB_Matrix_dup (&C, A) ;
-    GxB_print (C,3) ;
 
     // clear C
     METHOD (GrB_Matrix_clear (C)) ;
diff --git a/Test/GB_mex_complex.c b/Test/GB_mex_complex.c
index 432f22357d..4613108ee5 100644
--- a/Test/GB_mex_complex.c
+++ b/Test/GB_mex_complex.c
@@ -2,7 +2,7 @@
 // GB_mex_complex: convert a real matrix into a complex one
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_cumsum.c b/Test/GB_mex_cumsum.c
new file mode 100644
index 0000000000..ad9ab16b9d
--- /dev/null
+++ b/Test/GB_mex_cumsum.c
@@ -0,0 +1,74 @@
+//------------------------------------------------------------------------------
+// GB_mex_cumsum: cumulative sum using GB_cumsum
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "[p,k] = GB_mex_cumsum (c,nthreads,nmalloc)"
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+    bool malloc_debug = GB_mx_get_global (true) ;
+
+    // check inputs: 1 to 3 input arguments, at most 2 outputs
+    if (nargin < 1 || nargin > 3 || nargout > 2)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    if (!mxIsClass (pargin [0], "int64"))
+    {
+        mexErrMsgTxt ("c must be an int64 array") ;
+    }
+
+    int64_t *c = mxGetData (pargin [0]) ;
+    int64_t n = (uint64_t) mxGetNumberOfElements (pargin [0]) ;
+    // 2nd input, nthreads (NOTE(review): last macro arg is presumably a default)
+    int GET_SCALAR (1, int, nthreads, 1) ;
+    // 3rd input, nmalloc: the malloc debug count used in the debug branch below
+    int GET_SCALAR (2, int, nmalloc, 2) ;
+
+    // copy c into a 1-by-(n+1) int64 row vector p, and set p [n] = 0
+    pargout [0] = mxCreateNumericMatrix (1, n+1, mxINT64_CLASS, mxREAL) ;
+    int64_t *p = mxGetData (pargout [0]) ;
+    memcpy (p, c, n * sizeof (int64_t)) ;
+    p [n] = 0 ;
+
+    // create the 2nd output, kresult (1-by-1 int64), if requested; else NULL
+    int64_t *kresult = NULL ;
+    if (nargout > 1)
+    {
+        pargout [1] = mxCreateNumericMatrix (1, 1, mxINT64_CLASS, mxREAL) ;
+        kresult = mxGetData (pargout [1]) ;
+    }
+
+    if (!malloc_debug)
+    {
+        // normal usage: GB_cumsum operates in place on p
+        GB_cumsum (p, n, kresult, nthreads) ;
+    }
+    else
+    {
+        // same call, with malloc debugging on and its count set to nmalloc
+        printf ("test cumsum with nmalloc: %d\n", nmalloc) ;
+        GB_Global_malloc_debug_set (true) ;
+        GB_Global_malloc_debug_count_set (nmalloc) ;
+        GB_cumsum (p, n, kresult, nthreads) ;
+        GB_Global_malloc_debug_set (false) ;
+    }
+
+    // log the test coverage
+    GB_mx_put_global (true, 0) ;
+}
+
diff --git a/Test/GB_mex_debug.c b/Test/GB_mex_debug.c
index 6524b56911..ca213b0d02 100644
--- a/Test/GB_mex_debug.c
+++ b/Test/GB_mex_debug.c
@@ -2,7 +2,7 @@
 // GB_mex_debug: determine GB_DEBUG status
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_diag.c b/Test/GB_mex_diag.c
index 9e8aaaa32c..4f2a8bd38a 100644
--- a/Test/GB_mex_diag.c
+++ b/Test/GB_mex_diag.c
@@ -2,7 +2,7 @@
 // GB_mex_diag: compute C=diag(A,1)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_dpagerank.c b/Test/GB_mex_dpagerank.c
index fab71e3e11..9296004347 100644
--- a/Test/GB_mex_dpagerank.c
+++ b/Test/GB_mex_dpagerank.c
@@ -2,7 +2,7 @@
 // GB_mex_dpagerank: compute pagerank with a real semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_dump.c b/Test/GB_mex_dump.c
index 6672630ccc..9ea84779b2 100644
--- a/Test/GB_mex_dump.c
+++ b/Test/GB_mex_dump.c
@@ -2,7 +2,7 @@
 // GB_mex_dump: copy and print a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_dup.c b/Test/GB_mex_dup.c
index 3b3e265d1f..e9470f325f 100644
--- a/Test/GB_mex_dup.c
+++ b/Test/GB_mex_dup.c
@@ -2,7 +2,7 @@
 // GB_mex_dup: copy a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_eWiseAdd_Matrix.c b/Test/GB_mex_eWiseAdd_Matrix.c
index 20dbf52019..afa1825b05 100644
--- a/Test/GB_mex_eWiseAdd_Matrix.c
+++ b/Test/GB_mex_eWiseAdd_Matrix.c
@@ -2,7 +2,7 @@
 // GB_mex_eWiseAdd_Matrix: C<M> = accum(C,A+B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_eWiseAdd_Vector.c b/Test/GB_mex_eWiseAdd_Vector.c
index d4672ec7da..19f4b7b6e7 100644
--- a/Test/GB_mex_eWiseAdd_Vector.c
+++ b/Test/GB_mex_eWiseAdd_Vector.c
@@ -2,7 +2,7 @@
 // GB_mex_eWiseAdd_Vector: w<mask> = accum(w,u+v)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_eWiseMult_Matrix.c b/Test/GB_mex_eWiseMult_Matrix.c
index 05889edeac..42fcb61ef5 100644
--- a/Test/GB_mex_eWiseMult_Matrix.c
+++ b/Test/GB_mex_eWiseMult_Matrix.c
@@ -2,7 +2,7 @@
 // GB_mex_eWiseMult_Matrix: C<Mask> = accum(C,A.*B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_eWiseMult_Vector.c b/Test/GB_mex_eWiseMult_Vector.c
index 6f0072f396..a817f8802b 100644
--- a/Test/GB_mex_eWiseMult_Vector.c
+++ b/Test/GB_mex_eWiseMult_Vector.c
@@ -2,7 +2,7 @@
 // GB_mex_eWiseMult_Vector: w<mask> = accum(w,u.*v)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_eWiseMult_first.c b/Test/GB_mex_eWiseMult_first.c
index 115ea553a4..c1c87fae24 100644
--- a/Test/GB_mex_eWiseMult_first.c
+++ b/Test/GB_mex_eWiseMult_first.c
@@ -2,7 +2,7 @@
 // GB_mex_eWiseMult_first: C<Mask> = accum(C,first(A,B))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_eWiseMult_second.c b/Test/GB_mex_eWiseMult_second.c
index 1545f7150b..7d5e7971a2 100644
--- a/Test/GB_mex_eWiseMult_second.c
+++ b/Test/GB_mex_eWiseMult_second.c
@@ -2,7 +2,7 @@
 // GB_mex_eWiseMult_second: C<Mask> = accum(C,second(A,B))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_errors.c b/Test/GB_mex_errors.c
index 0ce1d623b9..f72cc07f50 100644
--- a/Test/GB_mex_errors.c
+++ b/Test/GB_mex_errors.c
@@ -2,7 +2,7 @@
 // GB_mex_errors: test error handling
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -223,23 +223,6 @@ void mexFunction
     ERR (GxB_init (42, mxMalloc, mxCalloc, NULL     , mxFree, false)) ;
     ERR (GxB_init (42, mxMalloc, mxCalloc, mxRealloc, NULL  , false)) ;
 
-    //--------------------------------------------------------------------------
-    // Sauna
-    //--------------------------------------------------------------------------
-
-    printf ("Sauna --------------------------------------------------\n") ;
-    GB_Sauna Sauna = NULL ;
-    CHECK (GB_mx_Sauna_nmalloc ( ) == 0) ;
-    CHECK (GB_Sauna_alloc (0, 8, 8) == GrB_SUCCESS) ;
-    CHECK (GB_mx_Sauna_nmalloc ( ) == 3) ;
-    Sauna = GB_Global_Saunas_get (0) ;
-    GB_Sauna_reset (Sauna, INT64_MAX/2, 0) ;
-    GB_Sauna_reset (Sauna, INT64_MAX/2, 0) ;
-    GB_Sauna_reset (Sauna, INT64_MAX/2, 0) ;
-    GB_Sauna_free (0) ;
-    CHECK (GB_Sauna_alloc (0, INT64_MAX, 8) == GrB_OUT_OF_MEMORY);
-    CHECK (GB_mx_Sauna_nmalloc ( ) == 0) ;
-
     //--------------------------------------------------------------------------
     // Type
     //--------------------------------------------------------------------------
@@ -294,16 +277,9 @@ void mexFunction
     #undef FREE_DEEP_COPY
     #undef GET_DEEP_COPY
 
-    #ifdef MY_COMPLEX
-    printf ("My_complex pre-defined\n") ;
-    #endif
     for (GB_Type_code tcode = 0 ; tcode <= GB_UDT_code ; tcode++)
     {
-        #ifdef MY_COMPLEX
-        GrB_Type utype = My_Complex ;
-        #else
         GrB_Type utype = Complex ;
-        #endif
         GrB_Type ttype = GB_code_type (tcode, utype) ;
         printf ("\n----------------------------------tcode: %d\n", tcode) ;
         OK (GB_Type_check (ttype, "GB_code_type:", GB3, NULL, Context)) ;
@@ -1976,7 +1952,7 @@ void mexFunction
     //--------------------------------------------------------------------------
 
     GrB_Index huge = GB_INDEX_MAX ;
-    GrB_Matrix HugeRow, HugeMatrix ;
+    GrB_Matrix HugeRow, HugeMatrix = NULL ;
     OK (GrB_Matrix_new (&HugeRow, GrB_FP64, 1, huge)) ;
     GB_Matrix_check (HugeRow, "huge row", GB3, NULL, Context) ;
     GxB_fprint (HugeRow, GB3, ff) ;
@@ -1984,7 +1960,7 @@ void mexFunction
     bool mask_applied = false ;
     GrB_Matrix Aslice [1] ;
     Aslice [0] = HugeRow ;
-    OK (GB_AxB_dot2 (&HugeMatrix, NULL, Aslice, HugeRow,
+    OK (GB_AxB_dot2 (&HugeMatrix, NULL, false, Aslice, HugeRow,
         GxB_PLUS_TIMES_FP64, false, &mask_applied, 1, 1, 1, Context)) ;
 
     GxB_fprint (HugeMatrix, GB3, ff) ;
@@ -3644,7 +3620,7 @@ void mexFunction
 
     op1b->opcode = 1024 ;
     ERR (GB_UnaryOp_check (op1b, "op1b invalid opcode", GB1, ff, Context)) ;
-    op1b->opcode = GB_USER_R_opcode ;
+    op1b->opcode = GB_USER_opcode ;
 
     op1b->ztype = NULL ;
     ERR (GB_UnaryOp_check (op1b, "op1b invalid ztype", GB1, ff, Context)) ;
@@ -3690,7 +3666,7 @@ void mexFunction
 
     op2b->opcode = 1024 ;
     ERR (GB_BinaryOp_check (op2b, "op2b invalid opcode", GB1, ff, Context)) ;
-    op2b->opcode = GB_USER_R_opcode ;
+    op2b->opcode = GB_USER_opcode ;
 
     op2b->ztype = NULL ;
     ERR (GB_BinaryOp_check (op2b, "op2b invalid ztype", GB1, ff, Context)) ;
@@ -3743,7 +3719,7 @@ void mexFunction
     selectop->opcode = 9999 ;
     ERR (GB_SelectOp_check (selectop, "selectop invalid opcode", GB1, ff,
         Context)) ;
-    selectop->opcode = GB_USER_SELECT_R_opcode ;
+    selectop->opcode = GB_USER_SELECT_opcode ;
 
     selectop->xtype = Tgunk ;
     ERR (GB_SelectOp_check (selectop, "selectop invalid xtype", GB1, ff,
@@ -4509,7 +4485,8 @@ void mexFunction
     dgunk->out = 999 ;
     x_bool = false ;
     Context->where = "GB_Descriptor_get" ;
-    ERR (GB_Descriptor_get (dgunk, &x_bool, NULL, NULL, NULL, NULL, Context)) ;
+    ERR (GB_Descriptor_get (dgunk, &x_bool, NULL, NULL, NULL, NULL, NULL,
+        Context)) ;
     CHECK (x_bool == false) ;
     dgunk->out = GxB_DEFAULT ;
 
@@ -4660,12 +4637,12 @@ void mexFunction
 
     E = A ;
     GrB_Matrix_dup (&A, A) ;
-    CHECK (GB_mx_isequal (A,E)) ;
+    CHECK (GB_mx_isequal (A, E, 0)) ;
     GrB_free (&E) ;
 
     z = u ;
     GrB_Vector_dup (&u, u) ;
-    CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) z)) ;
+    CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) z, 0)) ;
     GrB_free (&z) ;
 
     for (int what = 0 ; what <= 2 ; what++)
@@ -4699,20 +4676,20 @@ void mexFunction
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_mxm (B, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ;
         OK (GrB_mxm (A, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_vxm (v, umask, NULL, GxB_PLUS_TIMES_FP64, u, A, NULL)) ;
 
         OK (GrB_vxm (u, umask, NULL, GxB_PLUS_TIMES_FP64, u, A, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_mxv (v, umask, NULL, GxB_PLUS_TIMES_FP64, A, u, NULL)) ;
         OK (GrB_mxv (u, umask, NULL, GxB_PLUS_TIMES_FP64, A, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         //----------------------------------------------------------------------
@@ -4722,37 +4699,37 @@ void mexFunction
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_eWiseMult (v, umask, NULL, GxB_PLUS_TIMES_FP64,  u, u, NULL)) ;
         OK (GrB_eWiseMult (u, umask, NULL, GxB_PLUS_TIMES_FP64,  u, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_eWiseMult (v, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ;
         OK (GrB_eWiseMult (u, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_eWiseMult (v, umask, NULL, GrB_PLUS_FP64,        u, u, NULL)) ;
         OK (GrB_eWiseMult (u, umask, NULL, GrB_PLUS_FP64,        u, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_eWiseMult (B, Amask, NULL, GxB_PLUS_TIMES_FP64,  A, A, NULL)) ;
         OK (GrB_eWiseMult (A, Amask, NULL, GxB_PLUS_TIMES_FP64,  A, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_eWiseMult (B, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ;
         OK (GrB_eWiseMult (A, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_eWiseMult (B, Amask, NULL, GrB_PLUS_FP64,        A, A, NULL)) ;
         OK (GrB_eWiseMult (A, Amask, NULL, GrB_PLUS_FP64,        A, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         //----------------------------------------------------------------------
@@ -4762,37 +4739,37 @@ void mexFunction
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_eWiseAdd  (v, umask, NULL, GxB_PLUS_TIMES_FP64,  u, u, NULL)) ;
         OK (GrB_eWiseAdd  (u, umask, NULL, GxB_PLUS_TIMES_FP64,  u, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_eWiseAdd  (v, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ;
         OK (GrB_eWiseAdd  (u, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_eWiseAdd  (v, umask, NULL, GrB_PLUS_FP64,        u, u, NULL)) ;
         OK (GrB_eWiseAdd  (u, umask, NULL, GrB_PLUS_FP64,        u, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_eWiseAdd  (B, Amask, NULL, GxB_PLUS_TIMES_FP64,  A, A, NULL)) ;
         OK (GrB_eWiseAdd  (A, Amask, NULL, GxB_PLUS_TIMES_FP64,  A, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_eWiseAdd  (B, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ;
         OK (GrB_eWiseAdd  (A, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_eWiseAdd  (B, Amask, NULL, GrB_PLUS_FP64,        A, A, NULL)) ;
         OK (GrB_eWiseAdd  (A, Amask, NULL, GrB_PLUS_FP64,        A, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         //----------------------------------------------------------------------
@@ -4804,24 +4781,24 @@ void mexFunction
         OK (GrB_Vector_dup (&v, u)) ;
         GB_Vector_check (u, "start u ", GB3, NULL, Context) ;
         GB_Vector_check (v, "start v ", GB3, NULL, Context) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
 
         OK (GrB_extract   (u, umask, NULL, u, GrB_ALL, n, NULL)) ;
         GB_Vector_check (u, "u to check", GB3, NULL, Context) ;
         GB_Vector_check (v, "v to check", GB3, NULL, Context) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
         GrB_free (&v) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_extract   (B, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ;
         OK (GrB_extract   (A, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 0)) ;
         GrB_free (&B) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_extract   (v, umask, NULL, A, GrB_ALL, n, 0, NULL)) ;
         OK (GrB_extract   (u, umask, NULL, A, GrB_ALL, n, 0, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
         GrB_free (&v) ;
 
         //----------------------------------------------------------------------
@@ -4833,21 +4810,21 @@ void mexFunction
         OK (GxB_subassign (A, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ;
 
         GB_wait (B, Context) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 0)) ;
         GrB_free (&B) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GxB_subassign (B, Amask, NULL, A, ilist, n, jlist, n, NULL)) ;
         OK (GxB_subassign (A, Amask, NULL, A, ilist, n, jlist, n, NULL)) ;
         GB_wait (B, Context) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 0)) ;
         GrB_free (&B) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GxB_subassign (v, umask, NULL, u, GrB_ALL, n, NULL)) ;
         OK (GxB_subassign (u, umask, NULL, u, GrB_ALL, n, NULL)) ;
         GB_wait ((GrB_Matrix) v, Context) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
         GrB_free (&v) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
@@ -4855,7 +4832,7 @@ void mexFunction
         OK (GxB_subassign (u, umask, NULL, u, ilist, n, NULL)) ;
         GB_wait ((GrB_Matrix) v, Context) ;
         GB_wait ((GrB_Matrix) u, Context) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
         GrB_free (&v) ;
 
         //----------------------------------------------------------------------
@@ -4865,7 +4842,7 @@ void mexFunction
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_assign (B, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ;
         OK (GrB_assign (A, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 0)) ;
         GrB_free (&B) ;
 
         OK (GrB_Matrix_dup (&B, A)) ;
@@ -4873,13 +4850,13 @@ void mexFunction
         OK (GrB_assign (A, Amask, NULL, A, ilist, n, jlist, n, NULL)) ;
         GB_wait (B, Context) ;
         GB_wait (A, Context) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 0)) ;
         GrB_free (&B) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_assign (v, umask, NULL, u, GrB_ALL, n, NULL)) ;
         OK (GrB_assign (u, umask, NULL, u, GrB_ALL, n, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
         GrB_free (&v) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
@@ -4887,7 +4864,7 @@ void mexFunction
         OK (GrB_assign (u, umask, NULL, u, ilist, n, NULL)) ;
         GB_wait ((GrB_Matrix) v, Context) ;
         GB_wait ((GrB_Matrix) u, Context) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
         GrB_free (&v) ;
 
         //----------------------------------------------------------------------
@@ -4897,13 +4874,13 @@ void mexFunction
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GrB_apply (B, Amask, NULL, GrB_AINV_FP64, A, NULL)) ;
         OK (GrB_apply (A, Amask, NULL, GrB_AINV_FP64, A, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 1e-14)) ;
         GrB_free (&B) ;
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GrB_apply (v, umask, NULL, GrB_AINV_FP64, u, NULL)) ;
         OK (GrB_apply (u, umask, NULL, GrB_AINV_FP64, u, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 1e-14)) ;
         GrB_free (&v) ;
 
         //----------------------------------------------------------------------
@@ -4913,14 +4890,14 @@ void mexFunction
         OK (GrB_Matrix_dup (&B, A)) ;
         OK (GxB_select (B, Amask, NULL, GxB_NONZERO, A, NULL, NULL)) ;
         OK (GxB_select (A, Amask, NULL, GxB_NONZERO, A, NULL, NULL)) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 0)) ;
         GrB_free (&B) ;
 
 
         OK (GrB_Vector_dup (&v, u)) ;
         OK (GxB_select (v, umask, NULL, GxB_NONZERO, u, NULL, NULL)) ;
         OK (GxB_select (u, umask, NULL, GxB_NONZERO, u, NULL, NULL)) ;
-        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ;
+        CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v, 0)) ;
         GrB_free (&v) ;
 
 
@@ -4935,9 +4912,7 @@ void mexFunction
         GrB_Index ignore ;
         OK (GrB_Matrix_nvals (&ignore, A)) ;
         OK (GrB_Matrix_nvals (&ignore, B)) ;
-        // GxB_print (A, GB3) ;
-        // GxB_print (B, GB3) ;
-        CHECK (GB_mx_isequal (A,B)) ;
+        CHECK (GB_mx_isequal (A, B, 0)) ;
         GrB_free (&B) ;
 
         if (what == 2)
diff --git a/Test/GB_mex_errors.h b/Test/GB_mex_errors.h
index 8c9be4a9e4..c6d8632c11 100644
--- a/Test/GB_mex_errors.h
+++ b/Test/GB_mex_errors.h
@@ -2,7 +2,7 @@
 // GB_mex_errors.h: error handling macros
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_ewise_alias1.c b/Test/GB_mex_ewise_alias1.c
new file mode 100644
index 0000000000..3808994eee
--- /dev/null
+++ b/Test/GB_mex_ewise_alias1.c
@@ -0,0 +1,86 @@
+//------------------------------------------------------------------------------
+// GB_mex_ewise_alias1: C = A+C
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "C = GB_mex_ewise_alias1 (C, accum, A, desc)"
+
+#define FREE_ALL                            \
+{                                           \
+    GB_MATRIX_FREE (&A) ;                   \
+    GB_MATRIX_FREE (&C) ;                   \
+    GrB_free (&desc) ;                      \
+    GB_mx_put_global (true, 0) ;            \
+}
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+    // MATLAB entry point: C = A+C via GrB_eWiseAdd, with C aliased as an input
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, A = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs: at most 1 output; 3 or 4 inputs (desc may be omitted)
+    GB_WHERE (USAGE) ;
+    if (nargout > 1 || nargin < 3 || nargin > 4)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get C from pargin [0] (make a deep copy); C also receives the result
+    #define GET_DEEP_COPY \
+    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get accum, used as the add op; default: NOP, default class is class(C)
+    GrB_BinaryOp accum ;
+    if (!GB_mx_mxArray_to_BinaryOp (&accum, pargin [1], "accum",
+        GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("accum failed") ;
+    }
+
+    // get A from pargin [2] (shallow copy)
+    A = GB_mx_mxArray_to_Matrix (pargin [2], "A input", false, true) ;
+    if (A == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("A failed") ;
+    }
+
+    // get desc, the optional 4th input (PARGIN presumably checks nargin)
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (3), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    // C = A+C with no mask or accum; C is both the output and the 2nd input
+    METHOD (GrB_eWiseAdd (C, NULL, NULL, accum, A, C, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_ewise_alias2.c b/Test/GB_mex_ewise_alias2.c
new file mode 100644
index 0000000000..8c1ea2ed12
--- /dev/null
+++ b/Test/GB_mex_ewise_alias2.c
@@ -0,0 +1,86 @@
+//------------------------------------------------------------------------------
+// GB_mex_ewise_alias2: C += A+A
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "C = GB_mex_ewise_alias2 (C, accum, A, desc)"
+
+#define FREE_ALL                            \
+{                                           \
+    GB_MATRIX_FREE (&A) ;                   \
+    GB_MATRIX_FREE (&C) ;                   \
+    GrB_free (&desc) ;                      \
+    GB_mx_put_global (true, 0) ;            \
+}
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+    // MATLAB entry point: C += A+A via GrB_eWiseAdd, with A as both inputs
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, A = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs: at most 1 output; 3 or 4 inputs (desc may be omitted)
+    GB_WHERE (USAGE) ;
+    if (nargout > 1 || nargin < 3 || nargin > 4)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get C from pargin [0] (make a deep copy); C also receives the result
+    #define GET_DEEP_COPY \
+    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get accum (accumulator and add op); default: NOP, default class: class(C)
+    GrB_BinaryOp accum ;
+    if (!GB_mx_mxArray_to_BinaryOp (&accum, pargin [1], "accum",
+        GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("accum failed") ;
+    }
+
+    // get A from pargin [2] (shallow copy)
+    A = GB_mx_mxArray_to_Matrix (pargin [2], "A input", false, true) ;
+    if (A == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("A failed") ;
+    }
+
+    // get desc, the optional 4th input (PARGIN presumably checks nargin)
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (3), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    // C += A+A: no mask; accum is the accumulator and add op; A is both inputs
+    METHOD (GrB_eWiseAdd (C, NULL, accum, accum, A, A, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_ewise_alias3.c b/Test/GB_mex_ewise_alias3.c
new file mode 100644
index 0000000000..0049140900
--- /dev/null
+++ b/Test/GB_mex_ewise_alias3.c
@@ -0,0 +1,86 @@
+//------------------------------------------------------------------------------
+// GB_mex_ewise_alias3: C = C+A
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "C = GB_mex_ewise_alias3 (C, accum, A, desc)"
+
+#define FREE_ALL                            \
+{                                           \
+    GB_MATRIX_FREE (&A) ;                   \
+    GB_MATRIX_FREE (&C) ;                   \
+    GrB_free (&desc) ;                      \
+    GB_mx_put_global (true, 0) ;            \
+}
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+    // MATLAB entry point: C = C+A via GrB_eWiseAdd, with C aliased as an input
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, A = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs: at most 1 output; 3 or 4 inputs (desc may be omitted)
+    GB_WHERE (USAGE) ;
+    if (nargout > 1 || nargin < 3 || nargin > 4)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get C from pargin [0] (make a deep copy); C also receives the result
+    #define GET_DEEP_COPY \
+    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get accum, used as the add op; default: NOP, default class is class(C)
+    GrB_BinaryOp accum ;
+    if (!GB_mx_mxArray_to_BinaryOp (&accum, pargin [1], "accum",
+        GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("accum failed") ;
+    }
+
+    // get A from pargin [2] (shallow copy)
+    A = GB_mx_mxArray_to_Matrix (pargin [2], "A input", false, true) ;
+    if (A == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("A failed") ;
+    }
+
+    // get desc, the optional 4th input (PARGIN presumably checks nargin)
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (3), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    // C = C+A with no mask or accum; C is both the output and the 1st input
+    METHOD (GrB_eWiseAdd (C, NULL, NULL, accum, C, A, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_ewise_alias4.c b/Test/GB_mex_ewise_alias4.c
new file mode 100644
index 0000000000..a4d389e517
--- /dev/null
+++ b/Test/GB_mex_ewise_alias4.c
@@ -0,0 +1,86 @@
+//------------------------------------------------------------------------------
+// GB_mex_ewise_alias4: C<M> = M+M
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "C = GB_mex_ewise_alias4 (C, M, op, desc)"
+
+#define FREE_ALL                            \
+{                                           \
+    GB_MATRIX_FREE (&M) ;                   \
+    GB_MATRIX_FREE (&C) ;                   \
+    GrB_free (&desc) ;                      \
+    GB_mx_put_global (true, 0) ;            \
+}
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+    // MATLAB entry point: C<M> = M+M via GrB_eWiseAdd; M is mask and inputs
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, M = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs: at most 1 output; 3 or 4 inputs (desc may be omitted)
+    GB_WHERE (USAGE) ;
+    if (nargout > 1 || nargin < 3 || nargin > 4)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get C from pargin [0] (make a deep copy); C receives the result
+    #define GET_DEEP_COPY \
+    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get M from pargin [1] (shallow copy); used as the mask and both inputs
+    M = GB_mx_mxArray_to_Matrix (pargin [1], "M input", false, true) ;
+    if (M == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("M failed") ;
+    }
+
+    // get op (the add operator); default: NOP, default class is class(C)
+    GrB_BinaryOp op ;
+    if (!GB_mx_mxArray_to_BinaryOp (&op, pargin [2], "op",
+        GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("op failed") ;
+    }
+
+    // get desc, the optional 4th input (PARGIN presumably checks nargin)
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (3), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    // C<M> = M+M with no accum; M is the mask and also both inputs
+    METHOD (GrB_eWiseAdd (C, M, NULL, op, M, M, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_ewise_alias5.c b/Test/GB_mex_ewise_alias5.c
new file mode 100644
index 0000000000..d2c461a403
--- /dev/null
+++ b/Test/GB_mex_ewise_alias5.c
@@ -0,0 +1,95 @@
+//------------------------------------------------------------------------------
+// GB_mex_ewise_alias5: C<M> = A+M
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "C = GB_mex_ewise_alias5 (C, M, op, A, desc)"
+
+#define FREE_ALL                            \
+{                                           \
+    GB_MATRIX_FREE (&A) ;                   \
+    GB_MATRIX_FREE (&M) ;                   \
+    GB_MATRIX_FREE (&C) ;                   \
+    GrB_free (&desc) ;                      \
+    GB_mx_put_global (true, 0) ;            \
+}
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+    // MATLAB entry point: C<M> = A+M via GrB_eWiseAdd; M is mask and an input
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, M = NULL, A = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs: at most 1 output; 4 or 5 inputs (desc may be omitted)
+    GB_WHERE (USAGE) ;
+    if (nargout > 1 || nargin < 4 || nargin > 5)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get C from pargin [0] (make a deep copy); C receives the result
+    #define GET_DEEP_COPY \
+    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get M from pargin [1] (shallow copy); used as the mask and 2nd input
+    M = GB_mx_mxArray_to_Matrix (pargin [1], "M input", false, true) ;
+    if (M == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("M failed") ;
+    }
+
+    // get op (the add operator); default: NOP, default class is class(C)
+    GrB_BinaryOp op ;
+    if (!GB_mx_mxArray_to_BinaryOp (&op, pargin [2], "op",
+        GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("op failed") ;
+    }
+
+    // get A from pargin [3] (shallow copy)
+    A = GB_mx_mxArray_to_Matrix (pargin [3], "A input", false, true) ;
+    if (A == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("A failed") ;
+    }
+
+    // get desc, the optional 5th input (PARGIN presumably checks nargin)
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (4), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    // C<M> = A+M with no accum; M is the mask and also the 2nd input
+    METHOD (GrB_eWiseAdd (C, M, NULL, op, A, M, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_ewise_alias6.c b/Test/GB_mex_ewise_alias6.c
new file mode 100644
index 0000000000..c19c33918c
--- /dev/null
+++ b/Test/GB_mex_ewise_alias6.c
@@ -0,0 +1,95 @@
+//------------------------------------------------------------------------------
+// GB_mex_ewise_alias6: C<M> = M+A
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "C = GB_mex_ewise_alias6 (C, M, op, A, desc)"
+// FREE_ALL: free A, M, C, and desc, then call GB_mx_put_global (true, 0)
+#define FREE_ALL                            \
+{                                           \
+    GB_MATRIX_FREE (&A) ;                   \
+    GB_MATRIX_FREE (&M) ;                   \
+    GB_MATRIX_FREE (&C) ;                   \
+    GrB_free (&desc) ;                      \
+    GB_mx_put_global (true, 0) ;            \
+}
+
+void mexFunction
+(
+    int nargout,                // number of outputs requested (0 or 1)
+    mxArray *pargout [ ],       // pargout [0]: the result C
+    int nargin,                 // number of inputs (4 or 5)
+    const mxArray *pargin [ ]   // C, M, op, A, and optionally desc
+)
+{
+    // MATLAB gateway: C<M> = M+A via GrB_eWiseAdd; M is both mask and input
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, M = NULL, A = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs: at most one output, and either 4 or 5 inputs
+    GB_WHERE (USAGE) ;
+    if (nargout > 1 || nargin < 4 || nargin > 5)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get C (make a deep copy); GET_DEEP_COPY and FREE_DEEP_COPY are
+    // presumably consumed by the METHOD macro below -- confirm in GB_mex.h
+    #define GET_DEEP_COPY \
+    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get M (shallow copy); M is required here, so NULL is an error
+    M = GB_mx_mxArray_to_Matrix (pargin [1], "M input", false, true) ;
+    if (M == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("M failed") ;
+    }
+
+    // get op; default: NOP, default class is class(C)
+    GrB_BinaryOp op ;
+    if (!GB_mx_mxArray_to_BinaryOp (&op, pargin [2], "op",
+        GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("op failed") ;
+    }
+
+    // get A (shallow copy)
+    A = GB_mx_mxArray_to_Matrix (pargin [3], "A input", false, true) ;
+    if (A == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("A failed") ;
+    }
+
+    // get desc; PARGIN (4) presumably yields NULL when nargin is 4 -- confirm
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (4), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    // C<M> = M+A: no accum; M is passed as the mask and as the first input
+    METHOD (GrB_eWiseAdd (C, M, NULL, op, M, A, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_expand.c b/Test/GB_mex_expand.c
new file mode 100644
index 0000000000..a1412c019d
--- /dev/null
+++ b/Test/GB_mex_expand.c
@@ -0,0 +1,107 @@
+//------------------------------------------------------------------------------
+// GB_mex_expand: C<M,struct> = scalar
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+#include "matrix.h"
+
+#define USAGE "C = GB_mex_expand (M, scalar)"
+// FREE_ALL: free the M and C matrices, then call GB_mx_put_global (true, 0)
+#define FREE_ALL                        \
+{                                       \
+    GB_MATRIX_FREE (&M) ;               \
+    GB_MATRIX_FREE (&C) ;               \
+    GB_mx_put_global (true, 0) ;        \
+}
+
+void mexFunction
+(
+    int nargout,                // number of outputs requested (0 or 1)
+    mxArray *pargout [ ],       // pargout [0]: the result C
+    int nargin,                 // number of inputs (exactly 2)
+    const mxArray *pargin [ ]   // pargin [0]: M, pargin [1]: dense scalar
+)
+{
+    // MATLAB gateway: C<M,struct> = scalar, where C has the dimensions of M
+    bool malloc_debug = GB_mx_get_global (true) ;
+    GrB_Matrix C = NULL, M = NULL ;
+
+    GB_WHERE (USAGE) ;
+    if (nargin != 2 || nargout > 1)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+
+    // get M (shallow copy); a NULL M is tolerated only for an empty input
+    M = GB_mx_mxArray_to_Matrix (pargin [0], "M", false, false) ;
+    if (M == NULL && !mxIsEmpty (pargin [0]))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("M failed") ;
+    }
+
+    // default to 0-by-0 so the sizes are never read uninitialized if the
+    GrB_Index nrows = 0, ncols = 0 ;    // queries below fail (M is NULL)
+    GrB_Matrix_nrows (&nrows, M) ;
+    GrB_Matrix_ncols (&ncols, M) ;
+
+    // get scalar
+    if (!mxIsScalar (pargin [1]))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("scalar failed") ;
+    }
+
+    // C<M,struct> = scalar
+    if (mxIsSparse (pargin [1]))
+    {
+        FREE_ALL ; mexErrMsgTxt ("scalar must not be sparse") ;
+    }
+    else if (mxIsComplex (pargin [1]))
+    {
+        // complex case
+        double xcomplex [2] ;
+        GB_mx_complex_merge (1, xcomplex, pargin [1]) ;
+        GrB_Matrix_new (&C, Complex, nrows, ncols) ;
+        GxB_Matrix_subassign_UDT (C, M, NULL, (void *) xcomplex,
+            GrB_ALL, nrows, GrB_ALL, ncols, GrB_DESC_RS) ;
+    }
+    else
+    {
+        // built-in GraphBLAS types: the MATLAB class selects the C type
+
+        #define CREATE(grb_type,c_type)                             \
+        {                                                           \
+            GrB_Matrix_new (&C, grb_type, nrows, ncols) ;           \
+            c_type *scalar = (c_type *) mxGetData (pargin [1]) ;    \
+            GxB_subassign (C, M, NULL, *scalar,                     \
+                GrB_ALL, nrows, GrB_ALL, ncols, GrB_DESC_RS) ;      \
+        }                                                           \
+        break ;
+
+        switch (mxGetClassID (pargin [1]))
+        {
+            case mxLOGICAL_CLASS : CREATE (GrB_BOOL   , bool     ) ;
+            case mxDOUBLE_CLASS  : CREATE (GrB_FP64   , double   ) ;
+            case mxSINGLE_CLASS  : CREATE (GrB_FP32   , float    ) ;
+            case mxINT8_CLASS    : CREATE (GrB_INT8   , int8_t   ) ;
+            case mxUINT8_CLASS   : CREATE (GrB_UINT8  , uint8_t  ) ;
+            case mxINT16_CLASS   : CREATE (GrB_INT16  , int16_t  ) ;
+            case mxUINT16_CLASS  : CREATE (GrB_UINT16 , uint16_t ) ;
+            case mxINT32_CLASS   : CREATE (GrB_INT32  , int32_t  ) ;
+            case mxUINT32_CLASS  : CREATE (GrB_UINT32 , uint32_t ) ;
+            case mxINT64_CLASS   : CREATE (GrB_INT64  , int64_t  ) ;
+            case mxUINT64_CLASS  : CREATE (GrB_UINT64 , uint64_t ) ;
+            default: FREE_ALL ; mexErrMsgTxt ("scalar type not supported") ;
+        }
+    }
+
+    // return result to MATLAB
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C result", true) ;
+    FREE_ALL ;
+}
+
diff --git a/Test/GB_mex_export.c b/Test/GB_mex_export.c
index cbc0f3395d..15159bc1a0 100644
--- a/Test/GB_mex_export.c
+++ b/Test/GB_mex_export.c
@@ -2,7 +2,7 @@
 // GB_mex_export: test import/export
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_export_import.c b/Test/GB_mex_export_import.c
index 832ad3486d..ec89f78907 100644
--- a/Test/GB_mex_export_import.c
+++ b/Test/GB_mex_export_import.c
@@ -2,7 +2,7 @@
 // GB_mex_export_import: export and then reimport a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_extractTuples.c b/Test/GB_mex_extractTuples.c
index ce274d550d..a3dc2fe81e 100644
--- a/Test/GB_mex_extractTuples.c
+++ b/Test/GB_mex_extractTuples.c
@@ -2,7 +2,7 @@
 // GB_mex_extractTuples: extract all tuples from a matrix or vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -118,7 +118,6 @@ void mexFunction
             case GB_UINT64_code : METHOD (GrB_Vector_extractTuples (I, (uint64_t *) X, &nvals, v)) ; break ;
             case GB_FP32_code   : METHOD (GrB_Vector_extractTuples (I, (float    *) X, &nvals, v)) ; break ;
             case GB_FP64_code   : METHOD (GrB_Vector_extractTuples (I, (double   *) X, &nvals, v)) ; break ;
-            case GB_UCT_code    : 
             case GB_UDT_code    : 
               METHOD (GrB_Vector_extractTuples (I, Xtemp, &nvals, v)) ; break ;
             default             : FREE_ALL ; mexErrMsgTxt ("unsupported class") ;
@@ -143,7 +142,6 @@ void mexFunction
             case GB_UINT64_code : METHOD (GrB_Matrix_extractTuples (I, J, (uint64_t *) X, &nvals, A)) ; break ;
             case GB_FP32_code   : METHOD (GrB_Matrix_extractTuples (I, J, (float    *) X, &nvals, A)) ; break ;
             case GB_FP64_code   : METHOD (GrB_Matrix_extractTuples (I, J, (double   *) X, &nvals, A)) ; break;
-            case GB_UCT_code    :
             case GB_UDT_code    :
                 METHOD (GrB_Matrix_extractTuples (I, J, Xtemp, &nvals, A)) ; break;
             default             : FREE_ALL ; mexErrMsgTxt ("unsupported class") ;
diff --git a/Test/GB_mex_hack.c b/Test/GB_mex_hack.c
index f7cb2a6db9..287bc56b32 100644
--- a/Test/GB_mex_hack.c
+++ b/Test/GB_mex_hack.c
@@ -2,7 +2,7 @@
 // GB_mex_dump: copy and print a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_init.c b/Test/GB_mex_init.c
index df2b763136..661edd7401 100644
--- a/Test/GB_mex_init.c
+++ b/Test/GB_mex_init.c
@@ -2,7 +2,7 @@
 // GB_mex_init: initialize GraphBLAS
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_ipagerank.c b/Test/GB_mex_ipagerank.c
index 97dd14aa83..ccb32b1fa7 100644
--- a/Test/GB_mex_ipagerank.c
+++ b/Test/GB_mex_ipagerank.c
@@ -2,7 +2,7 @@
 // GB_mex_ipagerank: compute pagerank with an integer semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_isequal.c b/Test/GB_mex_isequal.c
index 27506ddfef..fa458bfe81 100644
--- a/Test/GB_mex_isequal.c
+++ b/Test/GB_mex_isequal.c
@@ -2,7 +2,7 @@
 // GB_mex_isequal: returns true if A and B are equal
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_kron.c b/Test/GB_mex_kron.c
index 0792bf4981..ab0ca2f7d7 100644
--- a/Test/GB_mex_kron.c
+++ b/Test/GB_mex_kron.c
@@ -2,7 +2,7 @@
 // GB_mex_kron: C<Mask> = accum(C,kron(A,B))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_mis.c b/Test/GB_mex_mis.c
index 1671687ca4..fd30325edc 100644
--- a/Test/GB_mex_mis.c
+++ b/Test/GB_mex_mis.c
@@ -2,7 +2,7 @@
 // GB_mex_mis: s=mis(A), find a maximal independent set
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_msort_1.c b/Test/GB_mex_msort_1.c
new file mode 100644
index 0000000000..2016fd453a
--- /dev/null
+++ b/Test/GB_mex_msort_1.c
@@ -0,0 +1,58 @@
+//------------------------------------------------------------------------------
+// GB_mex_msort_1: sort using GB_msort_1
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define USAGE "I = GB_mex_msort_1 (I,nthreads)"
+
+void mexFunction
+(
+    int nargout,                // number of outputs; must be exactly 1
+    mxArray *pargout [ ],       // pargout [0]: copy of I passed to GB_msort_1
+    int nargin,                 // number of inputs; must be exactly 2
+    const mxArray *pargin [ ]   // pargin [0]: int64 array I, [1]: nthreads
+)
+{
+    bool malloc_debug = GB_mx_get_global (true) ;
+
+    // check inputs: exactly two inputs, one output, and I of class int64
+    if (nargin != 2 || nargout != 1)
+    {
+        mexErrMsgTxt ("Usage: " USAGE) ;
+    }
+    if (!mxIsClass (pargin [0], "int64"))
+    {
+        mexErrMsgTxt ("I must be a int64 array") ;
+    }
+
+    int64_t *I = mxGetData (pargin [0]) ;
+    int64_t n = (uint64_t) mxGetNumberOfElements (pargin [0]) ;
+    // nthreads comes from pargin [1]; last arg is presumably a default: confirm
+    int GET_SCALAR (1, int, nthreads, 1) ;
+
+    // make a copy of the input array, as an n-by-1 int64 output
+    pargout [0] = mxCreateNumericMatrix (n, 1, mxINT64_CLASS, mxREAL) ;
+    int64_t *Iout = mxGetData (pargout [0]) ;
+    memcpy (Iout, I, n * sizeof (int64_t)) ;
+
+    // get workspace: n+1 int64 entries, handed to GB_msort_1
+    int64_t *Work_0 = mxMalloc ((n+1) * sizeof (int64_t)) ;
+
+    GB_MEX_TIC ;
+
+    GB_msort_1 (Iout, Work_0, n, nthreads) ;
+
+    GB_MEX_TOC ;
+
+    // free workspace
+    mxFree (Work_0) ;
+
+    GB_mx_put_global (true, 0) ;
+}
+
diff --git a/Test/GB_mex_msort_2.c b/Test/GB_mex_msort_2.c
index 36d0c09390..41741eb1b3 100644
--- a/Test/GB_mex_msort_2.c
+++ b/Test/GB_mex_msort_2.c
@@ -2,7 +2,7 @@
 // GB_mex_msort_2: sort using GB_msort_2
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_msort_3.c b/Test/GB_mex_msort_3.c
index b5452f3a6f..d0adb549fe 100644
--- a/Test/GB_mex_msort_3.c
+++ b/Test/GB_mex_msort_3.c
@@ -2,7 +2,7 @@
 // GB_mex_msort_3: sort using GB_msort_3
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_mxm.c b/Test/GB_mex_mxm.c
index 66bdb7e00b..4e9fb45757 100644
--- a/Test/GB_mex_mxm.c
+++ b/Test/GB_mex_mxm.c
@@ -2,7 +2,7 @@
 // GB_mex_mxm: C<Mask> = accum(C,A*B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_mxm_alias.c b/Test/GB_mex_mxm_alias.c
index 11795e3609..1ef58d1a74 100644
--- a/Test/GB_mex_mxm_alias.c
+++ b/Test/GB_mex_mxm_alias.c
@@ -2,7 +2,7 @@
 // GB_mex_mxm_alias: C<C> = accum(C,C*C)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_mxm_flops.c b/Test/GB_mex_mxm_flops.c
index 838a6259d9..8473fd8fd0 100644
--- a/Test/GB_mex_mxm_flops.c
+++ b/Test/GB_mex_mxm_flops.c
@@ -1,15 +1,15 @@
 //------------------------------------------------------------------------------
-// GB_mex_mxm_flops: compute flops to do C<M>=A*B or C=A*B
+// GB_mex_mxm_flops: compute flops to do C=A*B, C<M>=A*B or C<!M>=A*B
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 #include "GB_mex.h"
 
-#define USAGE "[result bflops] = GB_mex_mxm (M, A, B, floplimit)"
+#define USAGE "[bflops mwork] = GB_mex_mxm_flops (M, Mask_comp, A, B)"
 
 #define FREE_ALL                            \
 {                                           \
@@ -35,7 +35,7 @@ void mexFunction
 
     // check inputs
     GB_WHERE (USAGE) ;
-    if (nargout > 2 || nargin < 3 || nargin > 4)
+    if (nargout > 2 || nargin != 4)
     {
         mexErrMsgTxt ("Usage: " USAGE) ;
     }
@@ -48,8 +48,12 @@ void mexFunction
         mexErrMsgTxt ("M failed") ;
     }
 
+    // get Mask_comp
+    bool GET_SCALAR (1, bool, Mask_comp, 0) ;
+    // printf ("complement: %d\n", Mask_comp) ;
+
     // get A (shallow copy)
-    A = GB_mx_mxArray_to_Matrix (pargin [1], "A", false, true) ;
+    A = GB_mx_mxArray_to_Matrix (pargin [2], "A", false, true) ;
     if (A == NULL)
     {
         FREE_ALL ;
@@ -57,42 +61,32 @@ void mexFunction
     }
 
     // get B (shallow copy)
-    B = GB_mx_mxArray_to_Matrix (pargin [2], "B", false, true) ;
+    B = GB_mx_mxArray_to_Matrix (pargin [3], "B", false, true) ;
     if (B == NULL)
     {
         FREE_ALL ;
         mexErrMsgTxt ("B failed") ;
     }
 
-    // get floplimit
-    int64_t GET_SCALAR (3, int64_t, floplimit, INT64_MAX) ;
-
-    // allocate Bflops, if it is to be computed
+    // allocate Bflops (note the calloc)
     int64_t bnvec = B->nvec ;
-    int64_t *Bflops = NULL ;
-    if (nargout > 1)
-    {
-        // note the calloc of Bflops
-        Bflops = mxCalloc ((bnvec+1), sizeof (int64_t)) ;
-    }
+    int64_t *Bflops = mxCalloc ((bnvec+1), sizeof (int64_t)) ;
 
     // compute the flop count
-    bool result ;
-    GB_AxB_flopcount (&result, Bflops, NULL, M, A, B, floplimit, Context) ;
+    int64_t Mwork = 0 ;
+
+    GB_AxB_flopcount (&Mwork, Bflops, M, Mask_comp, A, B, Context) ;
 
     // return result to MATLAB
-    pargout [0] = mxCreateDoubleScalar ((double) result) ;
-    if (nargout > 1)
+    pargout [0] = mxCreateDoubleMatrix (1, bnvec+1, mxREAL) ;
+    double *Bflops_matlab = mxGetPr (pargout [0]) ; 
+    for (int64_t kk = 0 ; kk <= bnvec ; kk++)
     {
-        pargout [1] = mxCreateDoubleMatrix (1, bnvec+1, mxREAL) ;
-        double *Bflops_matlab = mxGetPr (pargout [1]) ; 
-        for (int64_t kk = 0 ; kk <= bnvec ; kk++)
-        {
-            Bflops_matlab [kk] = (double) Bflops [kk] ;
-        }
-        mxFree (Bflops) ;
+        Bflops_matlab [kk] = (double) Bflops [kk] ;
     }
 
+    pargout [1] = mxCreateDoubleScalar (Mwork) ;
+    mxFree (Bflops) ;
     FREE_ALL ;
 }
 
diff --git a/Test/GB_mex_mxv.c b/Test/GB_mex_mxv.c
index c8711516f1..77b6c28c3d 100644
--- a/Test/GB_mex_mxv.c
+++ b/Test/GB_mex_mxv.c
@@ -2,7 +2,7 @@
 // GB_mex_mxv: w<mask> = accum(w,A*u)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_nonzero.c b/Test/GB_mex_nonzero.c
index 907a043f97..f987661839 100644
--- a/Test/GB_mex_nonzero.c
+++ b/Test/GB_mex_nonzero.c
@@ -2,7 +2,7 @@
 // GB_mex_nonzero: compute C=nonzero(A)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_offdiag.c b/Test/GB_mex_offdiag.c
index d6b0adf2ab..cffc4c1f01 100644
--- a/Test/GB_mex_offdiag.c
+++ b/Test/GB_mex_offdiag.c
@@ -2,7 +2,7 @@
 // GB_mex_offdiag: compute C=offdiag(A,1)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_omp_max_threads.c b/Test/GB_mex_omp_max_threads.c
index 26fcb9d678..ba85ee5cc1 100644
--- a/Test/GB_mex_omp_max_threads.c
+++ b/Test/GB_mex_omp_max_threads.c
@@ -2,7 +2,7 @@
 // GB_mex_omp_max_threads: omp_get_max_threads ( )
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_op.c b/Test/GB_mex_op.c
index 60e2e0e901..0b0768f628 100644
--- a/Test/GB_mex_op.c
+++ b/Test/GB_mex_op.c
@@ -2,7 +2,7 @@
 // GB_mex_op: apply a built-in GraphBLAS operator to MATLAB arrays
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -106,7 +106,7 @@ void mexFunction
         op_ztype = op1->ztype ; op_zsize = op_ztype->size ;
         op_xtype = op1->xtype ; op_xsize = op_xtype->size ;
         op_ytype = NULL       ; op_ysize = 1 ;
-        ASSERT_BINARYOP_OK (op1, "unary op", GB0) ;
+        ASSERT_UNARYOP_OK (op1, "unary op", GB0) ;
     }
 
     ASSERT_TYPE_OK (op_ztype, "Z type", GB0) ;
diff --git a/Test/GB_mex_qsort.c b/Test/GB_mex_qsort.c
index 8b1c48e282..f3be31a282 100644
--- a/Test/GB_mex_qsort.c
+++ b/Test/GB_mex_qsort.c
@@ -2,7 +2,7 @@
 // GB_mex_qsort: sort int64's using GB_qsort_1a
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_qsort_1a.c b/Test/GB_mex_qsort_1a.c
index 21e235caaa..6db03d1db3 100644
--- a/Test/GB_mex_qsort_1a.c
+++ b/Test/GB_mex_qsort_1a.c
@@ -2,7 +2,7 @@
 // GB_mex_qsort_1a: sort using GB_qsort_1a
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_qsort_1b.c b/Test/GB_mex_qsort_1b.c
index 2942ffb3bb..127cd2c3fe 100644
--- a/Test/GB_mex_qsort_1b.c
+++ b/Test/GB_mex_qsort_1b.c
@@ -2,7 +2,7 @@
 // GB_mex_qsort_1b: sort using GB_qsort_1b
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_qsort_2.c b/Test/GB_mex_qsort_2.c
index cc61989cb1..33495392bd 100644
--- a/Test/GB_mex_qsort_2.c
+++ b/Test/GB_mex_qsort_2.c
@@ -2,7 +2,7 @@
 // GB_mex_qsort_2: sort using GB_qsort_2
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_qsort_3.c b/Test/GB_mex_qsort_3.c
index 930a7e8f20..cfe432edfe 100644
--- a/Test/GB_mex_qsort_3.c
+++ b/Test/GB_mex_qsort_3.c
@@ -2,7 +2,7 @@
 // GB_mex_qsort_3: sort using GB_qsort_3
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_random.c b/Test/GB_mex_random.c
index b3e402c7f2..74a5c8e9b6 100644
--- a/Test/GB_mex_random.c
+++ b/Test/GB_mex_random.c
@@ -2,7 +2,7 @@
 // GB_mex_random: construct a random matrix, double or Complex
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_rdiv.c b/Test/GB_mex_rdiv.c
index 1cb7354281..d8d0d8e0ff 100644
--- a/Test/GB_mex_rdiv.c
+++ b/Test/GB_mex_rdiv.c
@@ -2,7 +2,7 @@
 // GB_mex_rdiv: compute C=A*B with the rdiv operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,7 +29,7 @@
 
 GrB_Info info ;
 bool malloc_debug = false ;
-bool ignore = false ;
+bool ignore = false, ignore2 = false ;
 bool cprint = false ;
 GrB_Matrix A = NULL, B = NULL, C = NULL ;
 int64_t anrows = 0 ;
@@ -40,7 +40,6 @@ GrB_Desc_Value AxB_method = GxB_DEFAULT, AxB_method_used ;
 
 GrB_Info axb (GB_Context Context, bool cprint) ;
 
-#ifndef MY_RDIV
 GrB_Semiring My_plus_rdiv = NULL ;
 GrB_BinaryOp My_rdiv = NULL ;
 
@@ -51,13 +50,10 @@ void my_rdiv (double *z, const double *x, const double *y)
     (*z) = (*y) / (*x) ;
 }
 
-#endif
-
 //------------------------------------------------------------------------------
 
 GrB_Info axb (GB_Context Context, bool cprint)
 {
-    #ifndef MY_RDIV
     // create the rdiv operator
     info = GrB_BinaryOp_new (&My_rdiv, my_rdiv, GrB_FP64, GrB_FP64, GrB_FP64) ;
     if (info != GrB_SUCCESS) return (info) ;
@@ -67,22 +63,24 @@ GrB_Info axb (GB_Context Context, bool cprint)
         GrB_free (&My_rdiv) ;
         return (info) ;
     }
-    #else
-    // printf ("using precompiled semiring %p\n", My_plus_rdiv) ;
-    #endif
 
     // C = A*B
     info = GB_AxB_meta (&C,
+        NULL,       // not in place
+        false,      // C_replace
         true,       // CSC
         NULL,       // no MT returned
         NULL,       // no Mask
         false,      // mask not complemented
+        false,      // mask not structural
+        NULL,       // no accum
         A, B,
         My_plus_rdiv,
         false,      // no A transpose
         false,      // no B transpose
         false,      // no flipxy
         &ignore,    // mask_applied
+        &ignore2,   // done_in_place
         AxB_method, &AxB_method_used, Context) ;
 
     if (C != NULL)
@@ -91,7 +89,6 @@ GrB_Info axb (GB_Context Context, bool cprint)
         if (cprint) GxB_print (C, GxB_COMPLETE) ;
     }
 
-    // does nothing if the objects are pre-compiled
     GrB_free (&My_rdiv) ;
     GrB_free (&My_plus_rdiv) ;
 
@@ -116,10 +113,8 @@ void mexFunction
     B = NULL ;
     C = NULL ;
 
-    #ifndef MY_RDIV
     My_rdiv = NULL ;
     My_plus_rdiv = NULL ;
-    #endif
 
     GB_WHERE (USAGE) ;
 
@@ -151,6 +146,8 @@ void mexFunction
     // 1001: Gustavson
     // 1002: heap
     // 1003: dot
+    // 1004: hash
+    // 1005: saxpy
     GET_SCALAR (2, GrB_Desc_Value, AxB_method, GxB_DEFAULT) ;
 
     // get the cprint flag
@@ -159,6 +156,8 @@ void mexFunction
     if (! ((AxB_method == GxB_DEFAULT) ||
         (AxB_method == GxB_AxB_GUSTAVSON) ||
         (AxB_method == GxB_AxB_HEAP) ||
+        (AxB_method == GxB_AxB_HASH) ||
+        (AxB_method == GxB_AxB_SAXPY) ||
         (AxB_method == GxB_AxB_DOT)))
     {
         mexErrMsgTxt ("unknown method") ;
diff --git a/Test/GB_mex_rdiv2.c b/Test/GB_mex_rdiv2.c
index 17774ce40c..d08b8341c4 100644
--- a/Test/GB_mex_rdiv2.c
+++ b/Test/GB_mex_rdiv2.c
@@ -2,7 +2,7 @@
 // GB_mex_rdiv2: compute C=A*B with the rdiv2 operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -14,7 +14,7 @@
 
 #include "GB_mex.h"
 
-#define USAGE "C = GB_mex_AxB (A, B, atrans, btrans, axb_method, flipxy)"
+#define USAGE "C = GB_mex_rdiv2 (A, B, atrans, btrans, axb_method, flipxy, C_scalar)"
 
 #define FREE_ALL                        \
 {                                       \
@@ -22,6 +22,7 @@
     GB_MATRIX_FREE (&B) ;               \
     GB_MATRIX_FREE (&B64) ;             \
     GB_MATRIX_FREE (&C) ;               \
+    GB_MATRIX_FREE (&T) ;               \
     GrB_free (&My_rdiv2) ;              \
     GrB_free (&My_plus_rdiv2) ;         \
     GB_mx_put_global (true, 0) ;        \
@@ -31,20 +32,21 @@
 
 GrB_Info info ;
 bool malloc_debug = false ;
-bool ignore = false ;
+bool ignore = false, ignore2 = false ;
 bool atranspose = false ;
 bool btranspose = false ;
-GrB_Matrix A = NULL, B = NULL, B64 = NULL, C = NULL ;
+GrB_Matrix A = NULL, B = NULL, B64 = NULL, C = NULL, T = NULL ;
 int64_t anrows = 0 ;
 int64_t ancols = 0 ;
 int64_t bnrows = 0 ;
 int64_t bncols = 0 ;
 GrB_Desc_Value AxB_method = GxB_DEFAULT, AxB_method_used ;
 bool flipxy = false ;
+bool done_in_place = false ;
+double C_scalar = 0 ;
 
 GrB_Info axb (GB_Context Context) ;
 
-#ifndef MY_RDIV
 GrB_Semiring My_plus_rdiv2 = NULL ;
 GrB_BinaryOp My_rdiv2 = NULL ;
 
@@ -54,13 +56,11 @@ void my_rdiv2 (double *z, const double *x, const float *y)
 {
     (*z) = ((double) (*y)) / (*x) ;
 }
-#endif
 
 //------------------------------------------------------------------------------
 
 GrB_Info axb (GB_Context Context)
 {
-    #ifndef MY_RDIV
     // create the rdiv2 operator
     info = GrB_BinaryOp_new (&My_rdiv2, my_rdiv2, GrB_FP64, GrB_FP64, GrB_FP32);
     if (info != GrB_SUCCESS) return (info) ;
@@ -70,25 +70,74 @@ GrB_Info axb (GB_Context Context)
         GrB_free (&My_rdiv2) ;
         return (info) ;
     }
-    #else
-    // printf ("using precompiled semiring %p\n", My_plus_rdiv2) ;
-    #endif
 
-    // C = A*B
-    info = GB_AxB_meta (&C,
+    bool do_in_place = (C_scalar != 0) ;
+    C = NULL ;
+
+    if (do_in_place)
+    {
+        // construct the result matrix and fill it with the scalar
+        GrB_Index cnrows = anrows ;
+        GrB_Index cncols = bncols ;
+        info = GrB_Matrix_new (&C, GrB_FP64, cnrows, cncols) ;
+        if (info != GrB_SUCCESS)
+        {
+            GrB_free (&My_rdiv2) ;
+            GrB_free (&My_plus_rdiv2) ;
+            return (info) ;
+        }
+        info = GrB_assign (C, NULL, NULL, C_scalar,
+            GrB_ALL, cnrows, GrB_ALL, cncols, NULL) ;
+        if (info != GrB_SUCCESS) 
+        {
+            GrB_free (&My_rdiv2) ;
+            GrB_free (&My_plus_rdiv2) ;
+            GrB_free (&C) ;
+            return (info) ;
+        }
+        // GxB_print (C, 3) ;
+    }
+
+    // C = A*B or C += A*B
+    info = GB_AxB_meta (
+        &T,
+        C,
+        false,      // C_replace
         true,       // CSC
         NULL,       // no MT returned
         NULL,       // no Mask
         false,      // mask not complemented
+        false,      // mask not structural
+        (do_in_place) ? GrB_PLUS_FP64 : NULL,   // accum
         A, B,
         My_plus_rdiv2,
         atranspose,
         btranspose,
         flipxy,
         &ignore,    // mask_applied
+        &done_in_place,
         AxB_method, &AxB_method_used, Context) ;
 
-    // does nothing if the objects are pre-compiled
+    if (info == GrB_SUCCESS)
+    {
+        if (done_in_place != do_in_place)
+        {
+            printf ("done in place: %d %d\n", do_in_place, done_in_place) ;
+            mexErrMsgTxt ("failure: not in place as expected\n") ;
+        }
+        if (!done_in_place)
+        {
+            GrB_free (&C) ;
+            C = T ;
+            T = NULL ;
+        }
+    }
+    else
+    {
+        GrB_free (&C) ;
+        GrB_free (&T) ;
+    }
+
     GrB_free (&My_rdiv2) ;
     GrB_free (&My_plus_rdiv2) ;
 
@@ -114,15 +163,13 @@ void mexFunction
     B64 = NULL ;
     C = NULL ;
 
-    #ifndef MY_RDIV
     My_rdiv2 = NULL ;
     My_plus_rdiv2 = NULL ;
-    #endif
 
     GB_WHERE (USAGE) ;
 
     // check inputs
-    if (nargout > 1 || nargin < 2 || nargin > 6)
+    if (nargout > 1 || nargin < 2 || nargin > 7)
     {
         mexErrMsgTxt ("Usage: " USAGE) ;
     }
@@ -155,11 +202,15 @@ void mexFunction
     // 1001: Gustavson
     // 1002: heap
     // 1003: dot
+    // 1004: hash
+    // 1005: saxpy
     GET_SCALAR (4, GrB_Desc_Value, AxB_method, GxB_DEFAULT) ;
 
     if (! ((AxB_method == GxB_DEFAULT) ||
         (AxB_method == GxB_AxB_GUSTAVSON) ||
         (AxB_method == GxB_AxB_HEAP) ||
+        (AxB_method == GxB_AxB_HASH) ||
+        (AxB_method == GxB_AxB_SAXPY) ||
         (AxB_method == GxB_AxB_DOT)))
     {
         mexErrMsgTxt ("unknown method") ;
@@ -168,6 +219,10 @@ void mexFunction
     // get the flipxy option
     GET_SCALAR (5, bool, flipxy, false) ;
 
+    // get the C_scalar
+    GET_SCALAR (6, double, C_scalar, 0) ;
+    // printf ("C scalar: %g\n", C_scalar) ;
+
     // determine the dimensions
     anrows = (atranspose) ? GB_NCOLS (A) : GB_NROWS (A) ;
     ancols = (atranspose) ? GB_NROWS (A) : GB_NCOLS (A) ;
@@ -179,11 +234,17 @@ void mexFunction
         mexErrMsgTxt ("invalid dimensions") ;
     }
 
+    if (atranspose && btranspose && C_scalar != 0)
+    {
+        printf ("C=A'*B'; ignoring C_scalar!\n") ;
+        C_scalar = 0 ;
+    }
+
     // convert B64 (double) to B (float)
     GrB_Matrix_new (&B, GrB_FP32, bnrows, bncols) ;
     GrB_assign (B, NULL, NULL, B64, GrB_ALL, 0, GrB_ALL, 0, NULL) ;
 
-    // B must be completed for GB_AxB_meta to work
+    // B must be completed
     GrB_Index nvals ;
     GrB_Matrix_nvals (&nvals, B) ;
 
diff --git a/Test/GB_mex_reduce_bool.c b/Test/GB_mex_reduce_bool.c
index 80543de84a..2e3413b504 100644
--- a/Test/GB_mex_reduce_bool.c
+++ b/Test/GB_mex_reduce_bool.c
@@ -2,7 +2,7 @@
 // GB_mex_reduce_bool: c = accum(c,reduce_to_scalar(A)) for boolean
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -59,8 +59,6 @@ void mexFunction
         mexErrMsgTxt ("A must be boolean") ;
     }
 
-    // GxB_print (A, 3) ;
-
     // get the op (always boolean)
     if (!GB_mx_mxArray_to_BinaryOp (&reduceop, pargin [1], "reduceop",
         GB_NOP_opcode, mxLOGICAL_CLASS, false, false))
@@ -69,8 +67,6 @@ void mexFunction
         mexErrMsgTxt ("reduceop failed") ;
     }
 
-    // GxB_print (reduceop, 3) ;
-
     // get the boolean identity value
     bool GET_SCALAR (2, bool, identity, true) ;
 
@@ -98,9 +94,6 @@ void mexFunction
         mexErrMsgTxt ("monoid failed") ;
     }
 
-    // GxB_print (reduce, 3) ;
-
-
     // reduce to a scalar
     bool result = false ;
     info = GrB_reduce (&result, NULL, reduce, A, NULL) ;
diff --git a/Test/GB_mex_reduce_complex.c b/Test/GB_mex_reduce_complex.c
index 14e97da04c..26a1bd0aac 100644
--- a/Test/GB_mex_reduce_complex.c
+++ b/Test/GB_mex_reduce_complex.c
@@ -2,7 +2,7 @@
 // GB_mex_mxm: C<Mask> = accum(C,A*B)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -48,18 +48,7 @@ void mexFunction
         FREE_ALL ;
         mexErrMsgTxt ("A failed") ;
     }
-    printf ("%p %p\n", A->type, Complex) ;
-    GxB_print (A, 2) ;
-    printf ("initially A->type is:\n") ;
-    GxB_print (A->type, 3) ;
-    GxB_print (Complex, 3) ;
-    #ifdef MY_COMPLEX
-    printf ("%p\n", My_Complex) ;
-    GxB_print (My_Complex, 3) ;
-    if (A->type == Complex) A->type = My_Complex ;
-    printf ("now A->type is:\n") ;
-    GxB_print (A->type, 3) ;
-    #endif
+
     if (A->type != Complex)
     {
         FREE_ALL ;
@@ -71,26 +60,18 @@ void mexFunction
 
     // create the monoid
     info = GxB_Monoid_terminal_new_UDT (&Times_terminal,
-        #ifdef MY_COMPLEX
-        My_Complex_times,
-        #else
-        Complex_times,
-        #endif
-        &one, &zero) ;
+        Complex_times, &one, &zero) ;
     if (info != GrB_SUCCESS)
     {
         FREE_ALL ;
         mexErrMsgTxt ("Times_terminal failed") ;
     }
 
-    // GxB_print (Times_terminal, 3) ;
-
     int64_t GET_SCALAR (1, int64_t, hack, -1) ;
     if (hack >= 0)
     {
         double complex *Ax = A->x ;
         Ax [hack] = 0 ;
-        // GxB_print (A, 2) ;
     }
 
     // reduce to a scalar
diff --git a/Test/GB_mex_reduce_terminal.c b/Test/GB_mex_reduce_terminal.c
index 436a63126b..98ed7c2873 100644
--- a/Test/GB_mex_reduce_terminal.c
+++ b/Test/GB_mex_reduce_terminal.c
@@ -2,7 +2,7 @@
 // GB_mex_reduce_terminal: [c,flag] = sum(A), reduce to scalar
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -69,51 +69,29 @@ void mexFunction
         mexErrMsgTxt ("A must be double precision") ;
     }
 
-    // printf ("\ninput matrix:\n") ;
-    // GxB_print (A, GxB_COMPLETE) ;
-
-    // printf ("\nbuilt-in max fp64 monoid:\n") ;
-    // GxB_print (GxB_MAX_FP64_MONOID, GxB_COMPLETE) ;
-
     // get the terminal value, if present.  Default is 1.
     double GET_SCALAR (1, double, terminal, 1) ;
 
     // printf ("\nterminal %g\n", terminal) ;
 
-    #ifdef MY_MAX
-    if (terminal == 1)
+    // create the Max operator
+    info = GrB_BinaryOp_new (&Max, maxdouble, GrB_FP64, GrB_FP64, GrB_FP64);
+    if (info != GrB_SUCCESS)
     {
-        // use pre-compiled monoid
-        // printf ("blazing!\n") ;
-        Max = My_Max ;
-        Max_Terminal = My_Max_Terminal1 ;
+        printf ("error: %d %s\n", info, GrB_error ( )) ;
+        mexErrMsgTxt ("Max failed") ;
     }
-    else
-    #endif
-    {
 
-        // create the Max operator
-        info = GrB_BinaryOp_new (&Max, maxdouble, GrB_FP64, GrB_FP64, GrB_FP64);
-        if (info != GrB_SUCCESS)
-        {
-            printf ("error: %d %s\n", info, GrB_error ( )) ;
-            mexErrMsgTxt ("Max failed") ;
-        }
-
-        // printf ("create the monoid:\n") ;
-
-        // create the Max monoid
-        info = GxB_Monoid_terminal_new (&Max_Terminal, Max, (double) 0,
-            terminal) ;
-        if (info != GrB_SUCCESS)
-        {
-            printf ("error: %d %s\n", info, GrB_error ( )) ;
-            mexErrMsgTxt ("Max_Terminal failed") ;
-        }
-    }
+    // printf ("create the monoid:\n") ;
 
-    // printf ("\nmax fp64 monoid with new terminal value:\n") ;
-    // GxB_print (Max_Terminal, GxB_COMPLETE) ;
+    // create the Max monoid
+    info = GxB_Monoid_terminal_new (&Max_Terminal, Max, (double) 0,
+        terminal) ;
+    if (info != GrB_SUCCESS)
+    {
+        printf ("error: %d %s\n", info, GrB_error ( )) ;
+        mexErrMsgTxt ("Max_Terminal failed") ;
+    }
 
     // reduce to a scalar
     double c ;
diff --git a/Test/GB_mex_reduce_to_scalar.c b/Test/GB_mex_reduce_to_scalar.c
index 49ba6b922b..fd571574a7 100644
--- a/Test/GB_mex_reduce_to_scalar.c
+++ b/Test/GB_mex_reduce_to_scalar.c
@@ -2,7 +2,7 @@
 // GB_mex_reduce_to_scalar: c = accum(c,reduce_to_scalar(A))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_reduce_to_vector.c b/Test/GB_mex_reduce_to_vector.c
index ecabf563a8..08991609ee 100644
--- a/Test/GB_mex_reduce_to_vector.c
+++ b/Test/GB_mex_reduce_to_vector.c
@@ -2,7 +2,7 @@
 // GB_mex_reduce_to_vector: c = accum(c,reduce_to_vector(A))
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -132,11 +132,6 @@ void mexFunction
     // w<mask> = accum (w, reduce_to_vector (A))
     METHOD (GrB_reduce (w, mask, accum, reduce, A, desc)) ;
 
-    // again, for timing
-//  FREE_DEEP_COPY ;
-//  GET_DEEP_COPY ;
-//  METHOD (GrB_reduce (w, mask, accum, reduce, A, desc)) ;
-
     // return w to MATLAB as a struct and free the GraphBLAS w
     pargout [0] = GB_mx_Vector_to_mxArray (&w, "w output", true) ;
 
diff --git a/Test/GB_mex_resize.c b/Test/GB_mex_resize.c
index 1afdf46735..4b19c49f0c 100644
--- a/Test/GB_mex_resize.c
+++ b/Test/GB_mex_resize.c
@@ -2,7 +2,7 @@
 // GB_mex_resize: resize a matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_select.c b/Test/GB_mex_select.c
index ce803400c8..c7f7a8eac8 100644
--- a/Test/GB_mex_select.c
+++ b/Test/GB_mex_select.c
@@ -2,7 +2,7 @@
 // GB_mex_select: C<M> = accum(C,select(A,k)) or select(A',k)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -132,9 +132,6 @@ void mexFunction
         C->nvec_nonempty = -1 ;
     }
 
-    // GxB_print (op, 3) ;
-    // GxB_print (Thunk, 3) ;
-
     // C<M> = accum(C,op(A))
     if (C->vdim == 1 && (desc == NULL || desc->in0 == GxB_DEFAULT))
     {
diff --git a/Test/GB_mex_semiring.c b/Test/GB_mex_semiring.c
index 4610faecd9..a7c634a05b 100644
--- a/Test/GB_mex_semiring.c
+++ b/Test/GB_mex_semiring.c
@@ -2,7 +2,7 @@
 // GB_mex_semiring: parse a semiring, for testing; returns nothing
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_setElement.c b/Test/GB_mex_setElement.c
index 835072c45f..1b30640b4d 100644
--- a/Test/GB_mex_setElement.c
+++ b/Test/GB_mex_setElement.c
@@ -2,7 +2,7 @@
 // GB_mex_setElement: MATLAB interface for A(i,j) = x
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -236,7 +236,6 @@ void mexFunction
             case GB_UINT64_code : METHOD (vset_UINT64 (A, Y, I, ni)) ; break ;
             case GB_FP32_code   : METHOD (vset_FP32   (A, Y, I, ni)) ; break ;
             case GB_FP64_code   : METHOD (vset_FP64   (A, Y, I, ni)) ; break ;
-            case GB_UCT_code    :
             case GB_UDT_code    : METHOD (vset_UDT    (A, Y, I, ni)) ; break ;
             default:
                 FREE_ALL ;
@@ -259,7 +258,6 @@ void mexFunction
             case GB_UINT64_code : METHOD (set_UINT64 (A, Y, I, J, ni)) ; break ;
             case GB_FP32_code   : METHOD (set_FP32   (A, Y, I, J, ni)) ; break ;
             case GB_FP64_code   : METHOD (set_FP64   (A, Y, I, J, ni)) ; break ;
-            case GB_UCT_code    :
             case GB_UDT_code    : METHOD (set_UDT    (A, Y, I, J, ni)) ; break ;
             default:
                 FREE_ALL ;
diff --git a/Test/GB_mex_subassign.c b/Test/GB_mex_subassign.c
index af9d2303c4..aa57c4d81e 100644
--- a/Test/GB_mex_subassign.c
+++ b/Test/GB_mex_subassign.c
@@ -2,7 +2,7 @@
 // GB_mex_subassign: C(I,J)<M> = accum (C (I,J), A)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // This function is a wrapper for all GxB_*_subassign functions.
@@ -39,26 +39,39 @@
 
 #define FREE_ALL                        \
 {                                       \
+    bool A_is_M = (A == M) ;            \
+    bool A_is_C = (A == C) ;            \
+    bool C_is_M = (C == M) ;            \
     GB_MATRIX_FREE (&A) ;               \
-    GB_MATRIX_FREE (&M) ;               \
+    if (A_is_C) C = NULL ;              \
+    if (A_is_M) M = NULL ;              \
     GB_MATRIX_FREE (&C) ;               \
+    if (C_is_M) M = NULL ;              \
+    GB_MATRIX_FREE (&M) ;               \
     GrB_free (&desc) ;                  \
     if (!reduce_is_complex) GrB_free (&reduce) ;                \
     GB_mx_put_global (true, 0) ;        \
 }
 
-#define GET_DEEP_COPY \
+#define GET_DEEP_COPY                                                   \
+{                                                                       \
     C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;   \
     if (nargin > 2 && mxIsChar (pargin [1]))                            \
     {                                                                   \
-        M = GB_mx_alias ("M", pargin [1], "C",C, "A",A) ;               \
+        M = GB_mx_alias ("M", pargin [1], "C", C, "A", A) ;             \
     }                                                                   \
     if (nargin > 3 && mxIsChar (pargin [3]))                            \
     {                                                                   \
-        A = GB_mx_alias ("A", pargin [3], "C",C, "M",M) ;               \
-    }
+        A = GB_mx_alias ("A", pargin [3], "C", C, "M", M) ;             \
+    }                                                                   \
+}
 
-#define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+#define FREE_DEEP_COPY          \
+{                               \
+    if (A == C) A = NULL ;      \
+    if (M == C) M = NULL ;      \
+    GB_MATRIX_FREE (&C) ;       \
+}
 
 GrB_Matrix C = NULL ;
 GrB_Matrix M = NULL ;
@@ -111,7 +124,7 @@ GrB_Info assign (GB_Context Context)
     bool at = (desc != NULL && desc->in0 == GrB_TRAN) ;
     GrB_Info info ;
 
-    int pr = 0 ;
+    int pr = GB0 ;
     bool ph = (pr > 0) ;
 
     ASSERT_MATRIX_OK (C, "C before mex assign", pr) ;
@@ -148,7 +161,6 @@ GrB_Info assign (GB_Context Context)
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 default:
                     FREE_ALL ;
@@ -182,7 +194,6 @@ GrB_Info assign (GB_Context Context)
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 {
                     OK (GxB_subassign ((GrB_Vector) C, (GrB_Vector) M,
@@ -220,7 +231,6 @@ GrB_Info assign (GB_Context Context)
                 case GB_UINT64_code : ASSIGN (uint64_t) ;
                 case GB_FP32_code   : ASSIGN (float) ;
                 case GB_FP64_code   : ASSIGN (double) ;
-                case GB_UCT_code    :
                 case GB_UDT_code    :
                 {
                     OK (GxB_subassign (C, M, accum, Ax, I, ni, J, nj, desc)) ;
@@ -525,6 +535,7 @@ void mexFunction
             mexErrMsgTxt ("C failed") ;
         }
         mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+        // GxB_print (C, 2) ;
 
         // get accum; default: NOP, default class is class(C)
         accum = NULL ;
@@ -656,7 +667,11 @@ void mexFunction
     ASSERT_MATRIX_OK (C, "Final C before wait", GB0) ;
     GrB_wait ( ) ;
     GB_MEX_TOC ;
+
+    if (C == A) A = NULL ;      // do not free A if it is aliased to C
+    if (C == M) M = NULL ;      // do not free M if it is aliased to C
     pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C assign result", true) ;
+
     FREE_ALL ;
 }
 
diff --git a/Test/GB_mex_subassign_alias.c b/Test/GB_mex_subassign_alias.c
index 0551bdd8c5..ad9d5ae2a7 100644
--- a/Test/GB_mex_subassign_alias.c
+++ b/Test/GB_mex_subassign_alias.c
@@ -2,7 +2,7 @@
 // GB_mex_subassign_alias: C<C>(:,:) = accum(C(:,:),C)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_subref_symbolic.c b/Test/GB_mex_subref_symbolic.c
index e7815560a6..8f05e87b36 100644
--- a/Test/GB_mex_subref_symbolic.c
+++ b/Test/GB_mex_subref_symbolic.c
@@ -2,7 +2,7 @@
 // GB_mex_subref_symbolic: S=A(I,J)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_transpose.c b/Test/GB_mex_transpose.c
index ba96df2e56..bc031f0f83 100644
--- a/Test/GB_mex_transpose.c
+++ b/Test/GB_mex_transpose.c
@@ -2,7 +2,7 @@
 // GB_mex_transpose: transpose a sparse matrix and return it to MATLAB
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -15,8 +15,14 @@
 
 #define FREE_ALL                        \
 {                                       \
+    bool A_is_M = (A == M) ;            \
+    bool A_is_C = (A == C) ;            \
+    bool C_is_M = (C == M) ;            \
     GB_MATRIX_FREE (&A) ;               \
+    if (A_is_C) C = NULL ;              \
+    if (A_is_M) M = NULL ;              \
     GB_MATRIX_FREE (&C) ;               \
+    if (C_is_M) M = NULL ;              \
     GB_MATRIX_FREE (&M) ;               \
     GrB_free (&desc) ;                  \
     GB_mx_put_global (true, 0) ;        \
@@ -67,12 +73,30 @@ void mexFunction
     }
 
     // get C (make a deep copy) and get any aliases for M and A
-    #define GET_DEEP_COPY \
-    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;          \
-    if (nargin > 5 && C != NULL) C->nvec_nonempty = -1 ;  /* for testing */    \
-    if (mxIsChar (pargin [1])) M = GB_mx_alias ("M", pargin [1], "C",C, "A",A);\
-    if (mxIsChar (pargin [3])) A = GB_mx_alias ("A", pargin [3], "C",C, "M",M);
-    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    #define GET_DEEP_COPY                                                   \
+    {                                                                       \
+        C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true, true) ;   \
+        if (nargin > 5 && C != NULL)                                        \
+        {                                                                   \
+            C->nvec_nonempty = -1 ;  /* for testing */                      \
+        }                                                                   \
+        if (mxIsChar (pargin [1]))                                          \
+        {                                                                   \
+            M = GB_mx_alias ("M", pargin [1], "C", C, "A", A) ;             \
+        }                                                                   \
+        if (mxIsChar (pargin [3]))                                          \
+        {                                                                   \
+            A = GB_mx_alias ("A", pargin [3], "C", C, "M", M) ;             \
+        }                                                                   \
+    }
+
+    #define FREE_DEEP_COPY          \
+    {                               \
+        if (A == C) A = NULL ;      \
+        if (M == C) M = NULL ;      \
+        GB_MATRIX_FREE (&C) ;       \
+    }
+
     GET_DEEP_COPY ;
     if (C == NULL)
     {
@@ -109,7 +133,10 @@ void mexFunction
     METHOD (GrB_transpose (C, M, accum, A, desc)) ;
 
     // return C to MATLAB as a struct and free the GraphBLAS C
+    if (C == A) A = NULL ;      // do not free A if it is aliased to C
+    if (C == M) M = NULL ;      // do not free M if it is aliased to C
     pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+    // C is now NULL
 
     FREE_ALL ;
 }
diff --git a/Test/GB_mex_tricount.c b/Test/GB_mex_tricount.c
index 6fc069e8b9..81d20affa8 100644
--- a/Test/GB_mex_tricount.c
+++ b/Test/GB_mex_tricount.c
@@ -2,7 +2,7 @@
 // GB_mex_tricount: count the number of triangles in a graph
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 // Usage: ntri = GB_mex_tricount (method, A, E, L, U) ;
diff --git a/Test/GB_mex_tril.c b/Test/GB_mex_tril.c
index d8fb803560..e6b69c48de 100644
--- a/Test/GB_mex_tril.c
+++ b/Test/GB_mex_tril.c
@@ -2,7 +2,7 @@
 // GB_mex_tril: compute C=tril(A,1)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_triu.c b/Test/GB_mex_triu.c
index 1531e4896c..0d98d8ed1c 100644
--- a/Test/GB_mex_triu.c
+++ b/Test/GB_mex_triu.c
@@ -2,7 +2,7 @@
 // GB_mex_triu: compute C=triu(A,1)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_vxm.c b/Test/GB_mex_vxm.c
index 5a33b8debb..d941156073 100644
--- a/Test/GB_mex_vxm.c
+++ b/Test/GB_mex_vxm.c
@@ -2,7 +2,7 @@
 // GB_mex_vxm: w'<mask> = accum(w',u'A)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mex_wathen.c b/Test/GB_mex_wathen.c
index 66cfd273d8..0a594ecf95 100644
--- a/Test/GB_mex_wathen.c
+++ b/Test/GB_mex_wathen.c
@@ -2,7 +2,7 @@
 // GB_mex_wathen: construct a random finite-element matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_Matrix_to_mxArray.c b/Test/GB_mx_Matrix_to_mxArray.c
index 03cf807691..e98945fdec 100644
--- a/Test/GB_mx_Matrix_to_mxArray.c
+++ b/Test/GB_mx_Matrix_to_mxArray.c
@@ -2,7 +2,7 @@
 // GB_mx_Matrix_to_mxArray
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_Monoid.c b/Test/GB_mx_Monoid.c
index 128b5d7138..9ff08eeece 100644
--- a/Test/GB_mx_Monoid.c
+++ b/Test/GB_mx_Monoid.c
@@ -2,17 +2,13 @@
 // GB_mx_Monoid: construct a monoid from a built-in operator
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 // Also defines the identity of the monoid
 
-// See Source/GB_AxB_Gustavson_builtin.c for a description of the built-in
-// monoids.  This function can construct all 52 of them (note that 8 of those
-// are redudant).
-
 #define GET_DEEP_COPY ;
 #define FREE_DEEP_COPY ;
 #define FREE_ALL ;
@@ -43,7 +39,7 @@ bool GB_mx_Monoid               // true if successful, false otherwise
             // 11 MIN monoids
             switch (add->xtype->code)
             {
-                // bool case redudandt with AND
+                // bool case redundant with AND
                 case GB_BOOL_code   : METHOD (GrB_Monoid_new (&M, add, (bool    ) true)) ;        break ;
                 case GB_INT8_code   : METHOD (GrB_Monoid_new (&M, add, (int8_t  ) INT8_MAX)) ;    break ;
                 case GB_UINT8_code  : METHOD (GrB_Monoid_new (&M, add, (uint8_t ) UINT8_MAX)) ;   break ;
@@ -66,7 +62,7 @@ bool GB_mx_Monoid               // true if successful, false otherwise
             // 11 MAX monoids
             switch (add->xtype->code)
             {
-                // bool case redudandt with OR
+                // bool case redundant with OR
                 case GB_BOOL_code   : METHOD (GrB_Monoid_new (&M, add, (bool    ) false)) ;       break ;
                 case GB_INT8_code   : METHOD (GrB_Monoid_new (&M, add, (int8_t  ) INT8_MIN)) ;    break ;
                 case GB_UINT8_code  : METHOD (GrB_Monoid_new (&M, add, (uint8_t ) 0)) ;           break ;
@@ -89,7 +85,7 @@ bool GB_mx_Monoid               // true if successful, false otherwise
             // 11 PLUS monoids
             switch (add->xtype->code)
             {
-                // bool case redudandt with OR
+                // bool case redundant with OR
                 case GB_BOOL_code   : METHOD (GrB_Monoid_new (&M, add, (bool    ) 0)) ;           break ;
                 case GB_INT8_code   : METHOD (GrB_Monoid_new (&M, add, (int8_t  ) 0)) ;           break ;
                 case GB_UINT8_code  : METHOD (GrB_Monoid_new (&M, add, (uint8_t ) 0)) ;           break ;
@@ -112,7 +108,7 @@ bool GB_mx_Monoid               // true if successful, false otherwise
             // 11 TIMES monoids
             switch (add->xtype->code)
             {
-                // bool case redudandt with AND
+                // bool case redundant with AND
                 case GB_BOOL_code   : METHOD (GrB_Monoid_new (&M, add, (bool    ) true)) ;        break ;
                 case GB_INT8_code   : METHOD (GrB_Monoid_new (&M, add, (int8_t  ) 1)) ;           break ;
                 case GB_UINT8_code  : METHOD (GrB_Monoid_new (&M, add, (uint8_t ) 1)) ;           break ;
@@ -130,6 +126,28 @@ bool GB_mx_Monoid               // true if successful, false otherwise
             }
             break ;
 
+        case GB_ANY_opcode   :
+
+            // 11 ANY monoids
+            switch (add->xtype->code)
+            {
+                case GB_BOOL_code   : METHOD (GrB_Monoid_new (&M, add, (bool    ) false)) ;       break ;
+                case GB_INT8_code   : METHOD (GrB_Monoid_new (&M, add, (int8_t  ) 0)) ;           break ;
+                case GB_UINT8_code  : METHOD (GrB_Monoid_new (&M, add, (uint8_t ) 0)) ;           break ;
+                case GB_INT16_code  : METHOD (GrB_Monoid_new (&M, add, (int16_t ) 0)) ;           break ;
+                case GB_UINT16_code : METHOD (GrB_Monoid_new (&M, add, (uint16_t) 0)) ;           break ;
+                case GB_INT32_code  : METHOD (GrB_Monoid_new (&M, add, (int32_t ) 0)) ;           break ;
+                case GB_UINT32_code : METHOD (GrB_Monoid_new (&M, add, (uint32_t) 0)) ;           break ;
+                case GB_INT64_code  : METHOD (GrB_Monoid_new (&M, add, (int64_t ) 0)) ;           break ;
+                case GB_UINT64_code : METHOD (GrB_Monoid_new (&M, add, (uint64_t) 0)) ;           break ;
+                case GB_FP32_code   : METHOD (GrB_Monoid_new (&M, add, (float   ) 0)) ;           break ;
+                case GB_FP64_code   : METHOD (GrB_Monoid_new (&M, add, (double  ) 0)) ;           break ;
+                default: 
+                    mexWarnMsgIdAndTxt ("GB:warn", "unknown type") ;
+                    return (false) ;
+            }
+            break ;
+
         case GB_LOR_opcode      :
 
             // 2 OR boolean monoids
diff --git a/Test/GB_mx_Sauna_nmalloc.c b/Test/GB_mx_Sauna_nmalloc.c
deleted file mode 100644
index 14ecb856c2..0000000000
--- a/Test/GB_mx_Sauna_nmalloc.c
+++ /dev/null
@@ -1,37 +0,0 @@
-//------------------------------------------------------------------------------
-// GB_mx_Sauna_nmalloc: get the stats on Sauna usage
-//------------------------------------------------------------------------------
-
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
-//------------------------------------------------------------------------------
-
-#include "GB_mex.h"
-
-int GB_mx_Sauna_nmalloc ( )   // return # of mallocs in Saunas in use
-{
-    
-    int nmallocs_in_use = 0 ;
-
-    for (int Sauna_id = 0 ; Sauna_id < GxB_NTHREADS_MAX ; Sauna_id++)
-    {
-
-        GB_Sauna Sauna = GB_Global_Saunas_get (Sauna_id) ;
-        if (Sauna != NULL)
-        { 
-            nmallocs_in_use++ ;             // the Sauna header
-            if (Sauna->Sauna_Mark != NULL)
-            {
-                nmallocs_in_use++ ;
-            }
-            if (Sauna->Sauna_Work != NULL)
-            {
-                nmallocs_in_use++ ;
-            }
-        }
-    }
-
-    return (nmallocs_in_use) ;
-}
-
diff --git a/Test/GB_mx_Type_to_classID.c b/Test/GB_mx_Type_to_classID.c
index ac42e5ac26..ce22e9fdbd 100644
--- a/Test/GB_mx_Type_to_classID.c
+++ b/Test/GB_mx_Type_to_classID.c
@@ -2,7 +2,7 @@
 // GB_mx_Type_to_classID: return the GraphBLAS type of the MATLAB class ID
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -29,9 +29,7 @@ mxClassID GB_mx_Type_to_classID        // returns a MATLAB class ID
         case GB_FP32_code   : return (mxSINGLE_CLASS ) ;   // 9  -> 7
         case GB_FP64_code   : return (mxDOUBLE_CLASS ) ;   // 10 -> 6
         // assume user-defined type is Complex, and MATLAB double complex
-        case GB_UCT_code    :
-        case GB_UDT_code    :
-                              return (mxDOUBLE_CLASS ) ;   // 11,12 -> 6
+        case GB_UDT_code    : return (mxDOUBLE_CLASS ) ;   // 11 -> 6
         default             : ;
     }
     mexWarnMsgIdAndTxt ("GB:warn", "invalid type code") ;
diff --git a/Test/GB_mx_Vector_to_mxArray.c b/Test/GB_mx_Vector_to_mxArray.c
index 453fbab570..25d0528bde 100644
--- a/Test/GB_mx_Vector_to_mxArray.c
+++ b/Test/GB_mx_Vector_to_mxArray.c
@@ -2,7 +2,7 @@
 // GB_mx_Vector_to_mxArray
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_abort.c b/Test/GB_mx_abort.c
index 116b967e32..fe18a53777 100644
--- a/Test/GB_mx_abort.c
+++ b/Test/GB_mx_abort.c
@@ -2,7 +2,7 @@
 // GB_mx_abort: terminate MATLAB
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_alias.c b/Test/GB_mx_alias.c
index 92012fe88f..c85b37287a 100644
--- a/Test/GB_mx_alias.c
+++ b/Test/GB_mx_alias.c
@@ -2,7 +2,7 @@
 // GB_mx_alias:  return an aliased argument
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_builtin_monoid.c b/Test/GB_mx_builtin_monoid.c
index 2de3b603c1..021f02798e 100644
--- a/Test/GB_mx_builtin_monoid.c
+++ b/Test/GB_mx_builtin_monoid.c
@@ -2,7 +2,7 @@
 // GB_mx_builtin_monoid: return a built-in monoid
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -54,7 +54,7 @@ GrB_Monoid GB_mx_builtin_monoid     // built-in monoid, or NULL if error
             // 11 MAX monoids
             switch (add->xtype->code)
             {
-                // bool case redudandt with OR
+                // bool case redundant with OR
                 case GB_BOOL_code   : return (GxB_LOR_BOOL_MONOID     ) ;
                 case GB_INT8_code   : return (GxB_MAX_INT8_MONOID     ) ;
                 case GB_UINT8_code  : return (GxB_MAX_UINT8_MONOID    ) ;
@@ -77,7 +77,7 @@ GrB_Monoid GB_mx_builtin_monoid     // built-in monoid, or NULL if error
             // 11 PLUS monoids
             switch (add->xtype->code)
             {
-                // bool case redudandt with OR
+                // bool case redundant with OR
                 case GB_BOOL_code   : return (GxB_LOR_BOOL_MONOID     ) ;
                 case GB_INT8_code   : return (GxB_PLUS_INT8_MONOID    ) ;
                 case GB_UINT8_code  : return (GxB_PLUS_UINT8_MONOID   ) ;
@@ -100,7 +100,7 @@ GrB_Monoid GB_mx_builtin_monoid     // built-in monoid, or NULL if error
             // 11 TIMES monoids
             switch (add->xtype->code)
             {
-                // bool case redudandt with AND
+                // bool case redundant with AND
                 case GB_BOOL_code   : return (GxB_LAND_BOOL_MONOID    ) ;
                 case GB_INT8_code   : return (GxB_TIMES_INT8_MONOID   ) ;
                 case GB_UINT8_code  : return (GxB_TIMES_UINT8_MONOID  ) ;
@@ -118,6 +118,28 @@ GrB_Monoid GB_mx_builtin_monoid     // built-in monoid, or NULL if error
             }
             break ;
 
+        case GB_ANY_opcode   :
+
+            // 11 ANY monoids
+            switch (add->xtype->code)
+            {
+                case GB_BOOL_code   : return (GxB_ANY_BOOL_MONOID   ) ;
+                case GB_INT8_code   : return (GxB_ANY_INT8_MONOID   ) ;
+                case GB_UINT8_code  : return (GxB_ANY_UINT8_MONOID  ) ;
+                case GB_INT16_code  : return (GxB_ANY_INT16_MONOID  ) ;
+                case GB_UINT16_code : return (GxB_ANY_UINT16_MONOID ) ;
+                case GB_INT32_code  : return (GxB_ANY_INT32_MONOID  ) ;
+                case GB_UINT32_code : return (GxB_ANY_UINT32_MONOID ) ;
+                case GB_INT64_code  : return (GxB_ANY_INT64_MONOID  ) ;
+                case GB_UINT64_code : return (GxB_ANY_UINT64_MONOID ) ;
+                case GB_FP32_code   : return (GxB_ANY_FP32_MONOID   ) ;
+                case GB_FP64_code   : return (GxB_ANY_FP64_MONOID   ) ;
+                default: 
+                    mexWarnMsgIdAndTxt ("GB:warn", "unknown type") ;
+                    return (NULL) ;
+            }
+            break ;
+
         case GB_LOR_opcode      :
 
             // 2 OR boolean monoids
diff --git a/Test/GB_mx_builtin_semiring.c b/Test/GB_mx_builtin_semiring.c
index 96de0756bd..1a44d97528 100644
--- a/Test/GB_mx_builtin_semiring.c
+++ b/Test/GB_mx_builtin_semiring.c
@@ -2,54 +2,14 @@
 // GB_mx_semiring: get a built-in semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
 
 #include "GB_mex.h"
 
-//------------------------------------------------------------------------------
-// built-in semirings
-//------------------------------------------------------------------------------
-
-// Using built-in types and operators, 1040 unique semirings can be built.  This
-// count excludes redundant Boolean operators (for example GxB_TIMES_BOOL and
-// GxB_LAND_BOOL are different operators but they are redundant since they
-// always return the same result):
-
-// 760 semirings with a multiply operator TxT -> T where T is non-Boolean, from
-// the complete cross product of:
-
-//      4 add monoids (MIN, MAX, PLUS, TIMES)
-//      19 multiply operators:
-//          (FIRST, SECOND, MIN, MAX, PLUS, MINUS, RMINUS, TIMES, DIV, RDIV,
-//           ISEQ, ISNE, ISGT, ISLT, ISGE, ISLE,
-//           LOR, LAND, LXOR)
-//      10 non-Boolean types, T
-
-// 240 semirings with a comparison operator TxT -> bool, where T is
-// non-Boolean, from the complete cross product of:
-
-//      4 Boolean add monoids: (LAND, LOR, LXOR, EQ)
-//      6 multiply operators: (EQ, NE, GT, LT, GE, LE)
-//      10 non-Boolean types, T
-
-// 40 semirings with purely Boolean types, bool x bool -> bool, from the
-// complete cross product of:
-
-//      4 Boolean add monoids (LAND, LOR, LXOR, EQ)
-//      10 multiply operators:
-//          (FIRST, SECOND, LOR, LAND, LXOR, EQ, GT, LT, GE, LE)
-
-// In the names below, each semiring has a name of the form GxB_add_mult_T
-// where add is the additive monoid, mult is the multiply operator, and T is
-// the type.  The type T is always the type of x and y for the z=mult(x,y)
-// operator.  The monoid's three types and the ztype of the mult operator are
-// always the same.  This is the type T for the first set, and Boolean for
-// the second and third sets of semirngs.
-
-//------------------------------------------------------------------------------
+// Built-in semirings: see GraphBLAS/GraphBLAS/@GrB/private/util/gb_semiring.c
 
 GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 (
@@ -77,7 +37,7 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
     // or not this function handles the semiring as hard-coded.  Now return for
     // cases this function does not handle.
 
-    if (add_opcode >= GB_USER_C_opcode || mult_opcode >= GB_USER_C_opcode)
+    if (add_opcode >= GB_USER_opcode || mult_opcode >= GB_USER_opcode)
     {
         // This function handles only built-in operators.
         return (NULL) ;
@@ -131,7 +91,7 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
     {
 
         //----------------------------------------------------------------------
-        // 640 semirings with TxT->T multiply operators
+        // 1000 semirings with TxT->T multiply operators
         //----------------------------------------------------------------------
 
         // x,y,z are all the same non-Boolean type
@@ -139,7 +99,7 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
         switch (mult_opcode)
         {
 
-            case GB_FIRST_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_FIRST_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -148,78 +108,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_FIRST_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_MIN_FIRST_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_MIN_FIRST_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_MIN_FIRST_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_MIN_FIRST_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_MIN_FIRST_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_MIN_FIRST_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_MIN_FIRST_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_MIN_FIRST_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_MIN_FIRST_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_FIRST_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MIN_FIRST_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MIN_FIRST_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MIN_FIRST_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MIN_FIRST_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MIN_FIRST_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MIN_FIRST_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MIN_FIRST_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MIN_FIRST_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MIN_FIRST_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_FIRST_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_MAX_FIRST_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_MAX_FIRST_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_MAX_FIRST_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_MAX_FIRST_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_MAX_FIRST_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_MAX_FIRST_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_MAX_FIRST_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_MAX_FIRST_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_MAX_FIRST_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_FIRST_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MAX_FIRST_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MAX_FIRST_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MAX_FIRST_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MAX_FIRST_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MAX_FIRST_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MAX_FIRST_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MAX_FIRST_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MAX_FIRST_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MAX_FIRST_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_FIRST_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_FIRST_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_PLUS_FIRST_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_FIRST_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_PLUS_FIRST_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_FIRST_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_PLUS_FIRST_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_FIRST_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_FIRST_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_FIRST_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_FIRST_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_FIRST_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_PLUS_FIRST_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_FIRST_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_PLUS_FIRST_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_FIRST_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_PLUS_FIRST_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_FIRST_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_FIRST_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_FIRST_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_FIRST_INT8  ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_FIRST_UINT8 ) ;
-                            case GB_INT16_code : return (GxB_TIMES_FIRST_INT16 ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_FIRST_UINT16) ;
-                            case GB_INT32_code : return (GxB_TIMES_FIRST_INT32 ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_FIRST_UINT32) ;
-                            case GB_INT64_code : return (GxB_TIMES_FIRST_INT64 ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_FIRST_UINT64) ;
-                            case GB_FP32_code  : return (GxB_TIMES_FIRST_FP32  ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_FIRST_FP64  ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_FIRST_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_FIRST_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_TIMES_FIRST_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_FIRST_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_TIMES_FIRST_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_FIRST_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_TIMES_FIRST_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_FIRST_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_FIRST_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_FIRST_FP64   ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_FIRST_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_ANY_FIRST_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_ANY_FIRST_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_ANY_FIRST_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_ANY_FIRST_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_ANY_FIRST_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_ANY_FIRST_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_ANY_FIRST_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_ANY_FIRST_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_ANY_FIRST_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_SECOND_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_SECOND_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -228,78 +206,194 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_SECOND_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_MIN_SECOND_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_MIN_SECOND_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_MIN_SECOND_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_MIN_SECOND_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_MIN_SECOND_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_MIN_SECOND_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_MIN_SECOND_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_MIN_SECOND_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_MIN_SECOND_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_SECOND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_MIN_SECOND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_MIN_SECOND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_MIN_SECOND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_MIN_SECOND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_MIN_SECOND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_MIN_SECOND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_MIN_SECOND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_MIN_SECOND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_MIN_SECOND_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_SECOND_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_MAX_SECOND_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_MAX_SECOND_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_MAX_SECOND_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_MAX_SECOND_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_MAX_SECOND_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_MAX_SECOND_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_MAX_SECOND_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_MAX_SECOND_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_MAX_SECOND_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_SECOND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_MAX_SECOND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_MAX_SECOND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_MAX_SECOND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_MAX_SECOND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_MAX_SECOND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_MAX_SECOND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_MAX_SECOND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_MAX_SECOND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_MAX_SECOND_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_SECOND_INT8  ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_SECOND_UINT8 ) ;
-                            case GB_INT16_code : return (GxB_PLUS_SECOND_INT16 ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_SECOND_UINT16) ;
-                            case GB_INT32_code : return (GxB_PLUS_SECOND_INT32 ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_SECOND_UINT32) ;
-                            case GB_INT64_code : return (GxB_PLUS_SECOND_INT64 ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_SECOND_UINT64) ;
-                            case GB_FP32_code  : return (GxB_PLUS_SECOND_FP32  ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_SECOND_FP64  ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_SECOND_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_SECOND_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_PLUS_SECOND_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_SECOND_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_PLUS_SECOND_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_SECOND_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_PLUS_SECOND_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_SECOND_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_SECOND_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_SECOND_FP64   ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_SECOND_INT8 ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_SECOND_UINT8) ;
-                            case GB_INT16_code : return (GxB_TIMES_SECOND_INT16) ;
+                            case GB_INT8_code  : return (GxB_TIMES_SECOND_INT8  ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_SECOND_UINT8 ) ;
+                            case GB_INT16_code : return (GxB_TIMES_SECOND_INT16 ) ;
                             case GB_UINT16_code: return (GxB_TIMES_SECOND_UINT16) ;
-                            case GB_INT32_code : return (GxB_TIMES_SECOND_INT32) ;
+                            case GB_INT32_code : return (GxB_TIMES_SECOND_INT32 ) ;
                             case GB_UINT32_code: return (GxB_TIMES_SECOND_UINT32) ;
-                            case GB_INT64_code : return (GxB_TIMES_SECOND_INT64) ;
+                            case GB_INT64_code : return (GxB_TIMES_SECOND_INT64 ) ;
                             case GB_UINT64_code: return (GxB_TIMES_SECOND_UINT64) ;
-                            case GB_FP32_code  : return (GxB_TIMES_SECOND_FP32 ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_SECOND_FP64 ) ;
-                            default : ; 
+                            case GB_FP32_code  : return (GxB_TIMES_SECOND_FP32  ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_SECOND_FP64  ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_SECOND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_ANY_SECOND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_ANY_SECOND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_ANY_SECOND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_ANY_SECOND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_ANY_SECOND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_ANY_SECOND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_ANY_SECOND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_ANY_SECOND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_ANY_SECOND_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    default : ;
+                }
+
+            case GB_PAIR_opcode : // with (5 monoids) x (10 nonboolean types)
+
+                switch (add_opcode)
+                {
+
+                    case GB_MIN_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_MIN_PAIR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_PAIR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_PAIR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_PAIR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_PAIR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_PAIR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_PAIR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_PAIR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_PAIR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_PAIR_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_MAX_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_MAX_PAIR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_PAIR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_PAIR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_PAIR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_PAIR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_PAIR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_PAIR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_PAIR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_PAIR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_PAIR_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_PLUS_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_PLUS_PAIR_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_PAIR_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_PAIR_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_PAIR_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_PAIR_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_PAIR_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_PAIR_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_PAIR_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_PAIR_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_PAIR_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_TIMES_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_TIMES_PAIR_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_PAIR_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_PAIR_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_PAIR_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_PAIR_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_PAIR_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_PAIR_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_PAIR_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_PAIR_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_PAIR_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_PAIR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_PAIR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_PAIR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_PAIR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_PAIR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_PAIR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_PAIR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_PAIR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_PAIR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_PAIR_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_MIN_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_MIN_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -308,78 +402,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_MIN_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MIN_MIN_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MIN_MIN_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MIN_MIN_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MIN_MIN_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MIN_MIN_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MIN_MIN_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MIN_MIN_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MIN_MIN_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MIN_MIN_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_MIN_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_MIN_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_MIN_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_MIN_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_MIN_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_MIN_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_MIN_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_MIN_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_MIN_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_MIN_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_MIN_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MAX_MIN_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MAX_MIN_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MAX_MIN_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MAX_MIN_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MAX_MIN_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MAX_MIN_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MAX_MIN_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MAX_MIN_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MAX_MIN_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_MIN_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_MIN_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_MIN_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_MIN_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_MIN_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_MIN_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_MIN_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_MIN_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_MIN_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_MIN_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_MIN_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_MIN_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_PLUS_MIN_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_MIN_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_PLUS_MIN_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_MIN_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_PLUS_MIN_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_MIN_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_MIN_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_MIN_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_MIN_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_MIN_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_MIN_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_MIN_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_MIN_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_MIN_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_MIN_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_MIN_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_MIN_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_MIN_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_MIN_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_MIN_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_TIMES_MIN_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_MIN_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_TIMES_MIN_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_MIN_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_TIMES_MIN_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_MIN_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_MIN_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_MIN_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_MIN_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_MIN_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_MIN_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_MIN_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_MIN_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_MIN_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_MIN_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_MIN_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_MIN_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_MIN_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_MIN_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_MIN_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_MIN_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_MIN_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_MIN_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_MIN_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_MIN_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_MIN_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_MIN_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_MIN_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_MAX_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_MAX_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -388,78 +500,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_MAX_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MIN_MAX_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MIN_MAX_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MIN_MAX_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MIN_MAX_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MIN_MAX_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MIN_MAX_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MIN_MAX_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MIN_MAX_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MIN_MAX_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_MAX_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_MAX_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_MAX_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_MAX_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_MAX_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_MAX_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_MAX_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_MAX_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_MAX_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_MAX_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_MAX_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MAX_MAX_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MAX_MAX_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MAX_MAX_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MAX_MAX_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MAX_MAX_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MAX_MAX_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MAX_MAX_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MAX_MAX_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MAX_MAX_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_MAX_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_MAX_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_MAX_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_MAX_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_MAX_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_MAX_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_MAX_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_MAX_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_MAX_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_MAX_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_MAX_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_MAX_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_PLUS_MAX_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_MAX_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_PLUS_MAX_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_MAX_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_PLUS_MAX_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_MAX_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_MAX_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_MAX_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_MAX_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_MAX_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_MAX_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_MAX_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_MAX_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_MAX_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_MAX_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_MAX_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_MAX_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_MAX_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_MAX_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_MAX_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_TIMES_MAX_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_MAX_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_TIMES_MAX_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_MAX_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_TIMES_MAX_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_MAX_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_MAX_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_MAX_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_MAX_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_MAX_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_MAX_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_MAX_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_MAX_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_MAX_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_MAX_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_MAX_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_MAX_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_MAX_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_MAX_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_MAX_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_MAX_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_MAX_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_MAX_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_MAX_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_MAX_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_MAX_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_MAX_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_MAX_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_PLUS_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_PLUS_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -468,78 +598,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_PLUS_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_PLUS_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_PLUS_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_PLUS_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_PLUS_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_PLUS_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_PLUS_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_PLUS_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_PLUS_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_PLUS_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_PLUS_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_PLUS_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_PLUS_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_PLUS_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_PLUS_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_PLUS_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_PLUS_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_PLUS_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_PLUS_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_PLUS_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_PLUS_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_PLUS_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_PLUS_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_PLUS_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_PLUS_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_PLUS_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_PLUS_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_PLUS_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_PLUS_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_PLUS_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_PLUS_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_PLUS_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_PLUS_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_PLUS_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_PLUS_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_PLUS_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_PLUS_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_PLUS_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_PLUS_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_PLUS_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_PLUS_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_PLUS_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_PLUS_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_PLUS_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_PLUS_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_PLUS_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_PLUS_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_PLUS_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_PLUS_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_PLUS_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_PLUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_PLUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_PLUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_PLUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_PLUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_PLUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_PLUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_PLUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_PLUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_PLUS_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_PLUS_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_PLUS_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_PLUS_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_PLUS_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_PLUS_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_PLUS_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_PLUS_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_PLUS_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_PLUS_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_PLUS_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_PLUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_PLUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_PLUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_PLUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_PLUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_PLUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_PLUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_PLUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_PLUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_PLUS_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_PLUS_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_PLUS_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_PLUS_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_PLUS_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_PLUS_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_PLUS_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_PLUS_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_PLUS_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_PLUS_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_PLUS_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_MINUS_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_MINUS_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -548,78 +696,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_MINUS_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_MIN_MINUS_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_MIN_MINUS_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_MIN_MINUS_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_MIN_MINUS_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_MIN_MINUS_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_MIN_MINUS_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_MIN_MINUS_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_MIN_MINUS_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_MIN_MINUS_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_MINUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MIN_MINUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MIN_MINUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MIN_MINUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MIN_MINUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MIN_MINUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MIN_MINUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MIN_MINUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MIN_MINUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MIN_MINUS_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_MINUS_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_MAX_MINUS_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_MAX_MINUS_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_MAX_MINUS_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_MAX_MINUS_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_MAX_MINUS_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_MAX_MINUS_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_MAX_MINUS_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_MAX_MINUS_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_MAX_MINUS_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_MINUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MAX_MINUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MAX_MINUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MAX_MINUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MAX_MINUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MAX_MINUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MAX_MINUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MAX_MINUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MAX_MINUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MAX_MINUS_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_MINUS_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_MINUS_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_PLUS_MINUS_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_MINUS_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_PLUS_MINUS_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_MINUS_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_PLUS_MINUS_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_MINUS_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_MINUS_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_MINUS_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_MINUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_MINUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_PLUS_MINUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_MINUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_PLUS_MINUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_MINUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_PLUS_MINUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_MINUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_MINUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_MINUS_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_MINUS_INT8  ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_MINUS_UINT8 ) ;
-                            case GB_INT16_code : return (GxB_TIMES_MINUS_INT16 ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_MINUS_UINT16) ;
-                            case GB_INT32_code : return (GxB_TIMES_MINUS_INT32 ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_MINUS_UINT32) ;
-                            case GB_INT64_code : return (GxB_TIMES_MINUS_INT64 ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_MINUS_UINT64) ;
-                            case GB_FP32_code  : return (GxB_TIMES_MINUS_FP32  ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_MINUS_FP64  ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_MINUS_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_MINUS_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_TIMES_MINUS_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_MINUS_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_TIMES_MINUS_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_MINUS_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_TIMES_MINUS_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_MINUS_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_MINUS_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_MINUS_FP64   ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_MINUS_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_ANY_MINUS_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_ANY_MINUS_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_ANY_MINUS_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_ANY_MINUS_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_ANY_MINUS_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_ANY_MINUS_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_ANY_MINUS_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_ANY_MINUS_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_ANY_MINUS_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_RMINUS_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_RMINUS_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -638,9 +804,9 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_MIN_RMINUS_UINT64  ) ;
                             case GB_FP32_code  : return (GxB_MIN_RMINUS_FP32    ) ;
                             case GB_FP64_code  : return (GxB_MIN_RMINUS_FP64    ) ;
-                            default : ; 
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
@@ -656,9 +822,9 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_MAX_RMINUS_UINT64  ) ;
                             case GB_FP32_code  : return (GxB_MAX_RMINUS_FP32    ) ;
                             case GB_FP64_code  : return (GxB_MAX_RMINUS_FP64    ) ;
-                            default : ; 
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
@@ -674,9 +840,9 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_PLUS_RMINUS_UINT64 ) ;
                             case GB_FP32_code  : return (GxB_PLUS_RMINUS_FP32   ) ;
                             case GB_FP64_code  : return (GxB_PLUS_RMINUS_FP64   ) ;
-                            default : ; 
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
@@ -692,14 +858,32 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_TIMES_RMINUS_UINT64) ;
                             case GB_FP32_code  : return (GxB_TIMES_RMINUS_FP32  ) ;
                             case GB_FP64_code  : return (GxB_TIMES_RMINUS_FP64  ) ;
-                            default : ; 
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_RMINUS_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_ANY_RMINUS_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_ANY_RMINUS_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_ANY_RMINUS_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_ANY_RMINUS_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_ANY_RMINUS_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_ANY_RMINUS_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_ANY_RMINUS_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_ANY_RMINUS_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_ANY_RMINUS_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_TIMES_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_TIMES_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -708,78 +892,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_TIMES_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_MIN_TIMES_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_MIN_TIMES_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_MIN_TIMES_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_MIN_TIMES_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_MIN_TIMES_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_MIN_TIMES_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_MIN_TIMES_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_MIN_TIMES_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_MIN_TIMES_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_TIMES_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MIN_TIMES_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MIN_TIMES_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MIN_TIMES_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MIN_TIMES_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MIN_TIMES_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MIN_TIMES_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MIN_TIMES_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MIN_TIMES_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MIN_TIMES_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_TIMES_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_MAX_TIMES_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_MAX_TIMES_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_MAX_TIMES_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_MAX_TIMES_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_MAX_TIMES_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_MAX_TIMES_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_MAX_TIMES_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_MAX_TIMES_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_MAX_TIMES_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_TIMES_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_MAX_TIMES_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_MAX_TIMES_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_MAX_TIMES_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_MAX_TIMES_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_MAX_TIMES_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_MAX_TIMES_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_MAX_TIMES_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_MAX_TIMES_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_MAX_TIMES_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_TIMES_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_TIMES_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_PLUS_TIMES_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_TIMES_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_PLUS_TIMES_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_TIMES_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_PLUS_TIMES_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_TIMES_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_TIMES_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_TIMES_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_TIMES_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_TIMES_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_PLUS_TIMES_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_TIMES_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_PLUS_TIMES_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_TIMES_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_PLUS_TIMES_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_TIMES_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_TIMES_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_TIMES_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_TIMES_INT8  ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_TIMES_UINT8 ) ;
-                            case GB_INT16_code : return (GxB_TIMES_TIMES_INT16 ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_TIMES_UINT16) ;
-                            case GB_INT32_code : return (GxB_TIMES_TIMES_INT32 ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_TIMES_UINT32) ;
-                            case GB_INT64_code : return (GxB_TIMES_TIMES_INT64 ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_TIMES_UINT64) ;
-                            case GB_FP32_code  : return (GxB_TIMES_TIMES_FP32  ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_TIMES_FP64  ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_TIMES_INT8   ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_TIMES_UINT8  ) ;
+                            case GB_INT16_code : return (GxB_TIMES_TIMES_INT16  ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_TIMES_UINT16 ) ;
+                            case GB_INT32_code : return (GxB_TIMES_TIMES_INT32  ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_TIMES_UINT32 ) ;
+                            case GB_INT64_code : return (GxB_TIMES_TIMES_INT64  ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_TIMES_UINT64 ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_TIMES_FP32   ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_TIMES_FP64   ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_TIMES_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_ANY_TIMES_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_ANY_TIMES_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_ANY_TIMES_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_ANY_TIMES_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_ANY_TIMES_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_ANY_TIMES_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_ANY_TIMES_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_ANY_TIMES_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_ANY_TIMES_FP64     ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_DIV_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_DIV_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -788,78 +990,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_DIV_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MIN_DIV_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MIN_DIV_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MIN_DIV_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MIN_DIV_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MIN_DIV_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MIN_DIV_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MIN_DIV_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MIN_DIV_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MIN_DIV_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_DIV_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_DIV_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_DIV_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_DIV_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_DIV_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_DIV_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_DIV_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_DIV_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_DIV_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_DIV_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_DIV_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MAX_DIV_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MAX_DIV_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MAX_DIV_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MAX_DIV_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MAX_DIV_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MAX_DIV_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MAX_DIV_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MAX_DIV_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MAX_DIV_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_DIV_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_DIV_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_DIV_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_DIV_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_DIV_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_DIV_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_DIV_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_DIV_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_DIV_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_DIV_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_DIV_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_DIV_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_PLUS_DIV_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_DIV_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_PLUS_DIV_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_DIV_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_PLUS_DIV_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_DIV_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_DIV_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_DIV_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_DIV_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_DIV_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_DIV_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_DIV_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_DIV_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_DIV_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_DIV_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_DIV_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_DIV_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_DIV_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_DIV_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_DIV_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_TIMES_DIV_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_DIV_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_TIMES_DIV_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_DIV_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_TIMES_DIV_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_DIV_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_DIV_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_DIV_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_DIV_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_DIV_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_DIV_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_DIV_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_DIV_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_DIV_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_DIV_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_DIV_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_DIV_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_DIV_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_DIV_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_DIV_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_DIV_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_DIV_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_DIV_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_DIV_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_DIV_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_DIV_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_DIV_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_DIV_FP64       ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_RDIV_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_RDIV_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -878,9 +1098,9 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_MIN_RDIV_UINT64    ) ;
                             case GB_FP32_code  : return (GxB_MIN_RDIV_FP32      ) ;
                             case GB_FP64_code  : return (GxB_MIN_RDIV_FP64      ) ;
-                            default : ; 
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
@@ -896,9 +1116,9 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_MAX_RDIV_UINT64    ) ;
                             case GB_FP32_code  : return (GxB_MAX_RDIV_FP32      ) ;
                             case GB_FP64_code  : return (GxB_MAX_RDIV_FP64      ) ;
-                            default : ; 
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
@@ -914,9 +1134,9 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_PLUS_RDIV_UINT64   ) ;
                             case GB_FP32_code  : return (GxB_PLUS_RDIV_FP32     ) ;
                             case GB_FP64_code  : return (GxB_PLUS_RDIV_FP64     ) ;
-                            default : ; 
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
@@ -932,14 +1152,32 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
                             case GB_UINT64_code: return (GxB_TIMES_RDIV_UINT64  ) ;
                             case GB_FP32_code  : return (GxB_TIMES_RDIV_FP32    ) ;
                             case GB_FP64_code  : return (GxB_TIMES_RDIV_FP64    ) ;
-                            default : ; 
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_RDIV_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_RDIV_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_RDIV_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_RDIV_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_RDIV_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_RDIV_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_RDIV_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_RDIV_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_RDIV_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_RDIV_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_ISEQ_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISEQ_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -948,78 +1186,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_ISEQ_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_ISEQ_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_ISEQ_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_ISEQ_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_ISEQ_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_ISEQ_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_ISEQ_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_ISEQ_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_ISEQ_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_ISEQ_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_ISEQ_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISEQ_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISEQ_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISEQ_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISEQ_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISEQ_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISEQ_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISEQ_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISEQ_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISEQ_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_ISEQ_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_ISEQ_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_ISEQ_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_ISEQ_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_ISEQ_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_ISEQ_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_ISEQ_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_ISEQ_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_ISEQ_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_ISEQ_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_ISEQ_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISEQ_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISEQ_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISEQ_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISEQ_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISEQ_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISEQ_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISEQ_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISEQ_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISEQ_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_ISEQ_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_ISEQ_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_ISEQ_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_ISEQ_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_ISEQ_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_ISEQ_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_ISEQ_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_ISEQ_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_ISEQ_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_ISEQ_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_ISEQ_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISEQ_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISEQ_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISEQ_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISEQ_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISEQ_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISEQ_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISEQ_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISEQ_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISEQ_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_ISEQ_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_ISEQ_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_ISEQ_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_ISEQ_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_ISEQ_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_ISEQ_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_ISEQ_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_ISEQ_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_ISEQ_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_ISEQ_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_ISEQ_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISEQ_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISEQ_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISEQ_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISEQ_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISEQ_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISEQ_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISEQ_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISEQ_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISEQ_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISEQ_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISEQ_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISEQ_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISEQ_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISEQ_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISEQ_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISEQ_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISEQ_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISEQ_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISEQ_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_ISNE_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISNE_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1028,78 +1284,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_ISNE_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_ISNE_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_ISNE_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_ISNE_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_ISNE_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_ISNE_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_ISNE_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_ISNE_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_ISNE_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_ISNE_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_ISNE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISNE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISNE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISNE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISNE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISNE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISNE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISNE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISNE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISNE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_ISNE_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_ISNE_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_ISNE_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_ISNE_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_ISNE_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_ISNE_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_ISNE_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_ISNE_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_ISNE_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_ISNE_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_ISNE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISNE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISNE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISNE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISNE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISNE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISNE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISNE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISNE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISNE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_ISNE_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_ISNE_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_ISNE_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_ISNE_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_ISNE_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_ISNE_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_ISNE_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_ISNE_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_ISNE_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_ISNE_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_ISNE_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISNE_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISNE_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISNE_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISNE_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISNE_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISNE_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISNE_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISNE_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISNE_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_ISNE_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_ISNE_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_ISNE_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_ISNE_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_ISNE_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_ISNE_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_ISNE_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_ISNE_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_ISNE_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_ISNE_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_ISNE_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISNE_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISNE_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISNE_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISNE_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISNE_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISNE_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISNE_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISNE_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISNE_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISNE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISNE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISNE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISNE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISNE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISNE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISNE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISNE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISNE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISNE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_ISGT_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISGT_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1108,78 +1382,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_ISGT_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_ISGT_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_ISGT_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_ISGT_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_ISGT_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_ISGT_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_ISGT_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_ISGT_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_ISGT_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_ISGT_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_ISGT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISGT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISGT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISGT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISGT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISGT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISGT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISGT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISGT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISGT_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_ISGT_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_ISGT_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_ISGT_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_ISGT_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_ISGT_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_ISGT_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_ISGT_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_ISGT_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_ISGT_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_ISGT_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_ISGT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISGT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISGT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISGT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISGT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISGT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISGT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISGT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISGT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISGT_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_ISGT_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_ISGT_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_ISGT_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_ISGT_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_ISGT_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_ISGT_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_ISGT_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_ISGT_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_ISGT_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_ISGT_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_ISGT_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISGT_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISGT_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISGT_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISGT_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISGT_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISGT_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISGT_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISGT_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISGT_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_ISGT_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_ISGT_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_ISGT_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_ISGT_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_ISGT_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_ISGT_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_ISGT_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_ISGT_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_ISGT_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_ISGT_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_ISGT_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISGT_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISGT_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISGT_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISGT_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISGT_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISGT_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISGT_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISGT_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISGT_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISGT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISGT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISGT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISGT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISGT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISGT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISGT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISGT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISGT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISGT_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_ISLT_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISLT_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1188,78 +1480,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_ISLT_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_ISLT_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_ISLT_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_ISLT_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_ISLT_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_ISLT_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_ISLT_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_ISLT_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_ISLT_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_ISLT_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_ISLT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISLT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISLT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISLT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISLT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISLT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISLT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISLT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISLT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISLT_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_ISLT_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_ISLT_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_ISLT_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_ISLT_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_ISLT_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_ISLT_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_ISLT_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_ISLT_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_ISLT_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_ISLT_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_ISLT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISLT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISLT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISLT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISLT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISLT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISLT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISLT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISLT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISLT_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_ISLT_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_ISLT_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_ISLT_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_ISLT_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_ISLT_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_ISLT_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_ISLT_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_ISLT_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_ISLT_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_ISLT_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_ISLT_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISLT_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISLT_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISLT_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISLT_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISLT_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISLT_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISLT_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISLT_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISLT_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_ISLT_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_ISLT_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_ISLT_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_ISLT_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_ISLT_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_ISLT_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_ISLT_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_ISLT_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_ISLT_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_ISLT_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_ISLT_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISLT_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISLT_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISLT_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISLT_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISLT_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISLT_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISLT_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISLT_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISLT_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISLT_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISLT_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISLT_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISLT_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISLT_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISLT_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISLT_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISLT_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISLT_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISLT_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_ISGE_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISGE_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1268,78 +1578,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_ISGE_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_ISGE_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_ISGE_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_ISGE_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_ISGE_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_ISGE_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_ISGE_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_ISGE_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_ISGE_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_ISGE_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_ISGE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISGE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISGE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISGE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISGE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISGE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISGE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISGE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISGE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISGE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_ISGE_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_ISGE_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_ISGE_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_ISGE_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_ISGE_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_ISGE_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_ISGE_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_ISGE_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_ISGE_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_ISGE_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_ISGE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISGE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISGE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISGE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISGE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISGE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISGE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISGE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISGE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISGE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_ISGE_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_ISGE_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_ISGE_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_ISGE_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_ISGE_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_ISGE_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_ISGE_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_ISGE_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_ISGE_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_ISGE_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_ISGE_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISGE_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISGE_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISGE_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISGE_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISGE_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISGE_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISGE_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISGE_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISGE_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_ISGE_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_ISGE_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_ISGE_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_ISGE_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_ISGE_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_ISGE_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_ISGE_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_ISGE_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_ISGE_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_ISGE_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_ISGE_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISGE_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISGE_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISGE_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISGE_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISGE_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISGE_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISGE_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISGE_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISGE_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISGE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISGE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISGE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISGE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISGE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISGE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISGE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISGE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISGE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISGE_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_ISLE_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_ISLE_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1348,78 +1676,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_ISLE_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_ISLE_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_ISLE_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_ISLE_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_ISLE_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_ISLE_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_ISLE_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_ISLE_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_ISLE_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_ISLE_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_ISLE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_ISLE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_ISLE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_ISLE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_ISLE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_ISLE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_ISLE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_ISLE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_ISLE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_ISLE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_ISLE_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_ISLE_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_ISLE_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_ISLE_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_ISLE_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_ISLE_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_ISLE_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_ISLE_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_ISLE_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_ISLE_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_ISLE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_ISLE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_ISLE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_ISLE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_ISLE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_ISLE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_ISLE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_ISLE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_ISLE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_ISLE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_ISLE_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_ISLE_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_ISLE_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_ISLE_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_ISLE_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_ISLE_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_ISLE_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_ISLE_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_ISLE_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_ISLE_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_ISLE_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_ISLE_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_ISLE_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_ISLE_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_ISLE_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_ISLE_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_ISLE_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_ISLE_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_ISLE_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_ISLE_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_ISLE_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_ISLE_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_ISLE_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_ISLE_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_ISLE_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_ISLE_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_ISLE_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_ISLE_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_ISLE_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_ISLE_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_ISLE_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_ISLE_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_ISLE_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_ISLE_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_ISLE_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_ISLE_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_ISLE_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_ISLE_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_ISLE_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_ISLE_FP64    ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_ISLE_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_ISLE_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_ISLE_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_ISLE_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_ISLE_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_ISLE_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_ISLE_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_ISLE_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_ISLE_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_ISLE_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_LOR_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_LOR_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1428,78 +1774,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_LOR_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MIN_LOR_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MIN_LOR_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MIN_LOR_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MIN_LOR_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MIN_LOR_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MIN_LOR_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MIN_LOR_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MIN_LOR_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MIN_LOR_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_LOR_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MIN_LOR_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MIN_LOR_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MIN_LOR_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MIN_LOR_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MIN_LOR_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MIN_LOR_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MIN_LOR_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MIN_LOR_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MIN_LOR_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_LOR_INT8      ) ;
-                            case GB_UINT8_code : return (GxB_MAX_LOR_UINT8     ) ;
-                            case GB_INT16_code : return (GxB_MAX_LOR_INT16     ) ;
-                            case GB_UINT16_code: return (GxB_MAX_LOR_UINT16    ) ;
-                            case GB_INT32_code : return (GxB_MAX_LOR_INT32     ) ;
-                            case GB_UINT32_code: return (GxB_MAX_LOR_UINT32    ) ;
-                            case GB_INT64_code : return (GxB_MAX_LOR_INT64     ) ;
-                            case GB_UINT64_code: return (GxB_MAX_LOR_UINT64    ) ;
-                            case GB_FP32_code  : return (GxB_MAX_LOR_FP32      ) ;
-                            case GB_FP64_code  : return (GxB_MAX_LOR_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_LOR_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_MAX_LOR_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_MAX_LOR_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_MAX_LOR_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_MAX_LOR_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_MAX_LOR_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_MAX_LOR_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_MAX_LOR_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_MAX_LOR_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_MAX_LOR_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_LOR_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_LOR_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_PLUS_LOR_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_LOR_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_PLUS_LOR_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_LOR_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_PLUS_LOR_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_LOR_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_LOR_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_LOR_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_LOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_LOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_PLUS_LOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_LOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_PLUS_LOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_LOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_PLUS_LOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_LOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_LOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_LOR_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_LOR_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_LOR_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_TIMES_LOR_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_LOR_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_TIMES_LOR_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_LOR_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_TIMES_LOR_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_LOR_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_LOR_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_LOR_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_LOR_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_LOR_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_TIMES_LOR_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_LOR_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_TIMES_LOR_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_LOR_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_TIMES_LOR_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_LOR_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_LOR_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_LOR_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LOR_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LOR_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_ANY_LOR_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LOR_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_ANY_LOR_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LOR_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_ANY_LOR_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LOR_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LOR_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LOR_FP64       ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_LAND_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_LAND_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1508,78 +1872,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_LAND_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_LAND_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_LAND_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_LAND_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_LAND_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_LAND_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_LAND_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_LAND_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_LAND_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_LAND_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_LAND_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_LAND_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_LAND_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_LAND_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_LAND_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_LAND_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_LAND_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_LAND_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_LAND_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_LAND_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_LAND_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_LAND_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_LAND_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_LAND_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_LAND_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_LAND_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_LAND_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_LAND_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_LAND_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_LAND_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_LAND_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_LAND_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_LAND_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_LAND_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_LAND_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_LAND_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_LAND_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_LAND_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_LAND_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_LAND_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_LAND_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_LAND_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_LAND_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_LAND_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_LAND_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_LAND_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_LAND_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_LAND_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_LAND_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_LAND_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_LAND_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_LAND_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_LAND_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_LAND_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_LAND_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_LAND_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_LAND_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_LAND_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_LAND_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_LAND_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_LAND_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_LAND_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_LAND_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_LAND_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_LAND_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_LAND_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_LAND_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_LAND_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_LAND_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_LAND_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_LAND_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_LAND_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_LAND_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_LAND_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_LAND_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_LAND_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_LAND_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_LAND_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_LAND_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_LAND_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LAND_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LAND_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_LAND_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LAND_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_LAND_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LAND_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_LAND_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LAND_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LAND_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LAND_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_LXOR_opcode : // with (4 monoids) x (10 non-Boolean types)
+            case GB_LXOR_opcode : // with (5 monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1588,73 +1970,91 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MIN_LXOR_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MIN_LXOR_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MIN_LXOR_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MIN_LXOR_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MIN_LXOR_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MIN_LXOR_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MIN_LXOR_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MIN_LXOR_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MIN_LXOR_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MIN_LXOR_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MIN_LXOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MIN_LXOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MIN_LXOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MIN_LXOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MIN_LXOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MIN_LXOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MIN_LXOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MIN_LXOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MIN_LXOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MIN_LXOR_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_MAX_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_MAX_LXOR_INT8     ) ;
-                            case GB_UINT8_code : return (GxB_MAX_LXOR_UINT8    ) ;
-                            case GB_INT16_code : return (GxB_MAX_LXOR_INT16    ) ;
-                            case GB_UINT16_code: return (GxB_MAX_LXOR_UINT16   ) ;
-                            case GB_INT32_code : return (GxB_MAX_LXOR_INT32    ) ;
-                            case GB_UINT32_code: return (GxB_MAX_LXOR_UINT32   ) ;
-                            case GB_INT64_code : return (GxB_MAX_LXOR_INT64    ) ;
-                            case GB_UINT64_code: return (GxB_MAX_LXOR_UINT64   ) ;
-                            case GB_FP32_code  : return (GxB_MAX_LXOR_FP32     ) ;
-                            case GB_FP64_code  : return (GxB_MAX_LXOR_FP64     ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_MAX_LXOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_MAX_LXOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_MAX_LXOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_MAX_LXOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_MAX_LXOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_MAX_LXOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_MAX_LXOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_MAX_LXOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_MAX_LXOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_MAX_LXOR_FP64      ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_PLUS_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_PLUS_LXOR_INT8    ) ;
-                            case GB_UINT8_code : return (GxB_PLUS_LXOR_UINT8   ) ;
-                            case GB_INT16_code : return (GxB_PLUS_LXOR_INT16   ) ;
-                            case GB_UINT16_code: return (GxB_PLUS_LXOR_UINT16  ) ;
-                            case GB_INT32_code : return (GxB_PLUS_LXOR_INT32   ) ;
-                            case GB_UINT32_code: return (GxB_PLUS_LXOR_UINT32  ) ;
-                            case GB_INT64_code : return (GxB_PLUS_LXOR_INT64   ) ;
-                            case GB_UINT64_code: return (GxB_PLUS_LXOR_UINT64  ) ;
-                            case GB_FP32_code  : return (GxB_PLUS_LXOR_FP32    ) ;
-                            case GB_FP64_code  : return (GxB_PLUS_LXOR_FP64    ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_PLUS_LXOR_INT8     ) ;
+                            case GB_UINT8_code : return (GxB_PLUS_LXOR_UINT8    ) ;
+                            case GB_INT16_code : return (GxB_PLUS_LXOR_INT16    ) ;
+                            case GB_UINT16_code: return (GxB_PLUS_LXOR_UINT16   ) ;
+                            case GB_INT32_code : return (GxB_PLUS_LXOR_INT32    ) ;
+                            case GB_UINT32_code: return (GxB_PLUS_LXOR_UINT32   ) ;
+                            case GB_INT64_code : return (GxB_PLUS_LXOR_INT64    ) ;
+                            case GB_UINT64_code: return (GxB_PLUS_LXOR_UINT64   ) ;
+                            case GB_FP32_code  : return (GxB_PLUS_LXOR_FP32     ) ;
+                            case GB_FP64_code  : return (GxB_PLUS_LXOR_FP64     ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_TIMES_opcode :
 
                         switch (zcode)
                         {
-                            case GB_INT8_code  : return (GxB_TIMES_LXOR_INT8   ) ;
-                            case GB_UINT8_code : return (GxB_TIMES_LXOR_UINT8  ) ;
-                            case GB_INT16_code : return (GxB_TIMES_LXOR_INT16  ) ;
-                            case GB_UINT16_code: return (GxB_TIMES_LXOR_UINT16 ) ;
-                            case GB_INT32_code : return (GxB_TIMES_LXOR_INT32  ) ;
-                            case GB_UINT32_code: return (GxB_TIMES_LXOR_UINT32 ) ;
-                            case GB_INT64_code : return (GxB_TIMES_LXOR_INT64  ) ;
-                            case GB_UINT64_code: return (GxB_TIMES_LXOR_UINT64 ) ;
-                            case GB_FP32_code  : return (GxB_TIMES_LXOR_FP32   ) ;
-                            case GB_FP64_code  : return (GxB_TIMES_LXOR_FP64   ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_TIMES_LXOR_INT8    ) ;
+                            case GB_UINT8_code : return (GxB_TIMES_LXOR_UINT8   ) ;
+                            case GB_INT16_code : return (GxB_TIMES_LXOR_INT16   ) ;
+                            case GB_UINT16_code: return (GxB_TIMES_LXOR_UINT16  ) ;
+                            case GB_INT32_code : return (GxB_TIMES_LXOR_INT32   ) ;
+                            case GB_UINT32_code: return (GxB_TIMES_LXOR_UINT32  ) ;
+                            case GB_INT64_code : return (GxB_TIMES_LXOR_INT64   ) ;
+                            case GB_UINT64_code: return (GxB_TIMES_LXOR_UINT64  ) ;
+                            case GB_FP32_code  : return (GxB_TIMES_LXOR_FP32    ) ;
+                            case GB_FP64_code  : return (GxB_TIMES_LXOR_FP64    ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (zcode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LXOR_INT8      ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LXOR_UINT8     ) ;
+                            case GB_INT16_code : return (GxB_ANY_LXOR_INT16     ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LXOR_UINT16    ) ;
+                            case GB_INT32_code : return (GxB_ANY_LXOR_INT32     ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LXOR_UINT32    ) ;
+                            case GB_INT64_code : return (GxB_ANY_LXOR_INT64     ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LXOR_UINT64    ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LXOR_FP32      ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LXOR_FP64      ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
@@ -1666,7 +2066,7 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
     {
 
         //----------------------------------------------------------------------
-        // 240 semirings with TxT->bool multiply operators
+        // 300 semirings with TxT->bool multiply operators
         //----------------------------------------------------------------------
 
         // x,y are one of the 10 non-Boolean types, z is Boolean
@@ -1674,7 +2074,7 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
         switch (mult_opcode)
         {
 
-            case GB_EQ_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_EQ_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1683,78 +2083,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LOR_EQ_INT8       ) ;
-                            case GB_UINT8_code :  return (GxB_LOR_EQ_UINT8      ) ;
-                            case GB_INT16_code :  return (GxB_LOR_EQ_INT16      ) ;
-                            case GB_UINT16_code:  return (GxB_LOR_EQ_UINT16     ) ;
-                            case GB_INT32_code :  return (GxB_LOR_EQ_INT32      ) ;
-                            case GB_UINT32_code:  return (GxB_LOR_EQ_UINT32     ) ;
-                            case GB_INT64_code :  return (GxB_LOR_EQ_INT64      ) ;
-                            case GB_UINT64_code:  return (GxB_LOR_EQ_UINT64     ) ;
-                            case GB_FP32_code  :  return (GxB_LOR_EQ_FP32       ) ;
-                            case GB_FP64_code  :  return (GxB_LOR_EQ_FP64       ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LOR_EQ_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_EQ_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_EQ_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_EQ_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_EQ_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_EQ_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_EQ_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_EQ_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_EQ_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_EQ_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LAND_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LAND_EQ_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LAND_EQ_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LAND_EQ_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LAND_EQ_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LAND_EQ_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LAND_EQ_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LAND_EQ_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LAND_EQ_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LAND_EQ_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LAND_EQ_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LAND_EQ_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_EQ_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_EQ_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_EQ_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_EQ_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_EQ_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_EQ_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_EQ_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_EQ_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_EQ_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LXOR_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LXOR_EQ_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LXOR_EQ_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LXOR_EQ_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LXOR_EQ_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LXOR_EQ_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LXOR_EQ_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LXOR_EQ_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LXOR_EQ_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LXOR_EQ_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LXOR_EQ_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LXOR_EQ_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_EQ_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_EQ_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_EQ_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_EQ_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_EQ_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_EQ_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_EQ_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_EQ_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_EQ_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_EQ_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_EQ_EQ_INT8        ) ;
-                            case GB_UINT8_code :  return (GxB_EQ_EQ_UINT8       ) ;
-                            case GB_INT16_code :  return (GxB_EQ_EQ_INT16       ) ;
-                            case GB_UINT16_code:  return (GxB_EQ_EQ_UINT16      ) ;
-                            case GB_INT32_code :  return (GxB_EQ_EQ_INT32       ) ;
-                            case GB_UINT32_code:  return (GxB_EQ_EQ_UINT32      ) ;
-                            case GB_INT64_code :  return (GxB_EQ_EQ_INT64       ) ;
-                            case GB_UINT64_code:  return (GxB_EQ_EQ_UINT64      ) ;
-                            case GB_FP32_code  :  return (GxB_EQ_EQ_FP32        ) ;
-                            case GB_FP64_code  :  return (GxB_EQ_EQ_FP64        ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_EQ_EQ_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_EQ_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_EQ_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_EQ_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_EQ_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_EQ_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_EQ_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_EQ_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_EQ_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_EQ_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_EQ_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_EQ_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_EQ_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_EQ_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_EQ_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_EQ_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_EQ_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_EQ_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_EQ_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_EQ_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_NE_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_NE_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1763,78 +2181,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LOR_NE_INT8       ) ;
-                            case GB_UINT8_code :  return (GxB_LOR_NE_UINT8      ) ;
-                            case GB_INT16_code :  return (GxB_LOR_NE_INT16      ) ;
-                            case GB_UINT16_code:  return (GxB_LOR_NE_UINT16     ) ;
-                            case GB_INT32_code :  return (GxB_LOR_NE_INT32      ) ;
-                            case GB_UINT32_code:  return (GxB_LOR_NE_UINT32     ) ;
-                            case GB_INT64_code :  return (GxB_LOR_NE_INT64      ) ;
-                            case GB_UINT64_code:  return (GxB_LOR_NE_UINT64     ) ;
-                            case GB_FP32_code  :  return (GxB_LOR_NE_FP32       ) ;
-                            case GB_FP64_code  :  return (GxB_LOR_NE_FP64       ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LOR_NE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_NE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_NE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_NE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_NE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_NE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_NE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_NE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_NE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_NE_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LAND_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LAND_NE_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LAND_NE_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LAND_NE_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LAND_NE_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LAND_NE_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LAND_NE_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LAND_NE_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LAND_NE_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LAND_NE_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LAND_NE_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LAND_NE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_NE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_NE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_NE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_NE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_NE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_NE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_NE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_NE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_NE_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LXOR_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LXOR_NE_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LXOR_NE_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LXOR_NE_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LXOR_NE_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LXOR_NE_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LXOR_NE_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LXOR_NE_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LXOR_NE_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LXOR_NE_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LXOR_NE_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LXOR_NE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_NE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_NE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_NE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_NE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_NE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_NE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_NE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_NE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_NE_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_EQ_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_EQ_NE_INT8        ) ;
-                            case GB_UINT8_code :  return (GxB_EQ_NE_UINT8       ) ;
-                            case GB_INT16_code :  return (GxB_EQ_NE_INT16       ) ;
-                            case GB_UINT16_code:  return (GxB_EQ_NE_UINT16      ) ;
-                            case GB_INT32_code :  return (GxB_EQ_NE_INT32       ) ;
-                            case GB_UINT32_code:  return (GxB_EQ_NE_UINT32      ) ;
-                            case GB_INT64_code :  return (GxB_EQ_NE_INT64       ) ;
-                            case GB_UINT64_code:  return (GxB_EQ_NE_UINT64      ) ;
-                            case GB_FP32_code  :  return (GxB_EQ_NE_FP32        ) ;
-                            case GB_FP64_code  :  return (GxB_EQ_NE_FP64        ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_EQ_NE_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_NE_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_NE_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_NE_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_NE_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_NE_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_NE_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_NE_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_NE_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_NE_FP64         ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_NE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_NE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_NE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_NE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_NE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_NE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_NE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_NE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_NE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_NE_FP64        ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
 
-            case GB_GT_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_GT_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1843,78 +2279,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LOR_GT_INT8       ) ;
-                            case GB_UINT8_code :  return (GxB_LOR_GT_UINT8      ) ;
-                            case GB_INT16_code :  return (GxB_LOR_GT_INT16      ) ;
-                            case GB_UINT16_code:  return (GxB_LOR_GT_UINT16     ) ;
-                            case GB_INT32_code :  return (GxB_LOR_GT_INT32      ) ;
-                            case GB_UINT32_code:  return (GxB_LOR_GT_UINT32     ) ;
-                            case GB_INT64_code :  return (GxB_LOR_GT_INT64      ) ;
-                            case GB_UINT64_code:  return (GxB_LOR_GT_UINT64     ) ;
-                            case GB_FP32_code  :  return (GxB_LOR_GT_FP32       ) ;
-                            case GB_FP64_code  :  return (GxB_LOR_GT_FP64       ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LOR_GT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_GT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_GT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_GT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_GT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_GT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_GT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_GT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_GT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_GT_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LAND_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LAND_GT_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LAND_GT_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LAND_GT_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LAND_GT_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LAND_GT_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LAND_GT_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LAND_GT_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LAND_GT_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LAND_GT_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LAND_GT_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LAND_GT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_GT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_GT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_GT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_GT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_GT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_GT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_GT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_GT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_GT_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LXOR_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LXOR_GT_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LXOR_GT_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LXOR_GT_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LXOR_GT_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LXOR_GT_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LXOR_GT_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LXOR_GT_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LXOR_GT_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LXOR_GT_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LXOR_GT_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LXOR_GT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_GT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_GT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_GT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_GT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_GT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_GT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_GT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_GT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_GT_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_EQ_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_EQ_GT_INT8        ) ;
-                            case GB_UINT8_code :  return (GxB_EQ_GT_UINT8       ) ;
-                            case GB_INT16_code :  return (GxB_EQ_GT_INT16       ) ;
-                            case GB_UINT16_code:  return (GxB_EQ_GT_UINT16      ) ;
-                            case GB_INT32_code :  return (GxB_EQ_GT_INT32       ) ;
-                            case GB_UINT32_code:  return (GxB_EQ_GT_UINT32      ) ;
-                            case GB_INT64_code :  return (GxB_EQ_GT_INT64       ) ;
-                            case GB_UINT64_code:  return (GxB_EQ_GT_UINT64      ) ;
-                            case GB_FP32_code  :  return (GxB_EQ_GT_FP32        ) ;
-                            case GB_FP64_code  :  return (GxB_EQ_GT_FP64        ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_EQ_GT_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_GT_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_GT_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_GT_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_GT_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_GT_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_GT_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_GT_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_GT_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_GT_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_GT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_GT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_GT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_GT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_GT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_GT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_GT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_GT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_GT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_GT_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_LT_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_LT_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -1923,78 +2377,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LOR_LT_INT8       ) ;
-                            case GB_UINT8_code :  return (GxB_LOR_LT_UINT8      ) ;
-                            case GB_INT16_code :  return (GxB_LOR_LT_INT16      ) ;
-                            case GB_UINT16_code:  return (GxB_LOR_LT_UINT16     ) ;
-                            case GB_INT32_code :  return (GxB_LOR_LT_INT32      ) ;
-                            case GB_UINT32_code:  return (GxB_LOR_LT_UINT32     ) ;
-                            case GB_INT64_code :  return (GxB_LOR_LT_INT64      ) ;
-                            case GB_UINT64_code:  return (GxB_LOR_LT_UINT64     ) ;
-                            case GB_FP32_code  :  return (GxB_LOR_LT_FP32       ) ;
-                            case GB_FP64_code  :  return (GxB_LOR_LT_FP64       ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LOR_LT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_LT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_LT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_LT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_LT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_LT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_LT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_LT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_LT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_LT_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LAND_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LAND_LT_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LAND_LT_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LAND_LT_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LAND_LT_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LAND_LT_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LAND_LT_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LAND_LT_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LAND_LT_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LAND_LT_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LAND_LT_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LAND_LT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_LT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_LT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_LT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_LT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_LT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_LT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_LT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_LT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_LT_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LXOR_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LXOR_LT_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LXOR_LT_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LXOR_LT_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LXOR_LT_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LXOR_LT_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LXOR_LT_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LXOR_LT_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LXOR_LT_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LXOR_LT_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LXOR_LT_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LXOR_LT_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_LT_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_LT_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_LT_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_LT_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_LT_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_LT_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_LT_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_LT_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_LT_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_EQ_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_EQ_LT_INT8        ) ;
-                            case GB_UINT8_code :  return (GxB_EQ_LT_UINT8       ) ;
-                            case GB_INT16_code :  return (GxB_EQ_LT_INT16       ) ;
-                            case GB_UINT16_code:  return (GxB_EQ_LT_UINT16      ) ;
-                            case GB_INT32_code :  return (GxB_EQ_LT_INT32       ) ;
-                            case GB_UINT32_code:  return (GxB_EQ_LT_UINT32      ) ;
-                            case GB_INT64_code :  return (GxB_EQ_LT_INT64       ) ;
-                            case GB_UINT64_code:  return (GxB_EQ_LT_UINT64      ) ;
-                            case GB_FP32_code  :  return (GxB_EQ_LT_FP32        ) ;
-                            case GB_FP64_code  :  return (GxB_EQ_LT_FP64        ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_EQ_LT_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_LT_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_LT_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_LT_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_LT_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_LT_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_LT_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_LT_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_LT_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_LT_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LT_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LT_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_LT_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LT_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_LT_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LT_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_LT_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LT_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LT_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LT_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_GE_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_GE_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -2003,78 +2475,96 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LOR_GE_INT8       ) ;
-                            case GB_UINT8_code :  return (GxB_LOR_GE_UINT8      ) ;
-                            case GB_INT16_code :  return (GxB_LOR_GE_INT16      ) ;
-                            case GB_UINT16_code:  return (GxB_LOR_GE_UINT16     ) ;
-                            case GB_INT32_code :  return (GxB_LOR_GE_INT32      ) ;
-                            case GB_UINT32_code:  return (GxB_LOR_GE_UINT32     ) ;
-                            case GB_INT64_code :  return (GxB_LOR_GE_INT64      ) ;
-                            case GB_UINT64_code:  return (GxB_LOR_GE_UINT64     ) ;
-                            case GB_FP32_code  :  return (GxB_LOR_GE_FP32       ) ;
-                            case GB_FP64_code  :  return (GxB_LOR_GE_FP64       ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LOR_GE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_GE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_GE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_GE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_GE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_GE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_GE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_GE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_GE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_GE_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LAND_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LAND_GE_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LAND_GE_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LAND_GE_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LAND_GE_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LAND_GE_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LAND_GE_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LAND_GE_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LAND_GE_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LAND_GE_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LAND_GE_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LAND_GE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_GE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_GE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_GE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_GE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_GE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_GE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_GE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_GE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_GE_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LXOR_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LXOR_GE_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LXOR_GE_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LXOR_GE_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LXOR_GE_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LXOR_GE_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LXOR_GE_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LXOR_GE_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LXOR_GE_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LXOR_GE_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LXOR_GE_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LXOR_GE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_GE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_GE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_GE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_GE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_GE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_GE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_GE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_GE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_GE_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_EQ_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_EQ_GE_INT8        ) ;
-                            case GB_UINT8_code :  return (GxB_EQ_GE_UINT8       ) ;
-                            case GB_INT16_code :  return (GxB_EQ_GE_INT16       ) ;
-                            case GB_UINT16_code:  return (GxB_EQ_GE_UINT16      ) ;
-                            case GB_INT32_code :  return (GxB_EQ_GE_INT32       ) ;
-                            case GB_UINT32_code:  return (GxB_EQ_GE_UINT32      ) ;
-                            case GB_INT64_code :  return (GxB_EQ_GE_INT64       ) ;
-                            case GB_UINT64_code:  return (GxB_EQ_GE_UINT64      ) ;
-                            case GB_FP32_code  :  return (GxB_EQ_GE_FP32        ) ;
-                            case GB_FP64_code  :  return (GxB_EQ_GE_FP64        ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_EQ_GE_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_GE_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_GE_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_GE_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_GE_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_GE_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_GE_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_GE_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_GE_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_GE_FP64         ) ;
+                            default : ;
+                        }
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_GE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_GE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_GE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_GE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_GE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_GE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_GE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_GE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_GE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_GE_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     default : ;
                 }
 
-            case GB_LE_opcode : // with (4 bool monoids) x (10 non-bool types)
+            case GB_LE_opcode : // with (5 bool monoids) x (10 nonboolean types)
 
                 switch (add_opcode)
                 {
@@ -2083,73 +2573,91 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LOR_LE_INT8       ) ;
-                            case GB_UINT8_code :  return (GxB_LOR_LE_UINT8      ) ;
-                            case GB_INT16_code :  return (GxB_LOR_LE_INT16      ) ;
-                            case GB_UINT16_code:  return (GxB_LOR_LE_UINT16     ) ;
-                            case GB_INT32_code :  return (GxB_LOR_LE_INT32      ) ;
-                            case GB_UINT32_code:  return (GxB_LOR_LE_UINT32     ) ;
-                            case GB_INT64_code :  return (GxB_LOR_LE_INT64      ) ;
-                            case GB_UINT64_code:  return (GxB_LOR_LE_UINT64     ) ;
-                            case GB_FP32_code  :  return (GxB_LOR_LE_FP32       ) ;
-                            case GB_FP64_code  :  return (GxB_LOR_LE_FP64       ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LOR_LE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_LOR_LE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_LOR_LE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_LOR_LE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_LOR_LE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_LOR_LE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_LOR_LE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_LOR_LE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_LOR_LE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_LOR_LE_FP64        ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LAND_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LAND_LE_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LAND_LE_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LAND_LE_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LAND_LE_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LAND_LE_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LAND_LE_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LAND_LE_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LAND_LE_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LAND_LE_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LAND_LE_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LAND_LE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LAND_LE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LAND_LE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LAND_LE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LAND_LE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LAND_LE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LAND_LE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LAND_LE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LAND_LE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LAND_LE_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_LXOR_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_LXOR_LE_INT8      ) ;
-                            case GB_UINT8_code :  return (GxB_LXOR_LE_UINT8     ) ;
-                            case GB_INT16_code :  return (GxB_LXOR_LE_INT16     ) ;
-                            case GB_UINT16_code:  return (GxB_LXOR_LE_UINT16    ) ;
-                            case GB_INT32_code :  return (GxB_LXOR_LE_INT32     ) ;
-                            case GB_UINT32_code:  return (GxB_LXOR_LE_UINT32    ) ;
-                            case GB_INT64_code :  return (GxB_LXOR_LE_INT64     ) ;
-                            case GB_UINT64_code:  return (GxB_LXOR_LE_UINT64    ) ;
-                            case GB_FP32_code  :  return (GxB_LXOR_LE_FP32      ) ;
-                            case GB_FP64_code  :  return (GxB_LXOR_LE_FP64      ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_LXOR_LE_INT8       ) ;
+                            case GB_UINT8_code : return (GxB_LXOR_LE_UINT8      ) ;
+                            case GB_INT16_code : return (GxB_LXOR_LE_INT16      ) ;
+                            case GB_UINT16_code: return (GxB_LXOR_LE_UINT16     ) ;
+                            case GB_INT32_code : return (GxB_LXOR_LE_INT32      ) ;
+                            case GB_UINT32_code: return (GxB_LXOR_LE_UINT32     ) ;
+                            case GB_INT64_code : return (GxB_LXOR_LE_INT64      ) ;
+                            case GB_UINT64_code: return (GxB_LXOR_LE_UINT64     ) ;
+                            case GB_FP32_code  : return (GxB_LXOR_LE_FP32       ) ;
+                            case GB_FP64_code  : return (GxB_LXOR_LE_FP64       ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
 
                     case GB_EQ_opcode :
 
                         switch (xycode)
                         {
-                            case GB_INT8_code  :  return (GxB_EQ_LE_INT8        ) ;
-                            case GB_UINT8_code :  return (GxB_EQ_LE_UINT8       ) ;
-                            case GB_INT16_code :  return (GxB_EQ_LE_INT16       ) ;
-                            case GB_UINT16_code:  return (GxB_EQ_LE_UINT16      ) ;
-                            case GB_INT32_code :  return (GxB_EQ_LE_INT32       ) ;
-                            case GB_UINT32_code:  return (GxB_EQ_LE_UINT32      ) ;
-                            case GB_INT64_code :  return (GxB_EQ_LE_INT64       ) ;
-                            case GB_UINT64_code:  return (GxB_EQ_LE_UINT64      ) ;
-                            case GB_FP32_code  :  return (GxB_EQ_LE_FP32        ) ;
-                            case GB_FP64_code  :  return (GxB_EQ_LE_FP64        ) ;
-                            default : ; 
+                            case GB_INT8_code  : return (GxB_EQ_LE_INT8         ) ;
+                            case GB_UINT8_code : return (GxB_EQ_LE_UINT8        ) ;
+                            case GB_INT16_code : return (GxB_EQ_LE_INT16        ) ;
+                            case GB_UINT16_code: return (GxB_EQ_LE_UINT16       ) ;
+                            case GB_INT32_code : return (GxB_EQ_LE_INT32        ) ;
+                            case GB_UINT32_code: return (GxB_EQ_LE_UINT32       ) ;
+                            case GB_INT64_code : return (GxB_EQ_LE_INT64        ) ;
+                            case GB_UINT64_code: return (GxB_EQ_LE_UINT64       ) ;
+                            case GB_FP32_code  : return (GxB_EQ_LE_FP32         ) ;
+                            case GB_FP64_code  : return (GxB_EQ_LE_FP64         ) ;
+                            default : ;
                         }
-                        break; 
+                        break ;
+
+                    case GB_ANY_opcode :
+
+                        switch (xycode)
+                        {
+                            case GB_INT8_code  : return (GxB_ANY_LE_INT8        ) ;
+                            case GB_UINT8_code : return (GxB_ANY_LE_UINT8       ) ;
+                            case GB_INT16_code : return (GxB_ANY_LE_INT16       ) ;
+                            case GB_UINT16_code: return (GxB_ANY_LE_UINT16      ) ;
+                            case GB_INT32_code : return (GxB_ANY_LE_INT32       ) ;
+                            case GB_UINT32_code: return (GxB_ANY_LE_UINT32      ) ;
+                            case GB_INT64_code : return (GxB_ANY_LE_INT64       ) ;
+                            case GB_UINT64_code: return (GxB_ANY_LE_UINT64      ) ;
+                            case GB_FP32_code  : return (GxB_ANY_LE_FP32        ) ;
+                            case GB_FP64_code  : return (GxB_ANY_LE_FP64        ) ;
+                            default : ;
+                        }
+                        break ;
 
                     default : ;
                 }
@@ -2161,7 +2669,7 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
     {
 
         //----------------------------------------------------------------------
-        // 40 purely Boolean semirings
+        // 55 purely Boolean semirings
         //----------------------------------------------------------------------
 
         // x,y,z are all Boolean, and all operators are Boolean
@@ -2173,10 +2681,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_FIRST_BOOL    ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_FIRST_BOOL   ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_FIRST_BOOL   ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_FIRST_BOOL     ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_FIRST_BOOL     ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_FIRST_BOOL    ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_FIRST_BOOL    ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_FIRST_BOOL      ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_FIRST_BOOL     ) ;
                     default : ;
                 }
 
@@ -2184,10 +2693,23 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_SECOND_BOOL   ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_SECOND_BOOL  ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_SECOND_BOOL  ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_SECOND_BOOL    ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_SECOND_BOOL    ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_SECOND_BOOL   ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_SECOND_BOOL   ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_SECOND_BOOL     ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_SECOND_BOOL    ) ;
+                    default : ;
+                }
+
+            case GB_PAIR_opcode :
+
+                switch (add_opcode)
+                {
+                    case GB_LOR_opcode        : return (GxB_LOR_PAIR_BOOL      ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_PAIR_BOOL     ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_PAIR_BOOL     ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_PAIR_BOOL       ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_PAIR_BOOL      ) ;
                     default : ;
                 }
 
@@ -2195,10 +2717,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_LOR_BOOL      ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_LOR_BOOL     ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_LOR_BOOL     ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_LOR_BOOL       ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LOR_BOOL       ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LOR_BOOL      ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LOR_BOOL      ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LOR_BOOL        ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LOR_BOOL       ) ;
                     default : ;
                 }
 
@@ -2206,10 +2729,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_LAND_BOOL     ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_LAND_BOOL    ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_LAND_BOOL    ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_LAND_BOOL      ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LAND_BOOL      ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LAND_BOOL     ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LAND_BOOL     ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LAND_BOOL       ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LAND_BOOL      ) ;
                     default : ;
                 }
 
@@ -2217,10 +2741,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_LXOR_BOOL     ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_LXOR_BOOL    ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_LXOR_BOOL    ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_LXOR_BOOL      ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LXOR_BOOL      ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LXOR_BOOL     ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LXOR_BOOL     ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LXOR_BOOL       ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LXOR_BOOL      ) ;
                     default : ;
                 }
 
@@ -2228,10 +2753,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_EQ_BOOL       ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_EQ_BOOL      ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_EQ_BOOL      ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_EQ_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_EQ_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_EQ_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_EQ_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_EQ_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_EQ_BOOL        ) ;
                     default : ;
                 }
 
@@ -2239,10 +2765,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_GT_BOOL       ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_GT_BOOL      ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_GT_BOOL      ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_GT_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_GT_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_GT_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_GT_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_GT_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_GT_BOOL        ) ;
                     default : ;
                 }
 
@@ -2250,10 +2777,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_LT_BOOL       ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_LT_BOOL      ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_LT_BOOL      ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_LT_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LT_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LT_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LT_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LT_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LT_BOOL        ) ;
                     default : ;
                 }
 
@@ -2261,10 +2789,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_GE_BOOL       ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_GE_BOOL      ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_GE_BOOL      ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_GE_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_GE_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_GE_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_GE_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_GE_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_GE_BOOL        ) ;
                     default : ;
                 }
 
@@ -2272,10 +2801,11 @@ GrB_Semiring GB_mx_builtin_semiring // built-in semiring, or NULL if error
 
                 switch (add_opcode)
                 {
-                    case GB_LOR_opcode        :  return (GxB_LOR_LE_BOOL       ) ;
-                    case GB_LAND_opcode       :  return (GxB_LAND_LE_BOOL      ) ;
-                    case GB_LXOR_opcode       :  return (GxB_LXOR_LE_BOOL      ) ;
-                    case GB_EQ_opcode         :  return (GxB_EQ_LE_BOOL        ) ;
+                    case GB_LOR_opcode        : return (GxB_LOR_LE_BOOL        ) ;
+                    case GB_LAND_opcode       : return (GxB_LAND_LE_BOOL       ) ;
+                    case GB_LXOR_opcode       : return (GxB_LXOR_LE_BOOL       ) ;
+                    case GB_EQ_opcode         : return (GxB_EQ_LE_BOOL         ) ;
+                    case GB_ANY_opcode        : return (GxB_ANY_LE_BOOL        ) ;
                     default : ;
                 }
 
diff --git a/Test/GB_mx_classID_to_Type.c b/Test/GB_mx_classID_to_Type.c
index 3f45744225..2e127b93ab 100644
--- a/Test/GB_mx_classID_to_Type.c
+++ b/Test/GB_mx_classID_to_Type.c
@@ -2,7 +2,7 @@
 // GB_mx_classID_to_Type: get GraphBLAS type of the corresponding MATLAB class
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_classID_to_string.c b/Test/GB_mx_classID_to_string.c
index 7f964a015e..0ad6611493 100644
--- a/Test/GB_mx_classID_to_string.c
+++ b/Test/GB_mx_classID_to_string.c
@@ -2,7 +2,7 @@
 // GB_mx_classID_to_string: return a MATLAB string from the class ID
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_clear_time.c b/Test/GB_mx_clear_time.c
index 1c4f239883..e2aa2833e7 100644
--- a/Test/GB_mx_clear_time.c
+++ b/Test/GB_mx_clear_time.c
@@ -2,7 +2,7 @@
 // GB_mx_clear_time: clear the time and start the timer
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_complex_merge.c b/Test/GB_mx_complex_merge.c
index 52c340f4fc..56d3a4ffd8 100644
--- a/Test/GB_mx_complex_merge.c
+++ b/Test/GB_mx_complex_merge.c
@@ -2,7 +2,7 @@
 // GB_mx_complex_merge: merge a MATLAB complex mxArray
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_complex_split.c b/Test/GB_mx_complex_split.c
index c346919393..1f013dda7a 100644
--- a/Test/GB_mx_complex_split.c
+++ b/Test/GB_mx_complex_split.c
@@ -2,7 +2,7 @@
 // GB_mx_complex_split: split a MATLAB complex mxArray
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_get_global.c b/Test/GB_mx_get_global.c
index 4078adb9c0..48fd092b86 100644
--- a/Test/GB_mx_get_global.c
+++ b/Test/GB_mx_get_global.c
@@ -2,7 +2,7 @@
 // GB_mx_get_global: get the GraphBLAS thread-local storage from MATLAB
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -44,7 +44,6 @@ bool GB_mx_get_global       // true if doing malloc_debug
         // if (malloc_debug) printf ("GraphBLAS malloc debug enabled\n") ;
     }
 
-
     //--------------------------------------------------------------------------
     // clear the time
     //--------------------------------------------------------------------------
@@ -63,12 +62,14 @@ bool GB_mx_get_global       // true if doing malloc_debug
     // initialize GraphBLAS
     //--------------------------------------------------------------------------
 
+    bool burble = GB_Global_burble_get ( ) ;            // save current burble
     GB_Global_GrB_init_called_set (false) ;
     GxB_init (GrB_NONBLOCKING, mxMalloc, mxCalloc, mxRealloc, mxFree, false) ;
     ASSERT (GB_Global_nmalloc_get ( ) == 0) ;
     GB_Global_abort_function_set (GB_mx_abort) ;
     GB_Global_malloc_tracking_set (true) ;
     GxB_set (GxB_FORMAT, GxB_BY_COL) ;
+    GxB_set (GxB_BURBLE, burble) ;                      // restore the burble
 
     //--------------------------------------------------------------------------
     // get nthreads
diff --git a/Test/GB_mx_isequal.c b/Test/GB_mx_isequal.c
index 35c9f5f617..af4fcedb1a 100644
--- a/Test/GB_mx_isequal.c
+++ b/Test/GB_mx_isequal.c
@@ -2,7 +2,7 @@
 // GB_mx_isequal: check if two matrices are equal
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -12,7 +12,10 @@
 bool GB_mx_isequal     // true if A and B are exactly the same
 (
     GrB_Matrix A,
-    GrB_Matrix B
+    GrB_Matrix B,
+    double eps      // if A and B are both FP32 or FP64, and if eps > 0,
+                    // then the values are considered equal if their relative
+                    // difference is less than or equal to eps.
 )
 {
     // printf ("mx_isequal\n") ;
@@ -80,8 +83,22 @@ bool GB_mx_isequal     // true if A and B are exactly the same
         if (!GB_mx_same  ((char *) A->i, (char *) B->i, nnz * s))
             return (false) ;
         // printf ("i same\n") ;
-        if (!GB_mx_xsame (A->x, B->x, nnz, asize, A->i))
-            return (false) ;
+
+        if (A->type == GrB_FP32 && eps > 0)
+        {
+            if (!GB_mx_xsame32 (A->x, B->x, nnz, A->i, eps))
+                return (false) ;
+        }
+        else if (A->type == GrB_FP64 && eps > 0)
+        {
+            if (!GB_mx_xsame64 (A->x, B->x, nnz, A->i, eps))
+                return (false) ;
+        }
+        else
+        {
+            if (!GB_mx_xsame (A->x, B->x, nnz, asize, A->i))
+                return (false) ;
+        }
         // printf ("x same\n") ;
     }
 
diff --git a/Test/GB_mx_mxArray_to_BinaryOp.c b/Test/GB_mx_mxArray_to_BinaryOp.c
index 5df964cf04..30a0788a3e 100644
--- a/Test/GB_mx_mxArray_to_BinaryOp.c
+++ b/Test/GB_mx_mxArray_to_BinaryOp.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_BinaryOp
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_mxArray_to_Descriptor.c b/Test/GB_mx_mxArray_to_Descriptor.c
index e377f84f60..97800ed604 100644
--- a/Test/GB_mx_mxArray_to_Descriptor.c
+++ b/Test/GB_mx_mxArray_to_Descriptor.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_Descriptor: get the contents of a GraphBLAS Descriptor
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -66,14 +66,27 @@ static bool get_descriptor
         {
             info = GxB_set (D, field, GrB_TRAN) ;
         }
-        else if (MATCH (s, "scmp") || MATCH (s, "complement"))
+        else if (MATCH (s, "scmp") || MATCH (s, "complement")
+              || MATCH (s, "comp"))
         {
-            info = GxB_set (D, field, GrB_SCMP) ;
+            info = GxB_set (D, field, GrB_COMP) ;
+        }
+        else if (MATCH (s, "structure") || MATCH (s, "structural"))
+        {
+            info = GxB_set (D, field, GrB_STRUCTURE) ;
+        }
+        else if (MATCH (s, "structural complement"))
+        {
+            info = GxB_set (D, field, GrB_COMP + GrB_STRUCTURE) ;
         }
         else if (MATCH (s, "replace"))
         {
             info = GxB_set (D, field, GrB_REPLACE) ;
         }
+        else if (MATCH (s, "saxpy"))
+        {
+            info = GxB_set (D, field, GxB_AxB_SAXPY) ;
+        }
         else if (MATCH (s, "gustavson"))
         {
             info = GxB_set (D, field, GxB_AxB_GUSTAVSON) ;
@@ -86,6 +99,10 @@ static bool get_descriptor
         {
             info = GxB_set (D, field, GxB_AxB_HEAP) ;
         }
+        else if (MATCH (s, "hash"))
+        {
+            info = GxB_set (D, field, GxB_AxB_HASH) ;
+        }
         else
         {
             // the string must be one of the four strings listed above
diff --git a/Test/GB_mx_mxArray_to_Matrix.c b/Test/GB_mx_mxArray_to_Matrix.c
index 8bc99386b5..804e0024cb 100644
--- a/Test/GB_mx_mxArray_to_Matrix.c
+++ b/Test/GB_mx_mxArray_to_Matrix.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_Matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -291,11 +291,7 @@ GrB_Matrix GB_mx_mxArray_to_Matrix     // returns GraphBLAS version of A
                 return (NULL) ;
             }
         }
-        if ((atype_in == Complex)
-            #ifdef MY_COMPLEX
-            || (atype_in == My_Complex)
-            #endif
-            )
+        if (atype_in == Complex)
         {
             // copy the real part (Mx) and imaginary part (Mz) into A->x
             GB_mx_complex_merge (anz, (double *) (A->x), Amatrix) ;
diff --git a/Test/GB_mx_mxArray_to_SelectOp.c b/Test/GB_mx_mxArray_to_SelectOp.c
index 08775eb931..98c0005834 100644
--- a/Test/GB_mx_mxArray_to_SelectOp.c
+++ b/Test/GB_mx_mxArray_to_SelectOp.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_SelectOp
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_mxArray_to_Semiring.c b/Test/GB_mx_mxArray_to_Semiring.c
index c1d74c9a42..4aa2421d2e 100644
--- a/Test/GB_mx_mxArray_to_Semiring.c
+++ b/Test/GB_mx_mxArray_to_Semiring.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_Semiring
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -89,9 +89,10 @@ bool GB_mx_mxArray_to_Semiring         // true if successful
     bool zbool ;
     switch (multiply_opcode)
     {
-        // 10 z=f(x,y), all x,y,z the same type
+        // 11 z=f(x,y), all x,y,z the same type
         case GB_FIRST_opcode   : zbool = false ; break ;
         case GB_SECOND_opcode  : zbool = false ; break ;
+        case GB_PAIR_opcode    : zbool = false ; break ;
         case GB_MIN_opcode     : zbool = false ; break ;
         case GB_MAX_opcode     : zbool = false ; break ;
         case GB_PLUS_opcode    : zbool = false ; break ;
@@ -153,7 +154,7 @@ bool GB_mx_mxArray_to_Semiring         // true if successful
         return (false) ;
     }
 
-    ASSERT_MONOID_OK (add, "semiring add", GB0) ;
+    ASSERT_BINARYOP_OK (add, "semiring add", GB0) ;
     ASSERT_BINARYOP_OK (multiply, "semiring multiply", GB0) ;
 
     // create the monoid with the add operator and its identity value
diff --git a/Test/GB_mx_mxArray_to_UnaryOp.c b/Test/GB_mx_mxArray_to_UnaryOp.c
index 809279d9db..c9b7f58b29 100644
--- a/Test/GB_mx_mxArray_to_UnaryOp.c
+++ b/Test/GB_mx_mxArray_to_UnaryOp.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_UnaryOp
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_mxArray_to_Vector.c b/Test/GB_mx_mxArray_to_Vector.c
index db42f6822d..8eb3604988 100644
--- a/Test/GB_mx_mxArray_to_Vector.c
+++ b/Test/GB_mx_mxArray_to_Vector.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_Vector
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_mxArray_to_array.c b/Test/GB_mx_mxArray_to_array.c
index 39da8950bc..b85f42f4f6 100644
--- a/Test/GB_mx_mxArray_to_array.c
+++ b/Test/GB_mx_mxArray_to_array.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_array: get a dense numerical MATLAB array
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_mxArray_to_indices.c b/Test/GB_mx_mxArray_to_indices.c
index ff1508654f..6570882aee 100644
--- a/Test/GB_mx_mxArray_to_indices.c
+++ b/Test/GB_mx_mxArray_to_indices.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_indices
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -33,7 +33,6 @@ bool GB_mx_mxArray_to_indices       // true if successful, false otherwise
         (*handle) = I ;
         (*I_is_list) = false ;
         // Icolon not used
-        // printf ("got index (:)\n") ;
     }
     else
     {
@@ -69,8 +68,6 @@ bool GB_mx_mxArray_to_indices       // true if successful, false otherwise
             {
                 (*ni) = GxB_RANGE ;
                 Icolon [GxB_INC] = 1 ;
-                // printf ("got range ("GBd":"GBd")\n",
-                //     Icolon [GxB_BEGIN], Icolon [GxB_END]) ;
             }
             else
             {
@@ -96,8 +93,6 @@ bool GB_mx_mxArray_to_indices       // true if successful, false otherwise
                     (*ni) = GxB_BACKWARDS ;
                     Icolon [GxB_INC] = -iinc ;
                 }
-                // printf ("got stride ("GBd":"GBd":"GBd")\n",
-                //     Icolon [GxB_BEGIN], Icolon [GxB_INC], Icolon [GxB_END]) ;
             }
             (*handle) = Icolon ;
 
@@ -114,7 +109,6 @@ bool GB_mx_mxArray_to_indices       // true if successful, false otherwise
             I = mxGetData (I_matlab) ;
             (*ni) = (uint64_t) mxGetNumberOfElements (I_matlab) ;
             (*handle) = I ;
-            // printf ("got index list, size "GBd"\n", (int64_t) (*ni)) ;
         }
     }
 
diff --git a/Test/GB_mx_mxArray_to_string.c b/Test/GB_mx_mxArray_to_string.c
index c6950e3bd9..2606a96e78 100644
--- a/Test/GB_mx_mxArray_to_string.c
+++ b/Test/GB_mx_mxArray_to_string.c
@@ -2,7 +2,7 @@
 // GB_mx_mxArray_to_string.c: get a MATLAB string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_object_to_mxArray.c b/Test/GB_mx_object_to_mxArray.c
index febe6bacf2..bd91d65a7a 100644
--- a/Test/GB_mx_object_to_mxArray.c
+++ b/Test/GB_mx_object_to_mxArray.c
@@ -2,7 +2,7 @@
 // GB_mx_object_to_mxArray
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -124,11 +124,7 @@ mxArray *GB_mx_object_to_mxArray   // returns the MATLAB mxArray
         AS_IF_FREE (C->x) ;   // unlink C->x from C since it's now in MATLAB C
 
     }
-    else if (C->type == Complex
-        #ifdef MY_COMPLEX
-        || C->type == My_Complex
-        #endif
-        )
+    else if (C->type == Complex)
     {
 
         // user-defined Complex type
diff --git a/Test/GB_mx_put_global.c b/Test/GB_mx_put_global.c
index 94088dab31..06dbcbb6f7 100644
--- a/Test/GB_mx_put_global.c
+++ b/Test/GB_mx_put_global.c
@@ -2,7 +2,7 @@
 // GB_mx_put_global: put the GraphBLAS status in MATLAB workspace
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_put_time.c b/Test/GB_mx_put_time.c
index 7d59bd705b..c7df1c8159 100644
--- a/Test/GB_mx_put_time.c
+++ b/Test/GB_mx_put_time.c
@@ -2,7 +2,7 @@
 // GB_mx_put_time: put the time back to the global MATLAB workspace
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_same.c b/Test/GB_mx_same.c
index cfcae5db2a..dcc6fe604d 100644
--- a/Test/GB_mx_same.c
+++ b/Test/GB_mx_same.c
@@ -2,7 +2,7 @@
 // GB_mx_same: check if two arrays are equal
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_string_to_BinaryOp.c b/Test/GB_mx_string_to_BinaryOp.c
index 3dad08a0a3..c5a17a3285 100644
--- a/Test/GB_mx_string_to_BinaryOp.c
+++ b/Test/GB_mx_string_to_BinaryOp.c
@@ -2,7 +2,7 @@
 // GB_mx_string_to_BinaryOp.c: get a GraphBLAS operator from MATLAB strings
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -10,7 +10,7 @@
 #include "GB_mex.h"
 
 // opname_mx: a MATLAB string defining the operator name (25 kinds):
-// 10: first, second, min, max, plus, minus, rminus, times, div, rdiv
+// 12: first, second, pair, any, min, max, plus, minus, rminus, times, div, rdiv
 //  6: iseq, isne, isgt, islt, isge, isle,
 //  6: eq, ne, gt, lt, ge, le,
 //  3: or, and, xor
@@ -73,7 +73,7 @@ bool GB_mx_string_to_BinaryOp          // true if successful, false otherwise
         //----------------------------------------------------------------------
 
         // user-defined Complex binary operator
-        opcode  = GB_USER_R_opcode ;    // generic user-defined opcode
+        opcode  = GB_USER_opcode ;      // user-defined opcode
         opclass = mxDOUBLE_CLASS ;      // MATLAB class for complex
 
         if (len == 0)
@@ -81,9 +81,11 @@ bool GB_mx_string_to_BinaryOp          // true if successful, false otherwise
             op = NULL ;                 // no default Complex operator
         }
 
-        // 10 binary operators z=f(x,y), all x,y,z are Complex
+        // 12 binary operators z=f(x,y), all x,y,z are Complex
         else if (MATCH (opname, "first"   )) { op = Complex_first  ; }
         else if (MATCH (opname, "second"  )) { op = Complex_second ; }
+        else if (MATCH (opname, "pair"    )) { op = Complex_pair   ; }
+        else if (MATCH (opname, "any"     )) { op = Complex_second ; }
         else if (MATCH (opname, "min"     )) { op = Complex_min    ; }
         else if (MATCH (opname, "max"     )) { op = Complex_max    ; }
         else if (MATCH (opname, "plus"    )) { op = Complex_plus   ; }
@@ -136,9 +138,11 @@ bool GB_mx_string_to_BinaryOp          // true if successful, false otherwise
             opcode = default_opcode ;
         }
 
-        // 10 binary operators z=f(x,y), all x,y,z of the same type
+        // 12 binary operators z=f(x,y), all x,y,z of the same type
         else if (MATCH (opname, "first"   )) { opcode = GB_FIRST_opcode ; }
         else if (MATCH (opname, "second"  )) { opcode = GB_SECOND_opcode ; }
+        else if (MATCH (opname, "pair"    )) { opcode = GB_PAIR_opcode ; }
+        else if (MATCH (opname, "any"     )) { opcode = GB_ANY_opcode ; }
         else if (MATCH (opname, "min"     )) { opcode = GB_MIN_opcode ; }
         else if (MATCH (opname, "max"     )) { opcode = GB_MAX_opcode ; }
         else if (MATCH (opname, "plus"    )) { opcode = GB_PLUS_opcode ; }
@@ -174,23 +178,23 @@ bool GB_mx_string_to_BinaryOp          // true if successful, false otherwise
         {
             // z = complex(x,y) = x + i*y
             op = Complex_complex ;
-            opcode = GB_USER_R_opcode ;
+            opcode = GB_USER_opcode ;
             opclass = mxDOUBLE_CLASS ;
         }
 
         else
         {
-            mexWarnMsgIdAndTxt ("GB:warn", "unrecognised function name") ;
+            mexWarnMsgIdAndTxt ("GB:warn", "unrecognized function name") ;
             return (false) ;
         }
 
-        if (opcode != GB_USER_R_opcode)
+        if (opcode != GB_USER_opcode)
         {
             // get the opclass from the opclass_mx string, if present
             opclass = GB_mx_string_to_classID (opclass, opclass_mx) ;
             if (opclass == mxUNKNOWN_CLASS)
             {
-                mexWarnMsgIdAndTxt ("GB:warn", "unrecognised op class") ;
+                mexWarnMsgIdAndTxt ("GB:warn", "unrecognized op class") ;
                 return (false) ;
             }
         }
@@ -240,6 +244,48 @@ bool GB_mx_string_to_BinaryOp          // true if successful, false otherwise
                 }
                 break ;
 
+            case GB_ANY_opcode:
+
+                switch (opclass)
+                {
+                    case mxLOGICAL_CLASS : op = GxB_ANY_BOOL   ; break ;
+                    case mxINT8_CLASS    : op = GxB_ANY_INT8   ; break ;
+                    case mxUINT8_CLASS   : op = GxB_ANY_UINT8  ; break ;
+                    case mxINT16_CLASS   : op = GxB_ANY_INT16  ; break ;
+                    case mxUINT16_CLASS  : op = GxB_ANY_UINT16 ; break ;
+                    case mxINT32_CLASS   : op = GxB_ANY_INT32  ; break ;
+                    case mxUINT32_CLASS  : op = GxB_ANY_UINT32 ; break ;
+                    case mxINT64_CLASS   : op = GxB_ANY_INT64  ; break ;
+                    case mxUINT64_CLASS  : op = GxB_ANY_UINT64 ; break ;
+                    case mxSINGLE_CLASS  : op = GxB_ANY_FP32   ; break ;
+                    case mxDOUBLE_CLASS  : op = GxB_ANY_FP64   ; break ;
+                    default              : 
+                        mexWarnMsgIdAndTxt ("GB:warn","unknown type") ;
+                        return (false) ;
+                }
+                break ;
+
+            case GB_PAIR_opcode:
+
+                switch (opclass)
+                {
+                    case mxLOGICAL_CLASS : op = GxB_PAIR_BOOL   ; break ;
+                    case mxINT8_CLASS    : op = GxB_PAIR_INT8   ; break ;
+                    case mxUINT8_CLASS   : op = GxB_PAIR_UINT8  ; break ;
+                    case mxINT16_CLASS   : op = GxB_PAIR_INT16  ; break ;
+                    case mxUINT16_CLASS  : op = GxB_PAIR_UINT16 ; break ;
+                    case mxINT32_CLASS   : op = GxB_PAIR_INT32  ; break ;
+                    case mxUINT32_CLASS  : op = GxB_PAIR_UINT32 ; break ;
+                    case mxINT64_CLASS   : op = GxB_PAIR_INT64  ; break ;
+                    case mxUINT64_CLASS  : op = GxB_PAIR_UINT64 ; break ;
+                    case mxSINGLE_CLASS  : op = GxB_PAIR_FP32   ; break ;
+                    case mxDOUBLE_CLASS  : op = GxB_PAIR_FP64   ; break ;
+                    default              : 
+                        mexWarnMsgIdAndTxt ("GB:warn","unknown type") ;
+                        return (false) ;
+                }
+                break ;
+
             case GB_MIN_opcode   :
 
                 switch (opclass)
@@ -725,9 +771,8 @@ bool GB_mx_string_to_BinaryOp          // true if successful, false otherwise
                 }
                 break ;
 
-            case GB_NOP_opcode   :
-            case GB_USER_C_opcode   :
-            case GB_USER_R_opcode   :
+            case GB_NOP_opcode  :
+            case GB_USER_opcode :
 
                 // no operation is requested so return NULL, or user-defined
                 break ;
diff --git a/Test/GB_mx_string_to_UnaryOp.c b/Test/GB_mx_string_to_UnaryOp.c
index 58dbbfba40..0770a9a541 100644
--- a/Test/GB_mx_string_to_UnaryOp.c
+++ b/Test/GB_mx_string_to_UnaryOp.c
@@ -2,7 +2,7 @@
 // GB_mx_string_to_UnaryOp.c: get a GraphBLAS operator from MATLAB strings
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
@@ -11,10 +11,8 @@
 
 // opname_mx: a MATLAB string defining the operator name (built-in):
 //      'one', 'identity', 'ainv', 'abs', 'minv', 'not'
-// or a user-defined operator defined at run-time:
+// or a user-defined operator:
 //      'conj', 'real', 'imag', 'cabs', 'angle', 'complex_real', 'complex_imag'
-// or a user-defined operator defined at compile-time:
-//      'my_scale'
 
 // default_opcode: default if opname_mx is NULL
 
@@ -69,7 +67,7 @@ bool GB_mx_string_to_UnaryOp           // true if successful, false otherwise
         //----------------------------------------------------------------------
 
         // user-defined Complex unary operator
-        opcode  = GB_USER_R_opcode ;    // generic user-defined opcode
+        opcode  = GB_USER_opcode ;      // user-defined opcode
         opclass = mxDOUBLE_CLASS ;      // MATLAB class for complex
 
         if (len == 0)
@@ -128,43 +126,29 @@ bool GB_mx_string_to_UnaryOp           // true if successful, false otherwise
         { 
             // z = cmplx (x,0), convert x double to real part of Complex z
             op = Complex_complex_real ;
-            opcode = GB_USER_R_opcode ;
+            opcode = GB_USER_opcode ;
             opclass = mxDOUBLE_CLASS ;
         }
         else if (MATCH (opname, "complex_imag" ))
         { 
             // z = cmplx (0,x), convert x double to imag part of Complex z
             op = Complex_complex_imag ;
-            opcode = GB_USER_R_opcode ;
+            opcode = GB_USER_opcode ;
             opclass = mxDOUBLE_CLASS ;
         }
-
-        #ifdef MY_SCALE
-
-        else if (MATCH (opname, "my_scale" ))
-        { 
-            // z = my_scalar*x; default value of my_scalar is 2
-            op = My_scale ;
-            opcode = GB_USER_C_opcode ;
-            opclass = mxDOUBLE_CLASS ;
-            my_scalar = 2 ;
-        }
-
-        #endif
-
         else
         {
-            mexWarnMsgIdAndTxt ("GB:warn", "unrecognised function name") ;
+            mexWarnMsgIdAndTxt ("GB:warn", "unrecognized function name") ;
             return (false) ;
         }
 
-        if (opcode < GB_USER_C_opcode)
+        if (opcode < GB_USER_opcode)
         {
             // get the opclass from the opclass_mx string, if present
             opclass = GB_mx_string_to_classID (opclass, opclass_mx) ;
             if (opclass == mxUNKNOWN_CLASS)
             {
-                mexWarnMsgIdAndTxt ("GB:warn", "unrecognised op class") ;
+                mexWarnMsgIdAndTxt ("GB:warn", "unrecognized op class") ;
                 return (false) ;
             }
         }
@@ -298,9 +282,8 @@ bool GB_mx_string_to_UnaryOp           // true if successful, false otherwise
                 }
                 break ;
 
-            case GB_NOP_opcode   :
-            case GB_USER_R_opcode   :
-            case GB_USER_C_opcode   :
+            case GB_NOP_opcode  :
+            case GB_USER_opcode :
 
                 // no operation is requested so return NULL, or user-defined
                 break ;
diff --git a/Test/GB_mx_string_to_classID.c b/Test/GB_mx_string_to_classID.c
index 302651d55e..2927b128b4 100644
--- a/Test/GB_mx_string_to_classID.c
+++ b/Test/GB_mx_string_to_classID.c
@@ -2,7 +2,7 @@
 // GB_mx_string_to_classid.c: return the class ID from a class string
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_xsame.c b/Test/GB_mx_xsame.c
index e7eee9916d..211577b1dc 100644
--- a/Test/GB_mx_xsame.c
+++ b/Test/GB_mx_xsame.c
@@ -2,7 +2,7 @@
 // GB_mx_xsame: check if two arrays are equal (ignoring zombies)
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/GB_mx_xsame32.c b/Test/GB_mx_xsame32.c
new file mode 100644
index 0000000000..eb30476e45
--- /dev/null
+++ b/Test/GB_mx_xsame32.c
@@ -0,0 +1,51 @@
+//------------------------------------------------------------------------------
+// GB_mx_xsame32: check if two FP32 arrays are equal (ignoring zombies)
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+bool GB_mx_xsame32  // true if X and Y match to within eps (zombies skipped)
+(
+    float *X,
+    float *Y,
+    int64_t len,    // number of entries in X, Y, and I
+    int64_t *I,     // row indices; an entry with I [k] < 0 is a zombie
+    float eps       // largest relative error accepted (eps > 0)
+)
+{
+    // identical pointers are trivially the same; otherwise all three arrays
+    // must be present
+    if (X == Y) return (true) ;
+    if (X == NULL || Y == NULL || I == NULL) return (false) ;
+    for (int64_t k = 0 ; k < len ; k++)
+    {
+        // zombies are not compared
+        if (I [k] < 0) continue ;
+        const float x = X [k] ;
+        const float y = Y [k] ;
+        const int kind = fpclassify (x) ;
+        if (fpclassify (y) != kind) return (false) ;
+        switch (kind)
+        {
+            case FP_ZERO :      // two zeros always match
+            case FP_NAN :       // two NaNs are treated as a match
+                break ;
+            case FP_INFINITE :  // the signs of the infinities must agree
+                if (x != y) return (false) ;
+                break ;
+            default :           // normal or subnormal: relative error test
+                {
+                    float relerr = fabsf (x - y) / fabsf (x) ;
+                    if (relerr > eps) return (false) ;
+                }
+                break ;
+        }
+    }
+    return (true) ;
+}
+
diff --git a/Test/GB_mx_xsame64.c b/Test/GB_mx_xsame64.c
new file mode 100644
index 0000000000..80bc1cb30a
--- /dev/null
+++ b/Test/GB_mx_xsame64.c
@@ -0,0 +1,51 @@
+//------------------------------------------------------------------------------
+// GB_mx_xsame64: check if two FP64 arrays are equal (ignoring zombies)
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+bool GB_mx_xsame64  // true if arrays X and Y are the same (ignoring zombies)
+(
+    double *X,      // first array of values to compare
+    double *Y,      // second array of values to compare
+    int64_t len,    // length of X and Y
+    int64_t *I,     // row indices (for zombies), same length as X and Y
+    double eps      // relative error tolerance allowed (eps > 0)
+)
+{
+    if (X == Y) return (true) ;         // same array (or both NULL)
+    if (X == NULL) return (false) ;     // only one of X and Y is present
+    if (Y == NULL) return (false) ;
+    if (I == NULL) return (false) ;     // cannot tell which entries are zombies
+    for (int64_t i = 0 ; i < len ; i++)
+    {
+        // check X [i] and Y [i], but ignore zombies (entries with I [i] < 0)
+        if (I [i] >= 0)
+        {
+            int xclass = fpclassify (X [i]) ;
+            if (xclass != fpclassify (Y [i])) return (false) ;
+            if (xclass == FP_ZERO)
+            {
+                // both are zero (of either sign), which is OK
+            }
+            else if (xclass == FP_INFINITE)
+            {
+                // + or -infinity: the signs must match
+                if (X [i] != Y [i]) return (false) ;
+            }
+            else if (xclass != FP_NAN)  // a pair of NaNs counts as the same
+            {
+                // both are normal or subnormal, and nonzero (fabs, not fabsf)
+                double err = fabs (X [i] - Y [i]) / fabs (X [i]) ;
+                if (err > eps) return (false) ;
+            }
+        }
+    }
+    return (true) ;
+}
+
diff --git a/Test/GB_random_mask.m b/Test/GB_random_mask.m
index a5cb9154aa..e1de2cddef 100644
--- a/Test/GB_random_mask.m
+++ b/Test/GB_random_mask.m
@@ -5,6 +5,9 @@
 % With 3 arguments, Mask is a sparse logical matrix.
 % With 4, Mask is a struct.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 M = (sprand (m, n, d) ~= 0) ;
 
 if (nargin < 4)
diff --git a/Test/GB_spec_Col_assign.m b/Test/GB_spec_Col_assign.m
index 498fc7a6df..13b5a19ef8 100644
--- a/Test/GB_spec_Col_assign.m
+++ b/Test/GB_spec_Col_assign.m
@@ -8,7 +8,7 @@
 %
 % This function does the same thing as GrB_Col_assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
diff --git a/Test/GB_spec_Col_extract.m b/Test/GB_spec_Col_extract.m
index 1257d2b966..07d3223b82 100644
--- a/Test/GB_spec_Col_extract.m
+++ b/Test/GB_spec_Col_extract.m
@@ -1,7 +1,7 @@
 function w = GB_spec_Col_extract (w, mask, accum, A, I, j, descriptor)
 %GB_SPEC_COL_EXTRACT a MATLAB mimic of GrB_Col_extract
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargout > 1 || nargin ~= 7)
diff --git a/Test/GB_spec_Matrix_extract.m b/Test/GB_spec_Matrix_extract.m
index 09f049e3ee..dde1e9665d 100644
--- a/Test/GB_spec_Matrix_extract.m
+++ b/Test/GB_spec_Matrix_extract.m
@@ -6,7 +6,7 @@
 %
 % MATLAB mimic of C<Mask> = accum (A (I,J))
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -19,8 +19,9 @@
 
 C = GB_spec_matrix (C) ;
 A = GB_spec_matrix (A) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace, Mask_comp, Atrans, ~] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_Matrix_extractElement.m b/Test/GB_spec_Matrix_extractElement.m
index f7dba6aac5..853aede624 100644
--- a/Test/GB_spec_Matrix_extractElement.m
+++ b/Test/GB_spec_Matrix_extractElement.m
@@ -1,8 +1,7 @@
 function [x no_value] = GB_spec_Matrix_extractElement (A, i, j, xclass)
-%
 %GB_SPEC_MATRIX_EXTRACTELEMENT a MATLAB mimic of GrB_Matrix_extractElement
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 A = GB_spec_matrix (A) ;
diff --git a/Test/GB_spec_Row_assign.m b/Test/GB_spec_Row_assign.m
index 25e3f090d1..6c4522dea7 100644
--- a/Test/GB_spec_Row_assign.m
+++ b/Test/GB_spec_Row_assign.m
@@ -9,7 +9,7 @@
 %
 % This function does the same thing as GrB_Row_assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
diff --git a/Test/GB_spec_Vector_extract.m b/Test/GB_spec_Vector_extract.m
index 3fcae82d8e..c609f065bb 100644
--- a/Test/GB_spec_Vector_extract.m
+++ b/Test/GB_spec_Vector_extract.m
@@ -1,7 +1,7 @@
 function w = GB_spec_Vector_extract (w, mask, accum, u, I, descriptor)
 %GB_SPEC_VECTOR_EXTRACT a MATLAB mimic of GrB_Vector_extract
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargout > 1 || nargin ~= 6)
diff --git a/Test/GB_spec_Vector_extractElement.m b/Test/GB_spec_Vector_extractElement.m
index 118a677269..04926e5fb6 100644
--- a/Test/GB_spec_Vector_extractElement.m
+++ b/Test/GB_spec_Vector_extractElement.m
@@ -2,7 +2,7 @@
 %
 %GB_SPEC_VECTOR_EXTRACTELEMENT a MATLAB mimic of GrB_Matrix_extractElement
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (size (A,2) ~= 1)
diff --git a/Test/GB_spec_accum.m b/Test/GB_spec_accum.m
index 3a2796c8a2..77e5e52002 100644
--- a/Test/GB_spec_accum.m
+++ b/Test/GB_spec_accum.m
@@ -5,7 +5,7 @@
 %
 % Apply accum binary operator to the input C and the intermediate result T.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % get the operator; default is class(C) if class is not present
diff --git a/Test/GB_spec_accum_mask.m b/Test/GB_spec_accum_mask.m
index 004c65fd4f..b610f41477 100644
--- a/Test/GB_spec_accum_mask.m
+++ b/Test/GB_spec_accum_mask.m
@@ -1,37 +1,12 @@
 function C = GB_spec_accum_mask (C, Mask, accum, T, C_replace, ...
     Mask_complement, identity)
-%
 %GB_SPEC_ACCUM_MASK apply the accumulator and mask
 %
 % C<Mask> = accum (C,T): apply the accum, then mask, and return the result
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-% C_orig = C ;
-
 [Z simple] = GB_spec_accum (accum, C, T, identity) ;
 C = GB_spec_mask (C, Mask, Z, C_replace, Mask_complement, identity) ;
 
-% also test the simpler version (test disabled for now, to speed up testing)
-%{
-if (simple)
-    % create a function handle for accum: GB_spec_op(accum,x,y)
-    if (isempty (accum))
-        C2 = accum_mask (C_orig, Mask, [ ], T, C_replace, Mask_complement) ;
-    else
-        global accum_struct
-        accum_struct = accum ;
-        C2 = accum_mask (C_orig, Mask, @afunc, T, C_replace, Mask_complement) ;
-    end
-    assert (isequal (C, C2)) ;
-end
-end
-
-function z = afunc (x,y)
-global accum_struct
-z = GB_spec_op (accum_struct, x, y) ;
-end
-%}
-
-
diff --git a/Test/GB_spec_apply.m b/Test/GB_spec_apply.m
index cf5e5f992f..21b119d190 100644
--- a/Test/GB_spec_apply.m
+++ b/Test/GB_spec_apply.m
@@ -4,7 +4,7 @@
 % Usage:
 % C = GB_spec_apply (C, Mask, accum, op, A, descriptor)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -18,8 +18,9 @@
 C = GB_spec_matrix (C) ;
 A = GB_spec_matrix (A) ;
 [opname xyclass zclass] = GB_spec_operator (op, C.class) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans ~] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_assign.m b/Test/GB_spec_assign.m
index 64f7e14f55..00e6a64fad 100644
--- a/Test/GB_spec_assign.m
+++ b/Test/GB_spec_assign.m
@@ -16,7 +16,7 @@
 % not affect any part of C outside that row or column.  Those two functions
 % have their own GB_spec_Row_assign.m and GB_spec_Col_assign.m functions.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -31,8 +31,9 @@
 % and with where X(~X.pattern)==identity for all matrices A, B, and C.
 C = GB_spec_matrix (C) ;
 A = GB_spec_matrix (A) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans ignore] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_build.m b/Test/GB_spec_build.m
index d7bd204a65..ddb0d4a1d8 100644
--- a/Test/GB_spec_build.m
+++ b/Test/GB_spec_build.m
@@ -46,7 +46,7 @@
 % parameters, or pass fewer inputs.  For exampe S = GB_spec_build (I, J, X,
 % nrows, ncols) uses defaults for op, and order, but not X, nrows and ncols.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
diff --git a/Test/GB_spec_compare.m b/Test/GB_spec_compare.m
index 10c1352847..3a735a23ad 100644
--- a/Test/GB_spec_compare.m
+++ b/Test/GB_spec_compare.m
@@ -7,15 +7,18 @@
 % some GraphBLAS method.  C_mex = GrG_mex_* (...) is the output of the
 % corresponding MATLAB interface to the true GraphBLAS method, in C.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % get the semiring identity
 if (nargin < 3)
-    identity = [ ] ;
+    identity = 0 ;
 end
 if (isempty (identity))
-    identity = 0 ;
+    % results from the ANY monoid or operator cannot be checked with
+    % this function, since many results are possible.
+    ok = true ;
+    return
 end
 
 if (nargin < 4)
@@ -78,12 +81,9 @@
 %}
 
 if (~ok_class || ~ok_pattern || ~ok_matrix)
-    % C_spec
-    % % C_mex
-    % C1
-    % C2
     fprintf ('matrix: %d pattern: %d class %d\n', ...
         ok_matrix, ok_pattern, ok_class) ;
+    norm (double (C1.matrix) - double (C2.matrix), 1)
 end
 
 % with no output, just assert that ok is true
diff --git a/Test/GB_spec_descriptor.m b/Test/GB_spec_descriptor.m
index 0342e3e2f0..c04f06daa6 100644
--- a/Test/GB_spec_descriptor.m
+++ b/Test/GB_spec_descriptor.m
@@ -1,6 +1,5 @@
-function [C_replace Mask_comp Atrans Btrans descriptor] = ...
+function [C_replace Mask_comp Atrans Btrans Mask_struct descriptor] = ...
     GB_spec_descriptor (descriptor)
-%
 %GB_SPEC_DESCRIPTOR return components of a descriptor
 %
 % Returns the components of the descriptor struct.  Defaults are used if not
@@ -13,7 +12,7 @@
 % inp0:  'default' or 'tran'
 % inp1:  'default' or 'tran'
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isempty (descriptor))
@@ -39,8 +38,23 @@
 end
 
 C_replace = isequal (descriptor.outp, 'replace') ;
-Mask_comp = isequal (descriptor.mask, 'scmp') ;
 Atrans    = isequal (descriptor.inp0, 'tran') ;
 Btrans    = isequal (descriptor.inp1, 'tran') ;
 
+switch (descriptor.mask)
+    case {'scmp', 'complement'}
+        Mask_comp = true ;
+        Mask_struct = false ;
+    case {'structural'}
+        Mask_comp = false ;
+        Mask_struct = true ;
+    case {'structural complement'}
+        Mask_comp = true ;
+        Mask_struct = true ;
+    otherwise
+        Mask_comp = false ;
+        Mask_struct = false ;
+    end
+end
+
 
diff --git a/Test/GB_spec_eWiseAdd_Matrix.m b/Test/GB_spec_eWiseAdd_Matrix.m
index 124dd81148..6d48b97684 100644
--- a/Test/GB_spec_eWiseAdd_Matrix.m
+++ b/Test/GB_spec_eWiseAdd_Matrix.m
@@ -7,7 +7,7 @@
 % Computes C<Mask> = accum(C,T), in GraphBLAS notation, where T =A+B, A'+B,
 % A+B' or A'+B'.  The pattern of T is the union of A and B.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -22,8 +22,9 @@
 A = GB_spec_matrix (A) ;
 B = GB_spec_matrix (B) ;
 [add_op xyclass zclass] = GB_spec_operator (add, C.class) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans Btrans] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_eWiseAdd_Vector.m b/Test/GB_spec_eWiseAdd_Vector.m
index af1b0626cd..1978d0918f 100644
--- a/Test/GB_spec_eWiseAdd_Vector.m
+++ b/Test/GB_spec_eWiseAdd_Vector.m
@@ -7,7 +7,7 @@
 % Computes w<mask> = accum(w,t), in GraphBLAS notation, where t =u+v,
 % The pattern of t is the union of u and v.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
diff --git a/Test/GB_spec_eWiseMult_Matrix.m b/Test/GB_spec_eWiseMult_Matrix.m
index 91a4db5b9d..acc3496974 100644
--- a/Test/GB_spec_eWiseMult_Matrix.m
+++ b/Test/GB_spec_eWiseMult_Matrix.m
@@ -7,7 +7,7 @@
 % Computes C<Mask> = accum(C,T), in GraphBLAS notation, where T =A.*B, A'.*B,
 % A.*B' or A'.*B'.  The pattern of T is the union of A and B.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -22,8 +22,9 @@
 A = GB_spec_matrix (A) ;
 B = GB_spec_matrix (B) ;
 [mult_op xyclass zclass] = GB_spec_operator (mult, C.class) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans Btrans] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_eWiseMult_Vector.m b/Test/GB_spec_eWiseMult_Vector.m
index 5dba60db0e..1448b65f39 100644
--- a/Test/GB_spec_eWiseMult_Vector.m
+++ b/Test/GB_spec_eWiseMult_Vector.m
@@ -7,7 +7,7 @@
 % Computes w<mask> = accum(w,t), in GraphBLAS notation, where t =u.*v,
 % The pattern of t is the union of u and v.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
diff --git a/Test/GB_spec_extractTuples.m b/Test/GB_spec_extractTuples.m
index 2ef5bb31ec..beb93a4612 100644
--- a/Test/GB_spec_extractTuples.m
+++ b/Test/GB_spec_extractTuples.m
@@ -1,7 +1,7 @@
 function [I, J, X] = GB_spec_extractTuples (A, xclass)
 %GB_SPEC_EXTRACTTUPLES a MATLAB mimic of GrB_*_extractTuples
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 A = GB_spec_matrix (A) ;
diff --git a/Test/GB_spec_getmask.m b/Test/GB_spec_getmask.m
index 0b3694289a..93a4d80848 100644
--- a/Test/GB_spec_getmask.m
+++ b/Test/GB_spec_getmask.m
@@ -1,8 +1,23 @@
-function Mask = GB_spec_getmask (Mask)
+function Mask = GB_spec_getmask (Mask, Mask_struct)
 %GB_SPEC_GETMASK return the mask, typecasted to logical
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 if (isstruct (Mask))
-    Mask = Mask.matrix ;
+    if (Mask_struct)
+        if (isfield (Mask, 'pattern'))
+            Mask = Mask.pattern ;
+        elseif (issparse (Mask))
+            Mask = GB_spones_mex (Mask) ;
+        end
+    else
+        Mask = Mask.matrix ;
+    end
+else
+    if (Mask_struct && issparse (Mask))
+        Mask = GB_spones_mex (Mask) ;
+    end
 end
 
 Mask = GB_mex_cast (full (Mask), 'logical') ;
diff --git a/Test/GB_spec_identity.m b/Test/GB_spec_identity.m
index 8c38f2129c..45d88e89f9 100644
--- a/Test/GB_spec_identity.m
+++ b/Test/GB_spec_identity.m
@@ -14,7 +14,7 @@
 % The 8 addititive monoids supported are 'min', 'max', 'plus', 'times', 'or',
 % 'and', 'xor', and 'eq'.   For the last 4 the class must be 'logical'
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 1)
@@ -93,6 +93,10 @@
         % x == x * 1
         identity = 1 ;
 
+    case 'any'
+
+        identity = [ ] ;
+
     case 'or'
 
         % x == x or false
diff --git a/Test/GB_spec_kron.m b/Test/GB_spec_kron.m
index 7fce0b1ede..13d2581d25 100644
--- a/Test/GB_spec_kron.m
+++ b/Test/GB_spec_kron.m
@@ -7,7 +7,7 @@
 % Computes C<Mask> = accum(C,T), in GraphBLAS notation, where T = kron(A,B),
 % kron(A',B), kron(A,B') or kron(A',B')
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -22,8 +22,9 @@
 A = GB_spec_matrix (A) ;
 B = GB_spec_matrix (B) ;
 [mult_op xyclass zclass] = GB_spec_operator (mult, C.class) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans Btrans] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_mask.m b/Test/GB_spec_mask.m
index 55bcffe98b..566e119828 100644
--- a/Test/GB_spec_mask.m
+++ b/Test/GB_spec_mask.m
@@ -12,6 +12,7 @@
 % Mask: if empty or not present, Mask = ones (size (C))
 % C_replace: set C to zero first. Default is false.
 % Mask_complement: use ~Mask instead of Mask. Default is false.
+% Mask_struct: handled by GB_spec_getmask.
 % identity: the additive identity of the semiring.  Default is zero.
 %   This is only needed because the GB_spec_* routines operate on dense
 %   matrices, and thus they need to know the value of the implicit 'zero'.
@@ -19,7 +20,7 @@
 % This method operates on both plain matrices and on structs with
 % matrix, pattern, and class components.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 6)
@@ -72,7 +73,7 @@
         R = C ;
     end
 else
-    % form the Boolean mask. For GraphBLAS, this does the
+    % form the valued mask. For GraphBLAS, this does the
     % right thing and ignores explicit zeros in Mask.
     Mask = (Mask ~= 0) ;
     if (~Mask_complement)
diff --git a/Test/GB_spec_matrix.m b/Test/GB_spec_matrix.m
index b74a2d594e..c3bc3fe66b 100644
--- a/Test/GB_spec_matrix.m
+++ b/Test/GB_spec_matrix.m
@@ -65,7 +65,7 @@
 % must first be passed to this function, C0=GB_spec_matrix(C0,identity) and
 % then C0 and C1 should be identical.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % get the semiring addititive identity, if present
diff --git a/Test/GB_spec_mxm.m b/Test/GB_spec_mxm.m
index 358c1a2a67..3153b1b2ac 100644
--- a/Test/GB_spec_mxm.m
+++ b/Test/GB_spec_mxm.m
@@ -43,7 +43,7 @@
 % C<Mask> = accum (C,T).  See GrB_accum_mask for a description of this
 % last step.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -57,12 +57,15 @@
 % Convert inputs to dense matrices with explicit patterns and classes,
 % and with where X(~X.pattern)==identity for all matrices A, B, and C.
 [multiply add identity tclass] = GB_spec_semiring (semiring) ;
+if (isempty (identity))
+    identity = 0 ;
+end
 C = GB_spec_matrix (C, identity) ;
 A = GB_spec_matrix (A, identity) ;
 B = GB_spec_matrix (B, identity) ;
-% Mask is a dense logical matrix, not a struct
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans Btrans] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
@@ -106,3 +109,4 @@
 
 % C<Mask> = accum (C,T): apply the accum, then Mask, and return the result
 C = GB_spec_accum_mask (C, Mask, accum, T, C_replace, Mask_comp, identity) ;
+
diff --git a/Test/GB_spec_mxv.m b/Test/GB_spec_mxv.m
index d8328c73b5..73dffea420 100644
--- a/Test/GB_spec_mxv.m
+++ b/Test/GB_spec_mxv.m
@@ -6,7 +6,7 @@
 %
 % w, mask, and u are vectors.  u is not transposed (descriptor inp1 ignored)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargout > 1 || nargin ~= 7)
diff --git a/Test/GB_spec_op.m b/Test/GB_spec_op.m
index 5794fea363..2c532c4eab 100644
--- a/Test/GB_spec_op.m
+++ b/Test/GB_spec_op.m
@@ -1,5 +1,4 @@
 function C = GB_spec_op (op, A, B)
-%
 %GB_SPEC_OP apply a unary or binary operator
 %
 % Apply a binary operator z = f (x,y) element-wise to x and y, or a unary
@@ -9,28 +8,16 @@
 % op or op.opname is a string with just the operator name.  Valid names of
 % binary operators are 'first', 'second', 'min', 'max', 'plus', 'minus',
 % 'rminus', 'times', 'div', 'rdiv', 'eq', 'ne', 'gt', 'lt', 'ge', 'le', 'or',
-% 'and', 'xor'.  'iseq', 'isne', 'isgt', 'islt', 'isge', 'le'.
+% 'and', 'xor'.  'iseq', 'isne', 'isgt', 'islt', 'isge', 'isle', 'pair', 'any'
 %
 % Unary operators are 'one', 'identity', 'ainv', 'abs', 'minv', and 'not'
 %
 % op.opclass: 'logical', 'int8', 'uint8', 'int16', 'uint16', 'int32',
 %  'uint32', 'int64', 'uint64', 'single', or 'double'
 %
-%  17 valid operators z=f(x,y) where all of x,y,z are any of the 11 classes:
-%  'first', 'second', 'min', 'max', 'plus', 'minus', 'times', 'div',
-%  'iseq', 'isne', 'isgt', 'islt', 'isge', 'isle', 'and', 'or', 'xor'
-%
-%  6 valid operators z=f(x,y) where x and y are any of the 11 classes, but
-%  z is logical: 'eq', 'ne', 'gt', 'lt', 'ge', 'le'
-%
-% This gives a total of (10 numeric, 6 'is', 3 bool) * 11 types = 209 ops
-% for which x, y, and z have the same type, and 6*11 = 66 unary operators
-% z=f(x) where z and x have the same type.
-%
 % The class of Z is the same as the class of the output of the operator,
 % which is op.opclass except for 'eq', 'ne', 'gt', 'lt', 'ge', 'le',
-% in which case Z is logical.  This gives 6*11 = 66 operators of this
-% type, where z = f(x,y), and z is logical.
+% in which case Z is logical.
 %
 % Intrinsic MATLAB operators are used as much as possible, so as to test
 % GraphBLAS operators.  Some must be done in GraphBLAS because the
@@ -38,7 +25,7 @@
 % Also, typecasting in MATLAB and GraphBLAS differs under underflow and
 % overflow conditions.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % get the operator name and class
@@ -66,11 +53,15 @@
 
 switch opname
 
-    % 10 binary operators, result is opclass
+    % 12 binary operators, result is opclass
     case 'first'
         z = x ;
     case 'second'
         z = y ;
+    case 'any'
+        z = y ;
+    case 'pair'
+        z = ones (size (x), opclass) ;
     case 'min'
         % min(x,y) in SuiteSparse:GraphBLAS is min(x,y,'omitnan') in MATLAB.
         % see discussion in SuiteSparse/GraphBLAS/Source/GB.h
diff --git a/Test/GB_spec_operator.m b/Test/GB_spec_operator.m
index f4062c2a9b..b825f0d8b4 100644
--- a/Test/GB_spec_operator.m
+++ b/Test/GB_spec_operator.m
@@ -1,5 +1,4 @@
 function [opname opclass zclass] = GB_spec_operator (op, opclass_default)
-%
 %GB_SPEC_OPERATOR get the contents of an operator
 %
 % On input, op can be a struct with a string op.opname that gives the operator
@@ -11,7 +10,7 @@
 % class of the output is zclass, and it is either the same as x and y, or
 % logical for 'eq', 'ne', 'gt', 'lt', 'ge', 'le'.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isempty (op))
diff --git a/Test/GB_spec_opsall.m b/Test/GB_spec_opsall.m
index 961028860c..574540c86c 100644
--- a/Test/GB_spec_opsall.m
+++ b/Test/GB_spec_opsall.m
@@ -3,13 +3,14 @@
 %
 % [mult_ops unary_ops add_ops classes semirings select_ops] = GB_spec_opsall
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 mult_ops = {
-% 10 operators where x,y,z are all the same class
+% 11 operators where x,y,z are all the same class
 'first',     % z = x
 'second',    % z = y
+'pair',      % z = 1
 'min',       % z = min(x,y)
 'max',       % z = max(x,y)
 'plus',      % z = x + y
@@ -56,6 +57,7 @@
 'max',       % z = max(x,y) : identity is -inf
 'plus',      % z = x + y    : identity is 0
 'times',     % z = x * y    : identity is 1
+'any',       % z = pick x or y: both sets
 %----------------------------
 % 4 monoids for just boolean
 'or',        % z = x || y   : identity is 0 (false)
@@ -94,9 +96,9 @@
 
 n = 0 ;
 
-% 760: x,y,z all nonboolean:  (10+6+3)*4*10
-for mult = {'first', 'second', 'min', 'max', 'plus', 'minus', 'rminus', ...
-            'times', 'div', 'rdiv', ...
+% 800: x,y,z all nonboolean:  (11+6+3)*4*10
+for mult = {'first', 'second', 'pair', 'min', 'max', 'plus', 'minus', ...
+            'rminus', 'times', 'div', 'rdiv', ...
             'iseq', 'isne', 'isgt', 'islt', 'isge', 'isle', ...
             'or', 'and', 'xor', }
     for add = { 'min', 'max', 'plus', 'times' }
@@ -121,9 +123,10 @@
     end
 end
 
-% 40: x,y,z all boolean: 10 * 4
-for mult = { 'first', 'second', 'or', 'and', 'xor', 
-             'eq', 'gt', 'lt', 'ge', 'le' }
+%-------------------------------------------------------------------------------
+% 44: x,y,z all boolean: 11 * 4
+for mult = { 'first', 'second', 'pair', 'or', 'and', 'xor', ...
+    'eq', 'gt', 'lt', 'ge', 'le' }
     for add = { 'or', 'and', 'xor', 'eq' }
         n = n + 1 ;
         s = struct ('multiply', mult{1}, 'add', add{1}, 'class', c{1}) ;
diff --git a/Test/GB_spec_random.m b/Test/GB_spec_random.m
index 6061448b09..b7cb3aaa3a 100644
--- a/Test/GB_spec_random.m
+++ b/Test/GB_spec_random.m
@@ -11,7 +11,7 @@
 % is_csc: true for CSC, false for CSR; defaults to true
 % is_hyper: false for non-hypersparse, true for hypersparse, default false
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/GB_spec_reduce_to_scalar.m b/Test/GB_spec_reduce_to_scalar.m
index ffe7b352d1..64ebb6d2f9 100644
--- a/Test/GB_spec_reduce_to_scalar.m
+++ b/Test/GB_spec_reduce_to_scalar.m
@@ -8,7 +8,7 @@
 %
 % cin is a dense scalar
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -37,6 +37,9 @@
 
 % get the identity
 identity = GB_spec_identity (reduce_op, reduce_class) ;
+if (isempty (identity))
+    identity = 0 ;
+end
 
 % get the input matrix
 A = GB_spec_matrix (A, identity) ;
diff --git a/Test/GB_spec_reduce_to_vector.m b/Test/GB_spec_reduce_to_vector.m
index 5298fd3393..96647b4d71 100644
--- a/Test/GB_spec_reduce_to_vector.m
+++ b/Test/GB_spec_reduce_to_vector.m
@@ -6,7 +6,7 @@
 %
 % Reduces a matrix to a vector
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -33,6 +33,9 @@
 
 % get the identity
 identity = GB_spec_identity (reduce_op, reduce_class) ;
+if (isempty (identity))
+    identity = 0 ;
+end
 
 % get the input matrix
 A = GB_spec_matrix (A, identity) ;
@@ -41,9 +44,9 @@
 w = GB_spec_matrix (w, identity) ;
 
 % get the mask
-mask = GB_spec_getmask (mask) ;
-
-[C_replace Mask_comp Atrans ~] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+mask = GB_spec_getmask (mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_resize.m b/Test/GB_spec_resize.m
index 6eacba0c63..e51ae3bfea 100644
--- a/Test/GB_spec_resize.m
+++ b/Test/GB_spec_resize.m
@@ -4,7 +4,7 @@
 % Usage:
 % C = GB_spec_resize (A, nrows_new, ncols_new)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
diff --git a/Test/GB_spec_select.m b/Test/GB_spec_select.m
index 06c2cc613b..adecf29e8f 100644
--- a/Test/GB_spec_select.m
+++ b/Test/GB_spec_select.m
@@ -4,7 +4,7 @@
 % Usage:
 % C = GB_spec_select (C, Mask, accum, opname, A, thunk, descriptor)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -17,8 +17,9 @@
 
 C = GB_spec_matrix (C) ;
 A = GB_spec_matrix (A) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans ~] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_semiring.m b/Test/GB_spec_semiring.m
index c0902e8ae1..03fe24fdd4 100644
--- a/Test/GB_spec_semiring.m
+++ b/Test/GB_spec_semiring.m
@@ -22,10 +22,8 @@
 %               'logical' (boolean in GraphBLAS), 'int8', 'uint8', 'int16',
 %               'uint16', 'int32', 'uint32', 'int64', 'uint64', 'single' (FP43
 %               in GraphBLAS), 'double' (FP64 in GraphBLAS).
-%
-% Refer to Source/GB_AxB_Gustavson_builtin.c for a list of valid semirings.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % set the default semiring
@@ -58,11 +56,13 @@
 
 switch mult
 
-    % 10, the monoid has the same type as x, y, and z, all semiring.class
+    % 11, the monoid has the same type as x, y, and z, all semiring.class
     case 'first'      % z = x
          ;
     case 'second'     % z = y
          ;
+    case 'pair'       % z = 1
+         ;
     case 'min'        % z = min(x,y)
          ;
     case 'max'        % z = max(x,y)
@@ -122,7 +122,7 @@
 
 zbool = isequal (zclass, 'logical') ;
 
-% min, max, plus, times monoids: valid for all 11 classes
+% min, max, plus, times, any monoids: valid for all 11 classes
 % or, and, xor, eq monoids:  valid only for logical
 switch add_opname
     case 'min'
@@ -133,6 +133,8 @@
         ok = 1 ;
     case 'times'
         ok = 1 ;
+    case 'any'
+        ok = 1 ;
     case 'or'
         ok = zbool ;
     case 'and'
diff --git a/Test/GB_spec_subassign.m b/Test/GB_spec_subassign.m
index 287b55ed1c..c25f11a78a 100644
--- a/Test/GB_spec_subassign.m
+++ b/Test/GB_spec_subassign.m
@@ -12,7 +12,7 @@
 % is the same size as A (after optionally being transpose) and the submatrix
 % C(I,J).  Entries outside the C(I,J) submatrix are never modified.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -27,8 +27,9 @@
 % and with where X(~X.pattern)==identity for all matrices A, B, and C.
 C = GB_spec_matrix (C) ;
 A = GB_spec_matrix (A) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans ignore] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 
diff --git a/Test/GB_spec_transpose.m b/Test/GB_spec_transpose.m
index e9d44ed998..7d6a49ef49 100644
--- a/Test/GB_spec_transpose.m
+++ b/Test/GB_spec_transpose.m
@@ -26,7 +26,11 @@
 % descriptor is a optional struct.  Defaults are used if empty or not present.
 %       descriptor.outp = 'replace' (clear C first) or 'default' (use C as-is)
 %       descriptor.inp0 = 'tran' (do C=A) or 'default' (do C=A')
-%       descriptor.mask = 'scmp' (use ~Mask) or 'default' (use Mask)
+%       descriptor.mask =
+%               'default': use Mask
+%               'complement' or 'scmp': use ~Mask
+%               'structural': use spones(Mask)
+%               'structural complement': use ~spones(Mask)
 %
 % GB_spec_transpose implements the entire specification of GrB_transpose, with
 % a few exceptions.
@@ -53,7 +57,7 @@
 % Use an empty value ([ ] or '') to obtain the default value for optional
 % parameters.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 %-------------------------------------------------------------------------------
@@ -66,8 +70,9 @@
 
 C = GB_spec_matrix (C) ;
 A = GB_spec_matrix (A) ;
-Mask = GB_spec_getmask (Mask) ;
-[C_replace Mask_comp Atrans Btrans] = GB_spec_descriptor (descriptor) ;
+[C_replace Mask_comp Atrans Btrans Mask_struct] = ...
+    GB_spec_descriptor (descriptor) ;
+Mask = GB_spec_getmask (Mask, Mask_struct) ;
 
 %-------------------------------------------------------------------------------
 % do the work via a clean MATLAB interpretation of the entire GraphBLAS spec
diff --git a/Test/GB_spec_vxm.m b/Test/GB_spec_vxm.m
index 093b24cb7f..090bde6867 100644
--- a/Test/GB_spec_vxm.m
+++ b/Test/GB_spec_vxm.m
@@ -6,7 +6,7 @@
 %
 % w, mask, and u are column vectors.  Computes w'=u'*A or w'=u'*A'
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargout > 1 || nargin ~= 7)
diff --git a/Test/GB_user_op.m b/Test/GB_user_op.m
index 73703feb25..b6f806ae85 100644
--- a/Test/GB_user_op.m
+++ b/Test/GB_user_op.m
@@ -12,7 +12,7 @@
 % No typecasting is done for user-defined operators.  x,y,z are either
 % double complex or double
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 tol = false ;
@@ -28,6 +28,8 @@
         z = x ;
     case 'second'
         z = y ;
+    case 'pair'
+        z = ones (size (x), class (x)) ;
     case 'min'
         z = min (x,y,'includenan') ;
     case 'max'
diff --git a/Test/GB_user_opsall.m b/Test/GB_user_opsall.m
index adbe4f2733..3f3554039f 100644
--- a/Test/GB_user_opsall.m
+++ b/Test/GB_user_opsall.m
@@ -1,7 +1,7 @@
 function [complex_binaryops complex_unaryops ] = GB_user_opsall
 %GB_USER_OPSALL return list of complex operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [mult_ops, ~, ~, ~, ~, ~] = GB_spec_opsall ;
diff --git a/Test/Makefile b/Test/Makefile
index 368906cc14..4f74fab2f5 100644
--- a/Test/Makefile
+++ b/Test/Makefile
@@ -2,7 +2,7 @@
 # GraphBLAS/Test/Makefile
 #-------------------------------------------------------------------------------
 
-# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 # http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 #-------------------------------------------------------------------------------
diff --git a/Test/README.txt b/Test/README.txt
index eb06a4315c..c8b5cd03bd 100644
--- a/Test/README.txt
+++ b/Test/README.txt
@@ -1,4 +1,4 @@
-SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 GraphBLAS/Test:
@@ -15,18 +15,12 @@ files to provide 'help GB_mex...' documentation.
 
 Requirements:  the mex command must use a C compiler supporting ANSI C11.
 
-To run the tests you must first compile "spok":
-
-    cd spok
-    spok_install
-
-Next, compile and install GraphBLAS.  Then, in this directory, to compile and
-run all the tests:
+To run the tests, use the following command in this directory, in MATLAB:
 
     make ; testall
 
 If you get a linking problem on linux, add this directory to your
-LD_LIBRARY_PATCH, so that the libgraphblas.so can be found by the mexFunctions.
+LD_LIBRARY_PATH, so that the libgraphblas.so can be found by the mexFunctions.
 
 Longer tests can be done as well (this can take a whole day):
 
@@ -49,9 +43,9 @@ handling in GraphBLAS to be tested.  If the function fails, the counter is
 reset, and the test is done again.  The initial state of this counter is
 increased until the function succeeds.  During this entire process, a count is
 kept of malloc'd blocks, and an error is reported if a leak is found.
-GraphBLAS will be very slow with malloc debugging enabled.  It is only
-done through the MATLAB interface and has no effect when GraphBLAS is
-used through a C program (as in the Demo/ programs).
+GraphBLAS will be very slow with malloc debugging enabled.  It is only done
+through the MATLAB interface and has no effect when GraphBLAS is used through a
+C program (as in the Demo/ programs).
 
 To enable further debugging tests, see the comments in Source/GB.h.
 
diff --git a/Test/Template/GB_mx_build_template.c b/Test/Template/GB_mx_build_template.c
index f28e2506cd..42534bc17a 100644
--- a/Test/Template/GB_mx_build_template.c
+++ b/Test/Template/GB_mx_build_template.c
@@ -2,7 +2,7 @@
 // GB_mx_build_template: build a sparse vector or matrix
 //------------------------------------------------------------------------------
 
-// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 //------------------------------------------------------------------------------
diff --git a/Test/accum_mask.m b/Test/accum_mask.m
index 3ede1dca7f..3979d39e61 100644
--- a/Test/accum_mask.m
+++ b/Test/accum_mask.m
@@ -1,8 +1,8 @@
 function C = accum_mask (C, Mask, accum, T, C_replace, Mask_complement)
 %ACCUM_MASK apply the mask
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [m n] = size (C.matrix) ;
 Z.matrix  = zeros (m, n) ;
diff --git a/Test/accum_mask2.m b/Test/accum_mask2.m
index 96407b29f8..df962d592f 100644
--- a/Test/accum_mask2.m
+++ b/Test/accum_mask2.m
@@ -4,7 +4,7 @@
 % The purpose is for illustration to describe what the accum/mask operation
 % does, not for actual testing.  This file appears in the User Guide.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [nrows ncols] = size (C.matrix) ;
diff --git a/Test/atest.m b/Test/atest.m
index 2fa325da5b..f5158c8833 100644
--- a/Test/atest.m
+++ b/Test/atest.m
@@ -1,4 +1,8 @@
 % test GrB_assign and GxB_subassign
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 addpath ('~/ssget') ;
diff --git a/Test/atest11.m b/Test/atest11.m
index 0476c58667..13c71ef6fc 100644
--- a/Test/atest11.m
+++ b/Test/atest11.m
@@ -1,4 +1,8 @@
 % test GrB_assign and GxB_subassign
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 addpath ('~/ssget') ;
diff --git a/Test/bfs_book.m b/Test/bfs_book.m
index 2ce483ae9a..e47e94e0f4 100644
--- a/Test/bfs_book.m
+++ b/Test/bfs_book.m
@@ -1,3 +1,7 @@
+%BFS_BOOK run BFS on a small graph
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % graph on the cover of the book, 'Graph Algorithms in the language
 % of linear algebra'.  The source node is node 4.
@@ -30,3 +34,4 @@
 end
 
 fprintf ('bfs_book test passed\n') ;
+
diff --git a/Test/bfs_matlab.m b/Test/bfs_matlab.m
index 2cc3e81620..ba30831367 100644
--- a/Test/bfs_matlab.m
+++ b/Test/bfs_matlab.m
@@ -11,6 +11,9 @@
 % kth level, where the shortest path (in terms of # of edges) from  s to j has
 % length k+1.  The source node s defaults to 1.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 [m n] = size (A) ;
 if (m ~= n)
     error ('A must be square') ;
diff --git a/Test/bfs_test.m b/Test/bfs_test.m
index ef6c4ddfb5..97a7588b52 100644
--- a/Test/bfs_test.m
+++ b/Test/bfs_test.m
@@ -1,6 +1,9 @@
 function v = bfs_test (A, s)
 %BFS_TEST compares bfs_matlab and GB_mex_bfs
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 if (nargin < 2)
     s = 1 ;
 end
diff --git a/Test/btest.m b/Test/btest.m
index aeda3e79f0..616bee4305 100644
--- a/Test/btest.m
+++ b/Test/btest.m
@@ -1,4 +1,8 @@
 % test GrB_build
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 nthreads_set(2)
 grbinfo
diff --git a/Test/debug_off.m b/Test/debug_off.m
index e8d9947294..bb1b901b65 100644
--- a/Test/debug_off.m
+++ b/Test/debug_off.m
@@ -1,7 +1,7 @@
 function debug_off
 %DEBUG_OFF turn off malloc debugging
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 global GraphBLAS_debug
diff --git a/Test/debug_on.m b/Test/debug_on.m
index 0b45911a69..bc21bc2eac 100644
--- a/Test/debug_on.m
+++ b/Test/debug_on.m
@@ -1,7 +1,7 @@
 function debug_on
 %DEBUG_ON turn on malloc debugging
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 global GraphBLAS_debug
diff --git a/Test/ee.m b/Test/ee.m
index 4e464049ed..ea88e824ee 100644
--- a/Test/ee.m
+++ b/Test/ee.m
@@ -1,4 +1,8 @@
 %EE eWiseMult and eWiseAdd performance tests
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 addpath ('~/ssget') ;
 addpath ('spok') ;
diff --git a/Test/errlog2.txt b/Test/errlog2.txt
index 299089577a..fe1cd66d21 100644
--- a/Test/errlog2.txt
+++ b/Test/errlog2.txt
@@ -1,111 +1,109 @@
-GB_mex_errors, line 511:GraphBLAS error: GrB_INVALID_VALUE
-function: GxB_Global_Option_set (field, value)
-nthreads_max [7777777] must be < GxB_NTHREADS_MAX [2048]
-Recompile with a higher value of GxB_NTHREADS_MAX,
-using -DGxB_NTHREADS_MAX=7777777 (or higher, as needed)
-
-GB_mex_errors, line 587:GraphBLAS error: GrB_DIMENSION_MISMATCH
-function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
-Thunk must be a vector of length 1
-
-GB_mex_errors, line 603:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 600:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 operator ge_thunk not defined for user-defined types
 
-GB_mex_errors, line 612:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 609:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 operator ge_zero not defined for user-defined types
 
-GB_mex_errors, line 615:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 612:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 operator gt_zero not defined for user-defined types
 
-GB_mex_errors, line 618:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 615:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 operator lt_zero not defined for user-defined types
 
-GB_mex_errors, line 621:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 618:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 operator le_zero not defined for user-defined types
 
-GB_mex_errors, line 624:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 621:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 incompatible type for C=le_thunk(A,Thunk):
 input A type [int16_t] and Thunk type [user_int] not compatible
 
-GB_mex_errors, line 763:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 707:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 slice matrix invalid: unfinished [Aslice [0]]
 
-GB_mex_errors, line 769:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 713:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 invalid slice matrix structure [Aslice [0]]
 
-GB_mex_errors, line 774:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 718:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 non-shallow: invalid slice matrix [Aslice [0]
 
 
-GB_mex_errors, line 780:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 724:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 hfirst: invalid slice matrix [Aslice [0]]
 
 
-GB_mex_errors, line 763:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 707:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 slice matrix invalid: unfinished [Aslice [0]]
 
-GB_mex_errors, line 769:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 713:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 invalid slice matrix structure [Aslice [0]]
 
-GB_mex_errors, line 774:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 718:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 non-shallow: invalid slice matrix [Aslice [0]
 
 
-GB_mex_errors, line 780:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 724:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Matrix_fprint (A, name, pr, f)
 hfirst: invalid slice matrix [Aslice [0]]
 
 
-GB_mex_errors, line 800:GraphBLAS error: GrB_INVALID_VALUE
-function: GB_mex_about
-Too many concurrent threads
-
-GB_mex_errors, line 845:GraphBLAS error: GrB_NULL_POINTER
+GB_mex_errors, line 774:GraphBLAS error: GrB_NULL_POINTER
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 Required argument is null: [Thunk]
 
-GB_mex_errors, line 850:GraphBLAS error: GrB_INVALID_VALUE
+GB_mex_errors, line 779:GraphBLAS error: GrB_INVALID_VALUE
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 User-defined operator select_plus_one(A,Thunk) has a Thunk input,
 which must not be empty
 
-GB_mex_errors, line 856:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 785:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 User-defined operator select_plus_one(A,Thunk) has a Thunk input
 type of [double], which must exactly match the type of the
 Thunk parameter; parameter to GxB_select has type [float]
 
-GB_mex_errors, line 861:GraphBLAS error: GrB_DOMAIN_MISMATCH
+GB_mex_errors, line 790:GraphBLAS error: GrB_DOMAIN_MISMATCH
 function: GxB_Matrix_select (C, M, accum, op, A, Thunk, desc)
 User-defined operator select_nothing(A,Thunk) does not take a Thunk
 input, but Thunk parameter is non-NULL
 
-GB_mex_errors, line 867:GraphBLAS error: GrB_UNINITIALIZED_OBJECT
+GB_mex_errors, line 796:GraphBLAS error: GrB_UNINITIALIZED_OBJECT
 function: GxB_Type_fprint (type, name, pr, f)
 Type is uninitialized: [user_type]
 
-GB_mex_errors, line 870:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 799:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_SelectOp_fprint (selectop, name, pr, f)
 SelectOp has an invalid ttype: selectop [select_nothing]
 
-GB_mex_errors, line 875:GraphBLAS error: GrB_UNINITIALIZED_OBJECT
+GB_mex_errors, line 804:GraphBLAS error: GrB_UNINITIALIZED_OBJECT
 function: GxB_Scalar_fprint (s, name, pr, f)
 scalar is uninitialized: [thunk]
 
-GB_mex_errors, line 969:GraphBLAS error: GrB_INVALID_OBJECT
+GB_mex_errors, line 898:GraphBLAS error: GrB_INVALID_OBJECT
 function: GxB_Scalar_fprint (s, name, pr, f)
 GxB_Scalar is invalid [scalar2]
 
+GB_mex_errors, line 955:GraphBLAS error: GrB_INVALID_VALUE
+function: GxB_Desc_set (desc, field, value)
+predefined descriptors may not be modified
+
+GB_mex_errors, line 958:GraphBLAS error: GrB_INVALID_VALUE
+function: GrB_Descriptor_set (desc, field, value)
+predefined descriptors may not be modified
+
+GB_mex_errors, line 999:GraphBLAS error: GrB_OUT_OF_MEMORY
+function: GrB_Matrix_assign_FP64 (C, M, accum, x, Rows, nRows, Cols, nCols, desc)
+out of memory
+
diff --git a/Test/etest.m b/Test/etest.m
index df0a7d22b4..84e617e1bd 100644
--- a/Test/etest.m
+++ b/Test/etest.m
@@ -1,4 +1,8 @@
 % test eWise
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 addpath ('~/ssget') ;
diff --git a/Test/flopcount.m b/Test/flopcount.m
index 88affd14c6..069aa18880 100644
--- a/Test/flopcount.m
+++ b/Test/flopcount.m
@@ -1,19 +1,26 @@
-function flops = flopcount (M,A,B) ;
-%FLOPCOUNT returns cumulative sum of flop counts for A*B or C<M>=A*B
+function [flops mwork] = flopcount (M,Mask_complement,A,B) ;
+%FLOPCOUNT cumulative sum of flop counts for A*B, C<M>=A*B, C<!M>=A*B
 %
-%   flops = flopcount (M,A,B) ;
+% flops = flopcount (M,Mask_complement,A,B) ;
 %
 % flops(j) is the flops to compute A*B(1:j-1), and flops(n+1) is the total
 % flopcount, if B is m-by-n.
 %
-% Each 'flop' counted is actually a multiply-add pair.  M can be [ ]. The
-% flopcount m-file returns the same thing as GB_AxB_flopcount.
+% Each 'flop' counted is actually a multiply-add.  M can be [ ]. The
+% flopcount m-file returns the same thing as GB_AxB_flopcount.  Also
+% included in flops(j) is the work needed to access the mask M(:,j).
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 n = size (B,2) ;
 flops = zeros (1,n) ;
+mwork = 0 ;
 
 if (isempty (M))
 
+    % C = A*B
+
     for j = 1:n
         brows = find (B (:,j)) ;
         brows = brows (:)' ;
@@ -24,31 +31,46 @@
 
 else
 
+    % C<M>=A*B and C<!M>=A*B
+
+    mask_is_M = (~Mask_complement) ;    % true for C<M>=A*B
+
     for j = 1:n
         brows = find (B (:,j)) ;
+        if (isempty (brows))
+            continue ;
+        end
         mrows = find (M (:,j)) ;
-        if (isempty (brows) || isempty (mrows))
+        mjnz = length (mrows) ;
+        
+        if (mask_is_M & mjnz == 0)
             continue ;
         end
-        % mrows
         imin = min (mrows) ;
         imax = max (mrows) ;
         brows = brows (:)' ;
+
+        flops (j) = flops (j) + mjnz ;
+        mwork = mwork + mjnz ;
+
         for k = brows
             [arows ignore] = find (A (:,k)) ;
             if (isempty (arows))
                 % A(:,k) is empty
                 continue ;
             end
-            amin = min (arows) ;
-            amax = max (arows) ;
-            if (amax < imin || amin > imax)
-                % intersection of A(:,k) and M(:,j) is empty
-                continue ;
+            if (mask_is_M)
+                amin = min (arows) ;
+                amax = max (arows) ;
+                if (amax < imin || amin > imax)
+                    % intersection of A(:,k) and M(:,j) is empty
+                    continue ;
+                end
             end
             flops (j) = flops (j) + nnz (A (:,k)) ;
         end
     end
 
 end
+
 flops = cumsum ([0 flops]) ;
diff --git a/Test/floptest.m b/Test/floptest.m
index 14854cb88c..b02bd07db4 100644
--- a/Test/floptest.m
+++ b/Test/floptest.m
@@ -1,31 +1,28 @@
-function floptest (M, A, B, floplimit, mflops)
+function floptest (M, Mask_complement, A, B, flops1)
 %FLOPTEST compare flopcount with GB_mex_mxm_flops
-% floptest (M, A, B, floplimit, mflops)
+% floptest (M, Mask_complement, A, B, flops1)
 %
 % compares the results of
-% mflops = flopcount (M, A, B) ;
+% flops1 = flopcount (M, Mask_complement, A, B)
 % with
-% [result bflops] = GB_mex_mxm_flops (M,A,B,floplimit)
+% flops2 = GB_mex_mxm_flops (M, Mask_complement, A, B)
 %
-% However, flopcount(M,A,B) can only be computed when M, A, B are all MATLAB
-% sparse matrices, not structs.  If the matrices are hypersparse, bflops has
-% length B->nvec+1, not size(B,2).  In this case, the last entries of both
-% mflops and bflops must match (equal to the total flops), and to do the test,
-% mflops(end) is passed to this function in instead of all of mflops.
+% However, flopcount(M,Mask_complement,A,B) can only be computed when M, A, B
+% are all MATLAB sparse matrices, not structs.  If the matrices are
+% hypersparse, flops2 has length B->nvec+1, not size(B,2).  In this case,
+% only the total flop count is checked, and flops1 is passed in as a scalar.
 
-% get both the result of the test, and bflops
-[result bflops] = GB_mex_mxm_flops (M, A, B, floplimit) ;
-total_flops = bflops (end) ;
-assert (result == (total_flops <= floplimit)) ;
-if (isscalar (mflops))
-    % mflops is just the total flop count
-    assert (isequal (mflops, bflops (end)))
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+[flops2 mwork] = GB_mex_mxm_flops (M, Mask_complement, A, B) ;
+total_flops = flops2 (end) ;
+
+if (isscalar (flops1))
+    % flops1 is just the total flop count
+    assert (isequal (flops1, total_flops)) ;
 else
-    % mflops is the cumulative sum
-    assert (isequal (mflops, bflops))
+    % flops1 is the cumulative sum
+    assert (isequal (flops1, flops2)) ;
 end
 
-% just get the result, not bflops:
-result = GB_mex_mxm_flops (M, A, B, floplimit) ;
-assert (result == (total_flops <= floplimit)) ;
-
diff --git a/Test/gbclear.m b/Test/gbclear.m
new file mode 100644
index 0000000000..b948f62f1a
--- /dev/null
+++ b/Test/gbclear.m
@@ -0,0 +1,12 @@
+%GBCLEAR clear and reload GraphBLAS
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+try
+    GrB.finalize
+    clear all
+    GrB.init
+catch me
+    me
+end
diff --git a/Test/grb_clear_coverage.m b/Test/grb_clear_coverage.m
new file mode 100644
index 0000000000..c0c4e8019c
--- /dev/null
+++ b/Test/grb_clear_coverage.m
@@ -0,0 +1,9 @@
+function c = grb_clear_coverage
+%GRB_CLEAR_COVERAGE clear current statement coverage
+
+try
+    global GraphBLAS_debug GraphBLAS_grbcov
+    GraphBLAS_grbcov (:) = 0 ;
+catch
+end
+
diff --git a/Test/grb_get_coverage.m b/Test/grb_get_coverage.m
new file mode 100644
index 0000000000..1828e71bec
--- /dev/null
+++ b/Test/grb_get_coverage.m
@@ -0,0 +1,10 @@
+function c = grb_get_coverage
+%GRB_GET_COVERAGE return current statement coverage
+
+c = 0 ;
+try
+    global GraphBLAS_debug GraphBLAS_grbcov
+    c = sum (GraphBLAS_grbcov > 0) ;
+catch
+end
+
diff --git a/Test/grbinfo.m b/Test/grbinfo.m
index 674e6d8efa..5ed7e812df 100644
--- a/Test/grbinfo.m
+++ b/Test/grbinfo.m
@@ -3,6 +3,9 @@
 %
 % nthreads = grbinfo
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 [nthreads threading thread_safety format hyperratio ... 
 name version date about license compiledate compiletime api api_about ...
 chunk ] = GB_mex_init ;
diff --git a/Test/grbresults.m b/Test/grbresults.m
index ca659671ba..5c00fff6b1 100644
--- a/Test/grbresults.m
+++ b/Test/grbresults.m
@@ -1,5 +1,9 @@
 function [t method] = grbresults
 %GRBRESULTS return time taken by last GraphBLAS function, and AxB method
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 global GraphBLAS_results
 t = GraphBLAS_results (1) ;
 method = GraphBLAS_results (2) ;
@@ -12,6 +16,10 @@
     method = 'heap' ;
 elseif method == 1003 || method == 6
     method = 'dot' ;
+elseif method == 1004
+    method = 'hash' ;
+elseif method == 1005
+    method = 'saxpy' ;
 else
     error ('invalid method') ;
 end
diff --git a/Test/irand.m b/Test/irand.m
index 071ba9dbd4..e874354d9e 100644
--- a/Test/irand.m
+++ b/Test/irand.m
@@ -6,7 +6,7 @@
 %
 % I = irand (imin, imax, m, n)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 4)
diff --git a/Test/isequal_roundoff.m b/Test/isequal_roundoff.m
index 1ab9a917dd..9d311dae61 100644
--- a/Test/isequal_roundoff.m
+++ b/Test/isequal_roundoff.m
@@ -5,6 +5,9 @@
 % tol = 64*eps if not present.  NaNs and Infs are ignored in the
 % tol, but the NaN and +/-Inf pattern must be the same.
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 if (~isequal (class (A), class (B)))
     ok = false ;
     return ;
@@ -46,4 +49,3 @@
 end
 ok = (err == 0) || (err <= tol * anorm) ;
 
-
diff --git a/Test/logstat.m b/Test/logstat.m
index 32d5550551..7e7ae5ed1b 100644
--- a/Test/logstat.m
+++ b/Test/logstat.m
@@ -1,11 +1,13 @@
 function logstat (testscript, threads)
 %LOGSTAT run a GraphBLAS test and log the results to log.txt 
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [debug, compact, malloc, covered] = GB_mex_debug ;
 
+clast = grb_get_coverage ;
+
 if (nargin < 2)
     % by default, use 4 threads and a tiny chunk size of 1
     threads {1} = [4 1] ;
@@ -77,8 +79,17 @@ function logstat (testscript, threads)
         if (~isempty (GraphBLAS_grbcov))
             c = sum (GraphBLAS_grbcov > 0) ;
             n = length (GraphBLAS_grbcov) ;
-            fprintf (   'coverage: %5d of %5d (%5.1f%%)', c, n, 100 * (c/n)) ;
-            fprintf (f, 'coverage: %5d of %5d (%5.1f%%)', c, n, 100 * (c/n)) ;
+            if (c == n)
+                fprintf (   'coverage: %5d :   all %5d (full 100%% rate: %8.2f/sec)', ...
+                    c - clast, n, (c-clast) / t) ;
+                fprintf (f, 'coverage: %5d :   all %5d (full 100%% rate: %8.2f/sec)', ...
+                    c - clast, n, (c-clast) / t) ;
+            else
+                fprintf (   'coverage: %5d : %5d of %5d (%5.1f%% rate: %8.2f/sec)', ...
+                    c - clast, c, n, 100 * (c/n), (c-clast) / t) ;
+                fprintf (f, 'coverage: %5d : %5d of %5d (%5.1f%% rate: %8.2f/sec)', ...
+                    c - clast, c, n, 100 * (c/n), (c-clast) / t) ;
+            end
             if (debug)
                 fprintf (' [debug]') ;
             end
diff --git a/Test/longtests.m b/Test/longtests.m
index 09df4edeeb..4be57fdec3 100644
--- a/Test/longtests.m
+++ b/Test/longtests.m
@@ -1,5 +1,8 @@
 %LONGTESTS very long tests
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 nthreads_set (4,1) ;
 debug_off 
 
diff --git a/Test/make.m b/Test/make.m
index aff6ef1fb9..84da62eefb 100644
--- a/Test/make.m
+++ b/Test/make.m
@@ -14,7 +14,7 @@ function make (what)
 % GraphBLAS requires an ANSI C11 compliant compiler.  On the Mac, clang 8.0
 % suffices.  gcc should be version 4.9.3 or later
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (isempty (strfind (pwd, 'GraphBLAS/Test')))
@@ -39,10 +39,8 @@ function make (what)
 
 make_all = (isequal (what, 'all')) ;
 
-%  flags = '-g' ;
-   flags = '-O' ;
-
-flags = [flags ' -largeArrayDims'] ;
+% flags = '-g' ;
+  flags = '-O' ;
 
 try
     if (strncmp (computer, 'GLNX', 4))
@@ -63,6 +61,7 @@ function make (what)
             end
         end
     end
+catch
 end
 
 mexfunctions = dir ('GB_mex_*.c') ;
@@ -101,6 +100,8 @@ function make (what)
     flags = [ flags  ' LDFLAGS="$LDFLAGS  -fopenmp -fPIC" '] ;
 end
 
+flags = [flags ' -largeArrayDims'] ;
+
 %-------------------------------------------------------------------------------
 
 dryrun = false ;
diff --git a/Test/mtest.m b/Test/mtest.m
index 75a0661334..9141a659e6 100644
--- a/Test/mtest.m
+++ b/Test/mtest.m
@@ -1,4 +1,8 @@
 % test mxm
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 addpath ('~/ssget') ;
diff --git a/Test/nthreads_get.m b/Test/nthreads_get.m
index 5319fc94ac..53b013c3ef 100644
--- a/Test/nthreads_get.m
+++ b/Test/nthreads_get.m
@@ -3,7 +3,7 @@
 %
 % [nthreads chunk] = nthreads_get
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 global GraphBLAS_nthreads
diff --git a/Test/nthreads_set.m b/Test/nthreads_set.m
index 152a5f1de4..45bf11544b 100644
--- a/Test/nthreads_set.m
+++ b/Test/nthreads_set.m
@@ -6,7 +6,7 @@
 % If nthreads is empty, or if no input arguments, nthreads is set to 1.
 % If chunk is empty, or if no input arguments, chunk is not modified.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 global GraphBLAS_nthreads
diff --git a/Test/rtest.m b/Test/rtest.m
index ae282b50d3..cbab54783b 100644
--- a/Test/rtest.m
+++ b/Test/rtest.m
@@ -1,4 +1,8 @@
 % test GrB_reduce to vector and scalar
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 
diff --git a/Test/runtest.m b/Test/runtest.m
index 79f621a1e1..505d51cf22 100644
--- a/Test/runtest.m
+++ b/Test/runtest.m
@@ -1,7 +1,7 @@
 function runtest (testscript)
 %RUNTEST run a single GraphBLAS test
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 %  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 eval (testscript) ;
diff --git a/Test/ss.m b/Test/ss.m
index 43002dcee1..b24dddc88d 100644
--- a/Test/ss.m
+++ b/Test/ss.m
@@ -1,4 +1,8 @@
 % test GxB_select
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 
diff --git a/Test/startup.m b/Test/startup.m
deleted file mode 100644
index 724c4cac74..0000000000
--- a/Test/startup.m
+++ /dev/null
@@ -1,3 +0,0 @@
-%STARTUP setup the path for tests in GraphBLAS/Test
-addpath ('spok')
-
diff --git a/Test/stat.m b/Test/stat.m
index 31ba019dd6..14dcb43811 100644
--- a/Test/stat.m
+++ b/Test/stat.m
@@ -1,7 +1,7 @@
 function [d nthreads] = stat
 %STAT report status of statement coverage and malloc debugging
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 global GraphBLAS_debug GraphBLAS_grbcov
diff --git a/Test/stest.m b/Test/stest.m
index 92566937ab..ab93750bca 100644
--- a/Test/stest.m
+++ b/Test/stest.m
@@ -1,4 +1,8 @@
 % test GxB_select
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 
diff --git a/Test/t74.m b/Test/t74.m
new file mode 100644
index 0000000000..351a8bf17f
--- /dev/null
+++ b/Test/t74.m
@@ -0,0 +1,13 @@
+%T74 run test20 and test74
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+clear all
+gbclear
+make
+threads {1} = [4 1] ;
+t = threads ;
+logstat ('test20',t) ;  % quick test of GB_mex_mxm on a few semirings
+logstat ('test74',t) ;  % test GrB_mxm on all semirings
+
diff --git a/Test/test00.m b/Test/test00.m
index 31ad555a7f..5bd8055d6e 100644
--- a/Test/test00.m
+++ b/Test/test00.m
@@ -1,7 +1,7 @@
 function test00
 %TEST00 test GB_mex_mis
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest00: MIS\n') ;
diff --git a/Test/test01.m b/Test/test01.m
index 922d4bf688..e5a7a651ec 100644
--- a/Test/test01.m
+++ b/Test/test01.m
@@ -1,7 +1,7 @@
 function test01
 %TEST01 test GraphBLAS error handling
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 GB_mex_about ;
diff --git a/Test/test02.m b/Test/test02.m
index 13c46ba513..08f750d870 100644
--- a/Test/test02.m
+++ b/Test/test02.m
@@ -1,7 +1,7 @@
 function test02
 %TEST02 test GrB_*_dup
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [~, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test03.m b/Test/test03.m
index 2c8037d406..d5fb6f3789 100644
--- a/Test/test03.m
+++ b/Test/test03.m
@@ -1,7 +1,7 @@
 function test03
 %TEST03 test GB_*_check functions
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [~, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test04.m b/Test/test04.m
index b86eec686a..13474e2f1a 100644
--- a/Test/test04.m
+++ b/Test/test04.m
@@ -1,7 +1,7 @@
 function test04
 %TEST04 test and demo for accumulator/mask and transpose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n-------------------- simple mask and transpose tests\n') ;
@@ -12,15 +12,15 @@
 Mask = mod (Z,2) == 0
 
 for C_replace = [false true]
-    for Mask_scmp = [false true]
-        fprintf ('Use Mask.  C_replace: %d Mask_scmp: %d\n', ...
-            C_replace, Mask_scmp) ;
-        Cresult = GB_spec_mask (C, Mask, Z, C_replace, Mask_scmp)
-        % C2 =  apply_mask_sparse (C, Z, Mask, C_replace, Mask_scmp) ;
+    for Mask_comp = [false true]
+        fprintf ('Use Mask.  C_replace: %d Mask_comp: %d\n', ...
+            C_replace, Mask_comp) ;
+        Cresult = GB_spec_mask (C, Mask, Z, C_replace, Mask_comp) ;
+        % C2 =  apply_mask_sparse (C, Z, Mask, C_replace, Mask_comp) ;
         % assert (isequal (Cresult, C2))
 
         D = [ ] ;
-        if (Mask_scmp)
+        if (Mask_comp)
             D.mask = 'scmp' ;
         end
         if (C_replace)
@@ -37,15 +37,15 @@
 end
 
 for C_replace = [false true]
-    for Mask_scmp = [false true]
-        fprintf ('No Mask.  C_replace: %d Mask_scmp: %d\n', ...
-            C_replace, Mask_scmp) ;
-        Cresult = GB_spec_mask (C, [ ], Z, C_replace, Mask_scmp)
-        % C2 = apply_mask_sparse  (C, Z, [ ], C_replace, Mask_scmp) ;
+    for Mask_comp = [false true]
+        fprintf ('No Mask.  C_replace: %d Mask_comp: %d\n', ...
+            C_replace, Mask_comp) ;
+        Cresult = GB_spec_mask (C, [ ], Z, C_replace, Mask_comp) ;
+        % C2 = apply_mask_sparse  (C, Z, [ ], C_replace, Mask_comp) ;
         % assert (isequal (Cresult, C2))
 
         D = [ ] ;
-        if (Mask_scmp)
+        if (Mask_comp)
             D.mask = 'scmp' ;
         end
         if (C_replace)
@@ -53,7 +53,9 @@
         end
 
         A = Z ;
-        fprintf ('C3 <no mask just with scmp> = C + A'' :\n') ;
+        fprintf ('C3 <no mask scmp:%d replace:%d> = C + A'' :\n', ...
+            Mask_comp, C_replace) ;
+D
         C3 = GB_spec_transpose (C, [ ], 'plus', A, D) ;
         C5 = GB_mex_transpose  (sparse(C), [ ], 'plus', sparse(A), D);
         assert (isequal (C3.matrix, C5.matrix))
diff --git a/Test/test05.m b/Test/test05.m
index 7799810e12..b42dee6186 100644
--- a/Test/test05.m
+++ b/Test/test05.m
@@ -1,7 +1,7 @@
 function test05
 %TEST05 test GrB_*_setElement
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 A = sparse (5,5) ;
diff --git a/Test/test06.m b/Test/test06.m
index db5d83f339..517abc8f5c 100644
--- a/Test/test06.m
+++ b/Test/test06.m
@@ -9,7 +9,7 @@ function test06 (A,B,fulltests,method_list)
 % matrix id number from the SuiteSparse collection otherwise A is the sparse
 % matrix to use in the test
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('test06: GrB_mxm on all semirings\n') ;
@@ -95,10 +95,10 @@ function test06 (A,B,fulltests,method_list)
 
 tic
 C = A'*B' ;
-tm4 = toc ;
+tm5 = toc ;
 
 if (n > 500)
-    fprintf ('MATLAB time: %g %g %g %g\n', tm1, tm2, tm3, tm4) ;
+    fprintf ('MATLAB time: %g %g %g %g\n', tm1, tm2, tm3, tm5) ;
     fprintf ('with mask:\n') ;
 end
 
@@ -116,10 +116,10 @@ function test06 (A,B,fulltests,method_list)
 
 tic
 C = (A'*B') .* Mask ;
-tmm4 = toc ;
+tmm5 = toc ;
 
 if (n > 500)
-    fprintf ('MATLAB time: %g %g %g %g\n', tmm1, tmm2, tmm3, tmm4) ;
+    fprintf ('MATLAB time: %g %g %g %g\n', tmm1, tmm2, tmm3, tmm5) ;
 end
 
 dnn = struct ;
@@ -204,9 +204,9 @@ function test06 (A,B,fulltests,method_list)
                 end
 
                 if (method == 1)
-                    algo = 'heap' ;
+                    algo = 'hash' ;
                     if (n > 500)
-                        fprintf ('heap ') ;
+                        fprintf ('hash ') ;
                     end
                 elseif (method == 2)
                     algo = 'gustavson' ;
@@ -224,7 +224,6 @@ function test06 (A,B,fulltests,method_list)
                         fprintf ('auto ') ;
                     end
                 end
-
                 if (isequal (algo, 'dot'))
                     ok = (n < 1000) ;
                 else
@@ -286,7 +285,7 @@ function test06 (A,B,fulltests,method_list)
                     fprintf (...
                     'speedups %10.4f(%s) %10.4f(%s) %10.4f(%s) %10.4f(%s) ', ...
                     tm1/t1, method1(1), tm2/t2, method2(1), ...
-                    tm3/t3, method3(1), tm4/t4, method4(1)) ;
+                    tm3/t3, method3(1), tm5/t4, method4(1)) ;
                 end
 
                 % C = A*B, with mask
@@ -329,7 +328,7 @@ function test06 (A,B,fulltests,method_list)
                     fprintf (...
                     'speedups %10.4f(%s) %10.4f(%s) %10.4f(%s) %10.4f(%s) ', ...
                     tmm1/t1, method1m(1), tmm2/t2, method2m(1), ...
-                    tmm3/t3, method3m(1), tmm4/t4, method4m(1)) ;
+                    tmm3/t3, method3m(1), tmm5/t4, method4m(1)) ;
                     fprintf ('\n') ;
                 end
 
diff --git a/Test/test07.m b/Test/test07.m
index 8d85fcdf91..81eb51f193 100644
--- a/Test/test07.m
+++ b/Test/test07.m
@@ -1,7 +1,7 @@
 function test07
 %TEST07 test GxB_subassign with a single pending tuple
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % adds a single pending tuple
diff --git a/Test/test07b.m b/Test/test07b.m
index 3a4dd1107d..a37028e865 100644
--- a/Test/test07b.m
+++ b/Test/test07b.m
@@ -1,7 +1,7 @@
 function test07b
 %TEST07B test GrB_assign with a single pending tuple
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % adds a single pending tuple
diff --git a/Test/test08.m b/Test/test08.m
index 6f5699146f..3f3d6d5f6c 100644
--- a/Test/test08.m
+++ b/Test/test08.m
@@ -1,7 +1,7 @@
 function test08
 %TEST08 test GxB_subassign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/test08b.m b/Test/test08b.m
index f03c34c281..43f32f6ee2 100644
--- a/Test/test08b.m
+++ b/Test/test08b.m
@@ -1,7 +1,7 @@
 function test08b
 %TEST08B test GrB_assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/test09.m b/Test/test09.m
index 2b3ec03aaf..36f81cd5c3 100644
--- a/Test/test09.m
+++ b/Test/test09.m
@@ -1,7 +1,7 @@
 function test09
 %TEST09 test GxB_subassign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n-----------duplicate I,J test of GB_mex_subassign\n') ;
diff --git a/Test/test09b.m b/Test/test09b.m
index e2e7742b80..4695e386ca 100644
--- a/Test/test09b.m
+++ b/Test/test09b.m
@@ -1,7 +1,7 @@
 function test09b
 %TEST09B test GrB_assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n-----------duplicate I,J test of GB_mex_assign\n') ;
diff --git a/Test/test10.m b/Test/test10.m
index 3b3b6787db..5a4bc2a1c2 100644
--- a/Test/test10.m
+++ b/Test/test10.m
@@ -1,7 +1,7 @@
 function test10
 %TEST10 test GrB_apply
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\nquick GrB_apply tests\n') ;
diff --git a/Test/test100.m b/Test/test100.m
index 419f6dd996..a3f38e50a3 100644
--- a/Test/test100.m
+++ b/Test/test100.m
@@ -1,7 +1,7 @@
 function test100 
 %TEST100 test GB_mex_isequal
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [~, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test101.m b/Test/test101.m
index e0990f3e7e..9f3eb1fab8 100644
--- a/Test/test101.m
+++ b/Test/test101.m
@@ -1,7 +1,7 @@
 function test101
 %TEST101 test import/export
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % clear all
diff --git a/Test/test102.m b/Test/test102.m
index cb94f77c52..b571739a32 100644
--- a/Test/test102.m
+++ b/Test/test102.m
@@ -1,6 +1,9 @@
 function test102
 %TEST102 test GB_AxB_flopcount
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('\ntest102: testing GB_AxB_flopcount\n') ;
 
 rng ('default') ;
@@ -30,26 +33,26 @@
                 Ahyper.is_hyper = true ;
 
                 % flop counts for C=A*B
-                mflops = flopcount ([ ], A, B) ;
+                mflops = flopcount ([ ], 0, A, B) ;
                 total = mflops (end) ;
 
-                floptest ([ ], A, B, m, mflops) ;
-                floptest ([ ], Ahyper, Bhyper, m, total) ;
-                floptest ([ ], Ahyper, B, m, total) ;
-                floptest ([ ], A, Bhyper, m, total) ;
+                floptest ([ ], 0, A, B, mflops) ;
+                floptest ([ ], 0, Ahyper, Bhyper, total) ;
+                floptest ([ ], 0, Ahyper, B, total) ;
+                floptest ([ ], 0, A, Bhyper, total) ;
 
                 % flop counts for C<M>=A*B
-                mflops = flopcount (M, A, B) ;
+                mflops = flopcount (M, 0, A, B) ;
                 total = mflops (end) ;
 
-                floptest (M, A, B, m, mflops) ;
-                floptest (M, A, Bhyper, m, total) ;
-                floptest (M, Ahyper, B, m, total) ;
-                floptest (M, Ahyper, Bhyper, m, total) ;
-                floptest (Mhyper, A, B, m, total) ;
-                floptest (Mhyper, Ahyper, B, m, total) ;
-                floptest (Mhyper, A, Bhyper, m, total) ;
-                floptest (Mhyper, A, B, m, total) ;
+                floptest (M, 0, A, B, mflops) ;
+                floptest (M, 0, A, Bhyper, total) ;
+                floptest (M, 0, Ahyper, B, total) ;
+                floptest (M, 0, Ahyper, Bhyper, total) ;
+                floptest (Mhyper, 0, A, B, total) ;
+                floptest (Mhyper, 0, Ahyper, B, total) ;
+                floptest (Mhyper, 0, A, Bhyper, total) ;
+                floptest (Mhyper, 0, A, B, total) ;
 
             end
         end
diff --git a/Test/test103.m b/Test/test103.m
index 7c6cbe81e6..1a41ab1dfc 100644
--- a/Test/test103.m
+++ b/Test/test103.m
@@ -1,6 +1,9 @@
 function test103
 %TEST103 test aliases in GrB_transpose
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng 'default'
 fprintf ('\ntest103: test aliases in GrB_transpose\n') ;
 
diff --git a/Test/test104.m b/Test/test104.m
index 7dd5aea499..eae3da7814 100644
--- a/Test/test104.m
+++ b/Test/test104.m
@@ -1,6 +1,9 @@
 function test104
 %TEST104 export/import
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng 'default'
 fprintf ('\ntest104: export/import tests\n') ;
 
diff --git a/Test/test105.m b/Test/test105.m
index cf5f99d716..c26158e9f9 100644
--- a/Test/test105.m
+++ b/Test/test105.m
@@ -1,6 +1,9 @@
 function test105
 %TEST105 eWiseAdd with hypersparse matrices
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng 'default'
 fprintf ('\ntest105: eWiseAdd with hypersparse\n') ;
 
diff --git a/Test/test106.m b/Test/test106.m
index d0bf2af4aa..fc4d5b986c 100644
--- a/Test/test106.m
+++ b/Test/test106.m
@@ -1,6 +1,9 @@
 function test106
 %TEST106 GxB_subassign with alias
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng 'default'
 fprintf ('\ntest106: GxB_subassign with alias\n') ;
 
diff --git a/Test/test107.m b/Test/test107.m
index f95f9527ea..4b452f79d1 100644
--- a/Test/test107.m
+++ b/Test/test107.m
@@ -1,6 +1,9 @@
 function test107
 %TEST107 user-defined terminal monoid
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test107: reduce with built-in and  user-defined terminal monoids\n') ;
 
 rng ('default') ;
@@ -72,18 +75,18 @@
 
     tic
     for trial = 1:ntrials
-        c2 = GB_mex_reduce_terminal (A, 1) ;    % user-defined at compile-time
+        c2 = GB_mex_reduce_terminal (A, 1) ;    % user-defined
     end
     t2 = toc ;
-    fprintf ('nthreads %3d compile-time  %g\n', nthreads, t2) ;
+    fprintf ('nthreads %3d %g\n', nthreads, t2) ;
     assert (s == c2) ;
 
     tic
     for trial = 1:ntrials
-        c3 = GB_mex_reduce_terminal (A, 2) ;        % user-defined at run-time
+        c3 = GB_mex_reduce_terminal (A, 2) ;    % user-defined
     end
     t3 = toc ;
-    fprintf ('nthreads %3d run-time      %g\n', nthreads, t3) ;
+    fprintf ('nthreads %3d %g\n', nthreads, t3) ;
     assert (s == c3) ;
 
 end
@@ -113,10 +116,10 @@
     fprintf ('nthreads %3d built-in      %g\n', nthreads, t1) ;
     tic
     for trial = 1:ntrials
-        c2 = GB_mex_reduce_terminal (A, 1) ;    % user-defined at compile-time
+        c2 = GB_mex_reduce_terminal (A, 1) ;    % user-defined
     end
     t2 = toc ;
-    fprintf ('nthreads %3d compile-time  %g\n', nthreads, t2) ;
+    fprintf ('nthreads %3d %g\n', nthreads, t2) ;
     assert (s == c1) ;
     assert (s == c2) ;
 end
@@ -149,7 +152,7 @@
         c2 = GB_mex_reduce_terminal (A, inf) ;
     end
     t2 = toc ;
-    fprintf ('nthreads %3d compile-time  %g\n', nthreads, t2) ;
+    fprintf ('nthreads %3d %g\n', nthreads, t2) ;
     assert (s == c1) ;
     assert (s == c2) ;
 end
@@ -182,7 +185,7 @@
         c2 = GB_mex_reduce_terminal (A, 2) ;
     end
     t2 = toc ;
-    fprintf ('nthreads %3d compile-time  %g\n', nthreads, t2) ;
+    fprintf ('nthreads %3d %g\n', nthreads, t2) ;
     assert (s == c1) ;
     assert (s == c2) ;
 end
diff --git a/Test/test108.m b/Test/test108.m
index 0675780463..dd21eac674 100644
--- a/Test/test108.m
+++ b/Test/test108.m
@@ -1,6 +1,9 @@
 function test108
 %TEST108 test boolean monoids
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 % only well-defined if op is associative
 
 ops = {
diff --git a/Test/test109.m b/Test/test109.m
index d1c4d61041..c671c11fec 100644
--- a/Test/test109.m
+++ b/Test/test109.m
@@ -1,6 +1,9 @@
 function test109
 %TEST109 terminal monoid with user-defined type
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('\ntest109: terminal monoid with user-defined type\n') ;
 
 rng ('default') ;
diff --git a/Test/test11.m b/Test/test11.m
index 22ef6f36ea..38e96fdede 100644
--- a/Test/test11.m
+++ b/Test/test11.m
@@ -1,7 +1,7 @@
 function test11
 %TEST11 test GrB_*_extractTuples
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [~, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test110.m b/Test/test110.m
index 4ba68f38de..b76b5ec0a4 100644
--- a/Test/test110.m
+++ b/Test/test110.m
@@ -1,6 +1,9 @@
 function test110
 %TEST110 test accum/mask (binary search of M(:,j))
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('\ntest110:  test accum/mask (binary search of M(:,j))\n') ;
 
 rng ('default')
diff --git a/Test/test111.m b/Test/test111.m
index 9120cedcf5..4eef40a8bf 100644
--- a/Test/test111.m
+++ b/Test/test111.m
@@ -1,7 +1,7 @@
 function test111
 %TEST111 performance test for eWiseAdd
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest111 performance tests : eWiseAdd \n') ;
diff --git a/Test/test112.m b/Test/test112.m
index de3cffd5c6..966493599c 100644
--- a/Test/test112.m
+++ b/Test/test112.m
@@ -1,6 +1,9 @@
 function test112
 %TEST112 test row/col scale
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test112: row/col scale\n') ;
 
 rng ('default') ;
diff --git a/Test/test113.m b/Test/test113.m
index 02fc046380..e66fe08489 100644
--- a/Test/test113.m
+++ b/Test/test113.m
@@ -1,6 +1,9 @@
 function test113
 %TEST113 performance tests for GrB_kron
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test113: performance tests for GrB_kron\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test114.m b/Test/test114.m
index b35f7981e3..14d9d62c54 100644
--- a/Test/test114.m
+++ b/Test/test114.m
@@ -1,6 +1,9 @@
 function test114
 %TEST114 performance of reduce-to-scalar
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng ('default') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test115.m b/Test/test115.m
index b9d50ece9e..938592b1cb 100644
--- a/Test/test115.m
+++ b/Test/test115.m
@@ -1,7 +1,7 @@
 function test115
 %TEST115 test GB_assign, scalar expansion and zombies, with duplicates
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/test116.m b/Test/test116.m
index 03bab19c56..7bad954783 100644
--- a/Test/test116.m
+++ b/Test/test116.m
@@ -1,6 +1,9 @@
 function test116
 %TEST116 performance tests for GrB_assign
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test116:---------------- C(I,J)=A and C=A(I,J) performance\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test117.m b/Test/test117.m
index e0f268f450..5e00d54955 100644
--- a/Test/test117.m
+++ b/Test/test117.m
@@ -3,6 +3,9 @@
 
 % test C(:,:)<M> += A
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test117 ----------------------------------- C(:,:)<M> += A\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test118.m b/Test/test118.m
index ed7fdf601c..d99247ffa5 100644
--- a/Test/test118.m
+++ b/Test/test118.m
@@ -3,6 +3,9 @@
 
 % test C(:,:)<M> = A
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test118 ----------------------------------- C(:,:)<M> = A\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test119.m b/Test/test119.m
index f20dfcaa6c..1292396ce9 100644
--- a/Test/test119.m
+++ b/Test/test119.m
@@ -1,6 +1,9 @@
 function test119
 %TEST119 performance tests for GrB_assign
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test119:-------------------  C(I,J) += scalar:\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test12.m b/Test/test12.m
index e9859639e5..ac7ba0c7e4 100644
--- a/Test/test12.m
+++ b/Test/test12.m
@@ -6,7 +6,7 @@ function test12 (cover)
 % if cover=1, do quick statement coverage tests
 % if cover=0, run larger problems
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test120.m b/Test/test120.m
index e72d64aba4..7393adc9b2 100644
--- a/Test/test120.m
+++ b/Test/test120.m
@@ -1,6 +1,9 @@
 function test120
 %TEST120 performance tests for GrB_assign
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test120:-------------------  C(I,J)<!M> += scalar:\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test121.m b/Test/test121.m
index dd3bca1f51..bc1034aedd 100644
--- a/Test/test121.m
+++ b/Test/test121.m
@@ -1,6 +1,9 @@
 function test121
 %TEST121 performance tests for GrB_assign
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test121:---------------- C(I,J)+=A performance\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test122.m b/Test/test122.m
index 5b71cedd51..b1da727f15 100644
--- a/Test/test122.m
+++ b/Test/test122.m
@@ -1,6 +1,9 @@
 function test122
 %TEST122 performance tests for GrB_assign
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test120:-------------------  C(I,J)<!M> += A:\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test123.m b/Test/test123.m
index c386ff196f..66a2d39cbb 100644
--- a/Test/test123.m
+++ b/Test/test123.m
@@ -1,6 +1,9 @@
 function test123
 %TEST123 test MIS on large matrix
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test123: test MIS on large matrix\n') ;
 
 Prob = ssget (2662)
diff --git a/Test/test124.m b/Test/test124.m
index abcc836850..601d4cacc8 100644
--- a/Test/test124.m
+++ b/Test/test124.m
@@ -1,6 +1,9 @@
 function test124
 %TEST124 GrB_extract, trigger case 6
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test124: GrB_extract, trigger case 6\n') ;
 
 rng ('default') ;
diff --git a/Test/test125.m b/Test/test125.m
index 7e2e61e3d9..72156a57dc 100644
--- a/Test/test125.m
+++ b/Test/test125.m
@@ -2,7 +2,7 @@
 %TEST125 test GrB_mxm: row and column scaling
 % all built-in semirings, no typecast, no mask
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [mult_ops, ~, add_ops, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test126.m b/Test/test126.m
index bb45aa44d3..792b6d5ee5 100644
--- a/Test/test126.m
+++ b/Test/test126.m
@@ -1,7 +1,7 @@
 function test126
 %TEST126 test GrB_reduce to vector on a very sparse matrix 
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('test126:  test GrB_reduce to vector on a very sparse matrix\n') ;
diff --git a/Test/test127.m b/Test/test127.m
index 8541ca4155..6b21620840 100644
--- a/Test/test127.m
+++ b/Test/test127.m
@@ -1,7 +1,7 @@
 function test127
 %TEST127 test GrB_eWiseAdd and GrB_eWiseMult (all types and operators)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [bin_ops, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test128.m b/Test/test128.m
index 434d55b741..2bac03b262 100644
--- a/Test/test128.m
+++ b/Test/test128.m
@@ -4,6 +4,9 @@
 % C = GB_mex_eWiseMult_Matrix (C, Mask, accum, mult, A, B, desc)
 % C = GB_mex_eWiseAdd_Matrix  (C, Mask, accum, add,  A, B, desc, test)
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('\ntest128: test eWiseMult and eWiseAdd, special cases\n') ;
 rng ('default') ;
 
diff --git a/Test/test129.m b/Test/test129.m
index 442b3515b8..7dc4594a81 100644
--- a/Test/test129.m
+++ b/Test/test129.m
@@ -1,11 +1,11 @@
 function test129
 %TEST129 test GxB_select (tril and nonzero, hypersparse)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
-
 % This is a shorter version of test25
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('\ntest129: GxB_select tests (tril and nonzero)\n') ;
 
 [~, ~, ~, classes, ~, select_ops] = GB_spec_opsall ;
diff --git a/Test/test13.m b/Test/test13.m
index 591c4e8d34..53b7b6a3d1 100644
--- a/Test/test13.m
+++ b/Test/test13.m
@@ -1,7 +1,7 @@
 function test13
 %TEST13 test GrB_tranpsose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 cinclass  = 'int16' ;
diff --git a/Test/test130.m b/Test/test130.m
index 0faf127e44..24fcdecdbe 100644
--- a/Test/test130.m
+++ b/Test/test130.m
@@ -1,7 +1,7 @@
 function test130
 %TEST130 test GrB_apply (hypersparse cases)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest130: quick GrB_apply tests\n') ;
diff --git a/Test/test131.m b/Test/test131.m
index b629d40e1e..ab4668fa45 100644
--- a/Test/test131.m
+++ b/Test/test131.m
@@ -1,7 +1,7 @@
 function test131
 %TEST131 test GrB_Matrix_clear
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest131: GrB_Matrix_clear\n') ;
diff --git a/Test/test132.m b/Test/test132.m
index 499fca848e..663ce8a715 100644
--- a/Test/test132.m
+++ b/Test/test132.m
@@ -1,7 +1,7 @@
 function test132
 %TEST132 test GrB_*_setElement and GrB_*_*build
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % simplified from test45
diff --git a/Test/test133.m b/Test/test133.m
index 9b6e0cc850..20e175252f 100644
--- a/Test/test133.m
+++ b/Test/test133.m
@@ -1,7 +1,7 @@
 function test133
 %TEST133 test mask operations (GB_masker)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % C = GB_mex_transpose (C, M, accum, A, desc, test)
diff --git a/Test/test134.m b/Test/test134.m
index cbb5d0e06c..7e08b6ebba 100644
--- a/Test/test134.m
+++ b/Test/test134.m
@@ -1,7 +1,7 @@
 function test134
 %TEST134 test GxB_select
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % A shorter version of test25
diff --git a/Test/test135.m b/Test/test135.m
index c668800f65..77308833e6 100644
--- a/Test/test135.m
+++ b/Test/test135.m
@@ -1,7 +1,7 @@
 function test135
 %TEST135 reduce-to-scalar, built-in monoids with terminal values
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('test135: reduce to scalar\n') ;
diff --git a/Test/test136.m b/Test/test136.m
index d9af45e706..32d1ef5cfe 100644
--- a/Test/test136.m
+++ b/Test/test136.m
@@ -1,7 +1,7 @@
 function test136
 %TEST136 GxB_subassign, method 08, 09, 11
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('test136: GxB_subassign, special cases\n') ;
diff --git a/Test/test137.m b/Test/test137.m
index bf3100f2f1..862bf52f48 100644
--- a/Test/test137.m
+++ b/Test/test137.m
@@ -1,7 +1,7 @@
 function test137
 %TEST137 GrB_eWiseMult with FIRST and SECOND operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('test137: GrB_eWiseMult with FIRST and SECOND operators\n') ;
diff --git a/Test/test138.m b/Test/test138.m
index 76aa8cae30..0cd9a8b716 100644
--- a/Test/test138.m
+++ b/Test/test138.m
@@ -1,6 +1,9 @@
 function test138
 %TEST138 test assign, with coarse-only tasks in IxJ slice
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng ('default') ;
 
 [save_nthreads save_chunk] = nthreads_get ;
diff --git a/Test/test139.m b/Test/test139.m
index 2b9c29d1de..a73a07e4d9 100644
--- a/Test/test139.m
+++ b/Test/test139.m
@@ -1,24 +1,58 @@
 function test139
 %TEST139 merge sort, special cases
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test139 --------------- merge sort, special cases\n') ;
 rng ('default') ;
 
 n = 1e6 ;
 I = 42 * ones (n,1) ;
 J = (1:n)' ;
+K = 100 * ones (n,1) ;
 
 I0 = int64 (I) ;
-J0 = int64 (I) ;
+J0 = int64 (J) ;
+K0 = int64 (K) ;
 
 IJ1 = sortrows ([I0 J0]) ;
-
 [a b] = GB_mex_msort_2 (I0, J0, 2) ;
 assert (isequal (IJ1, [a b])) ;
 
-IJ1 = sortrows ([J0 I0]) ;
+IJ3 = sortrows ([I0 J0 K0]) ;
+[a b c] = GB_mex_msort_3 (I0, J0, K0, 2) ;
+assert (isequal (IJ3, [a b c])) ;
 
+IJ1 = sortrows ([J0 I0]) ;
 [a b] = GB_mex_msort_2 (J0, I0, 2) ;
 assert (isequal (IJ1, [a b])) ;
 
+a = GB_mex_msort_1 (I0, 1) ;
+c = sort (I0) ;
+assert (isequal (c, a)) ;
+
+a = GB_mex_msort_1 (J0, 1) ;
+c = sort (J0) ;
+assert (isequal (c, a)) ;
+
+I0 = int64 (randperm (10000, 5000)) ;
+a = GB_mex_msort_1 (I0, 8) ;
+c = sort (I0) ;
+assert (isequal (c, a')) ;
+
+for n = [10 100 1000 1e5 1e6]
+    I0 = int64 (1000 * rand (n,1)) ;
+    a = GB_mex_msort_1 (I0, 8) ;
+    c = sort (I0) ;
+    assert (isequal (c, a)) ;
+end
+
+for n = [10 100 1000 1e5 1e6]
+    I0 = int64 (4 * ones (n,1)) ;
+    a = GB_mex_msort_1 (I0, 8) ;
+    c = sort (I0) ;
+    assert (isequal (c, a)) ;
+end
+
 fprintf ('test139 --------------- all tests passed\n') ;
diff --git a/Test/test14.m b/Test/test14.m
index 321518bb08..1268b065f8 100644
--- a/Test/test14.m
+++ b/Test/test14.m
@@ -1,7 +1,7 @@
 function test14
 %TEST14 test GrB_reduce
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest14: reduce to column and scalar\n') ;
@@ -24,9 +24,9 @@
     mask = GB_random_mask (m, 1, 0.5, true, false) ;
 
     if (isequal (aclass, 'logical'))
-        ops = {'or', 'and', 'xor', 'eq'} ;
+        ops = {'or', 'and', 'xor', 'eq', 'any'} ;
     else
-        ops = {'min', 'max', 'plus', 'times'} ;
+        ops = {'min', 'max', 'plus', 'times', 'any'} ;
     end
 
     if (isequal (aclass, 'double'))
@@ -38,11 +38,6 @@
     end
 
     is_float = isequal (aclass, 'single') || isequal (aclass, 'double') ;
-    if (is_float)
-        tol = 64 * eps (aclass) ;
-    else
-        tol = 0 ;
-    end
 
     for A_is_hyper = 0:1
     for A_is_csc   = 0:1
@@ -53,45 +48,54 @@
     for k2 = 1:length(ops)
         op = ops {k2} ;
 
+        if (isequal (op, 'any'))
+            tol = [ ] ;
+        elseif (is_float)
+            tol = 64 * eps (aclass) ;
+        else
+            tol = 0 ;
+        end
+        identity = GB_spec_identity (op, aclass) ;
+
         % no mask
         w1 = GB_spec_reduce_to_vector (w, [], [], op, A, []) ;
         w2 = GB_mex_reduce_to_vector  (w, [], [], op, A, []) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % no mask, with accum
         w1 = GB_spec_reduce_to_vector (w, [], 'plus', op, A, []) ;
         w2 = GB_mex_reduce_to_vector  (w, [], 'plus', op, A, []) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % with mask
         w1 = GB_spec_reduce_to_vector (w, mask, [], op, A, []) ;
         w2 = GB_mex_reduce_to_vector  (w, mask, [], op, A, []) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % with mask and accum
         w1 = GB_spec_reduce_to_vector (w, mask, 'plus', op, A, []) ;
         w2 = GB_mex_reduce_to_vector  (w, mask, 'plus', op, A, []) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % no mask, transpose
         w1 = GB_spec_reduce_to_vector (w, [], [], op, B, dt) ;
         w2 = GB_mex_reduce_to_vector  (w, [], [], op, B, dt) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % no mask, with accum, transpose
         w1 = GB_spec_reduce_to_vector (w, [], 'plus', op, B, dt) ;
         w2 = GB_mex_reduce_to_vector  (w, [], 'plus', op, B, dt) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % with mask, transpose
         w1 = GB_spec_reduce_to_vector (w, mask, [], op, B, dt) ;
         w2 = GB_mex_reduce_to_vector  (w, mask, [], op, B, dt) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % with mask and accum, transpose
         w1 = GB_spec_reduce_to_vector (w, mask, 'plus', op, B, dt) ;
         w2 = GB_mex_reduce_to_vector  (w, mask, 'plus', op, B, dt) ;
-        GB_spec_compare (w1, w2, tol) ;
+        GB_spec_compare (w1, w2, identity, tol) ;
 
         % GB_spec_reduce_to_scalar always operates column-wise, but GrB_reduce
         % operates in whatever order it is given: by column if CSC or by row if
@@ -110,23 +114,29 @@
         % c1 and c2 can only be compared to within round-off error.
 
         % to scalar
-        c1 = GB_spec_reduce_to_scalar (cin, [ ], op, A_flip) ;
         c2 = GB_mex_reduce_to_scalar  (cin, [ ], op, A) ;
-        if (is_float)
-            assert (abs (c1-c2) < 4 * eps (A.class) *  (abs(c1) + 1))
+        if (isequal (op, 'any'))
+            X = GB_mex_cast (full (A.matrix (A.pattern)), A.class) ;
+            assert (any (X == c2)) ;
         else
-            assert (isequal (c1, c2)) ;
+            c1 = GB_spec_reduce_to_scalar (cin, [ ], op, A_flip) ;
+            if (is_float)
+                assert (abs (c1-c2) < 4 * eps (A.class) *  (abs(c1) + 1))
+            else
+                assert (isequal (c1, c2)) ;
+            end
         end
 
         % to scalar, with accum
-        c1 = GB_spec_reduce_to_scalar (cin, 'plus', op, A_flip) ;
-        c2 = GB_mex_reduce_to_scalar  (cin, 'plus', op, A) ;
-        if (is_float)
-            assert (abs (c1-c2) < 4 * eps (A.class) *  (abs(c1) + 1))
-        else
-            assert (isequal (c1, c2)) ;
+        c2 = GB_mex_reduce_to_scalar (cin, 'plus', op, A) ;
+        if (~isequal (op, 'any'))
+            c1 = GB_spec_reduce_to_scalar (cin, 'plus', op, A_flip) ;
+            if (is_float)
+                assert (abs (c1-c2) < 4 * eps (A.class) *  (abs(c1) + 1))
+            else
+                assert (isequal (c1, c2)) ;
+            end
         end
-
     end
     end
     end
diff --git a/Test/test140.m b/Test/test140.m
index d44fad980f..b3c915527e 100644
--- a/Test/test140.m
+++ b/Test/test140.m
@@ -1,6 +1,9 @@
-% function test140
+function test140
 %TEST140 test assign with duplicates
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 % addpath ../GraphBLAS
 
@@ -15,7 +18,6 @@
 Cout = gb.assign (Cin, A, { }, { }) ;
 assert (isequal (A, sparse (Cout))) ;
 
-
 I = [2 1 5] ;
 J = [3 3 1 2] ;
 % J = [2 2 1 3] ;
@@ -36,4 +38,3 @@
 
 % assert (isequal (C7, sparse (Cout))) ;
 
-
diff --git a/Test/test141.m b/Test/test141.m
new file mode 100644
index 0000000000..1c517c8da7
--- /dev/null
+++ b/Test/test141.m
@@ -0,0 +1,98 @@
+function test141
+%TEST141 test GrB_eWiseAdd (all types and operators) for dense matrices
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+[bin_ops, ~, ~, classes, ~, ~] = GB_spec_opsall ;
+
+fprintf ('test141 ------------ GrB_eWiseAdd with dense matrices\n') ;
+
+m = 5 ;
+n = 5 ;
+
+rng ('default') ;
+
+M = sprand (m, n, 0.5) ;
+Amat = sparse (100 * rand (m,n)) ;
+Bmat = sparse (100 * rand (m,n)) ;
+Cmat = sparse (100 * rand (m,n)) ;
+Emat = sprand (m, n, 0.5) ;
+Smat = sparse (m,n) ;
+desc.mask = 'structural' ;
+
+A.matrix = Amat ; A.class = 'see below' ;
+B.matrix = Bmat ; B.class = 'see below' ;
+C.matrix = Cmat ; C.class = 'see below' ;
+S.matrix = Smat ; S.class = 'see below' ;
+E.matrix = Emat ; E.class = 'see below' ;
+
+for k2 = 1:length(bin_ops)
+    binop = bin_ops {k2}  ;
+    fprintf ('%s', binop) ;
+
+    for k1 = 1:length (classes)
+        clas = classes {k1}  ;
+
+        op.opname = binop ;
+        op.opclass = clas ;
+        fprintf ('.') ;
+
+        A.class = clas ;
+        B.class = clas ;
+        E.class = clas ;
+
+        if (k2 > 20)
+            % eq, ne, gt, lt, ge, le
+            S.class = 'logical' ;
+            C.class = 'logical' ;
+        else
+            S.class = clas ;
+            C.class = clas ;
+        end
+
+        %---------------------------------------
+        % C = A+B
+        %---------------------------------------
+
+        C0 = GB_spec_eWiseAdd_Matrix (S, [ ], [ ], op, A, B, [ ]) ;
+        C1 = GB_mex_eWiseAdd_Matrix  (S, [ ], [ ], op, A, B, [ ]) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M,struct> = A+B, both A and B dense
+        %---------------------------------------
+
+        C0 = GB_spec_eWiseAdd_Matrix (S, M, [ ], op, A, B, desc) ;
+        C1 = GB_mex_eWiseAdd_Matrix  (S, M, [ ], op, A, B, desc) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M,struct> = A+E, A dense, E sparse
+        %---------------------------------------
+
+        C0 = GB_spec_eWiseAdd_Matrix (S, M, [ ], op, A, E, desc) ;
+        C1 = GB_mex_eWiseAdd_Matrix  (S, M, [ ], op, A, E, desc) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M,struct> = E+A, A dense, E sparse
+        %---------------------------------------
+
+        C0 = GB_spec_eWiseAdd_Matrix (S, M, [ ], op, E, A, desc) ;
+        C1 = GB_mex_eWiseAdd_Matrix  (S, M, [ ], op, E, A, desc) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C += A+B
+        %---------------------------------------
+
+        C0 = GB_spec_eWiseAdd_Matrix (C, [ ], op, op, A, B, [ ]) ;
+        C1 = GB_mex_eWiseAdd_Matrix  (C, [ ], op, op, A, B, [ ]) ;
+        GB_spec_compare (C0, C1) ;
+
+    end
+end
+
+fprintf ('\ntest141: all tests passed\n') ;
+
diff --git a/Test/test142.m b/Test/test142.m
new file mode 100644
index 0000000000..faa52a3dae
--- /dev/null
+++ b/Test/test142.m
@@ -0,0 +1,162 @@
+function test142
+%TEST142 test GrB_assign for dense matrices
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+[bin_ops, ~, ~, classes, ~, ~] = GB_spec_opsall ;
+
+fprintf ('test142 ------------ GrB_assign with dense matrices\n') ;
+
+m = 10 ;
+n = 12 ;
+
+rng ('default') ;
+
+M = sprand (m, n, 0.5) ;
+Amat = sparse (100 * rand (m,n)) ;
+Bmat = sparse (100 * sprand (m,n, 0.5)) ;
+Cmat = sparse (100 * rand (m,n)) ;
+Smat = sparse (m,n) ;
+Xmat = sparse (pi) ;
+desc.mask = 'structural' ;
+drep.outp = 'replace' ;
+
+A.matrix = Amat ; A.class = 'see below' ;
+B.matrix = Bmat ; B.class = 'see below' ;
+C.matrix = Cmat ; C.class = 'see below' ;
+S.matrix = Smat ; S.class = 'see below' ;
+X.matrix = Xmat ; X.class = 'see below' ;
+Bmask = logical (Bmat) ;
+
+for k1 = 1:length (classes)
+    clas = classes {k1}  ;
+    fprintf ('%s', clas) ;
+
+    A.class = clas ;
+    B.class = clas ;
+    X.class = clas ;
+
+    for k3 = 1:2
+
+        if (k3 == 1)
+            C.class = 'logical' ;
+            S.class = 'logical' ;
+        else
+            C.class = clas ;
+            S.class = clas ;
+        end
+
+        %---------------------------------------
+        % C<M> = A where A is dense
+        %---------------------------------------
+
+        C0 = GB_spec_assign (C, M, [ ], A, [ ], [ ], [ ], false) ;
+        C1 = GB_mex_assign  (C, M, [ ], A, [ ], [ ], [ ]) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M> = B where B is sparse
+        %---------------------------------------
+
+        C0 = GB_spec_assign (C, M, [ ], B, [ ], [ ], [ ], false) ;
+        C1 = GB_mex_assign  (C, M, [ ], B, [ ], [ ], [ ]) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M,struct> = A where A is dense and C starts empty
+        %---------------------------------------
+
+        C0 = GB_spec_assign (S, M, [ ], A, [ ], [ ], desc, false) ;
+        C1 = GB_mex_assign  (S, M, [ ], A, [ ], [ ], desc) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<B,struct> = B where B is sparse
+        %---------------------------------------
+
+        C0 = GB_spec_assign (C, Bmask, [ ], B, [ ], [ ], desc, false) ;
+        C1 = GB_mex_assign_alias_mask (C, B, desc) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M> = x where C is dense
+        %---------------------------------------
+
+        C0 = GB_spec_assign (C, M, [ ], X, [ ], [ ], [ ], true) ;
+        C1 = GB_mex_assign  (C, M, [ ], X, [ ], [ ], [ ]) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M,struct> = x where C is dense
+        %---------------------------------------
+
+        C0 = GB_spec_assign (C, M, [ ], X, [ ], [ ], desc, true) ;
+        C1 = GB_mex_assign  (C, M, [ ], X, [ ], [ ], desc) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C<M,struct> = x
+        %---------------------------------------
+
+        C0 = GB_spec_assign (S, M, [ ], X, [ ], [ ], desc, true) ;
+        C1 = GB_mex_assign  (S, M, [ ], X, [ ], [ ], desc) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % C = x
+        %---------------------------------------
+
+        C0 = GB_spec_assign (S, [ ], [ ], X, [ ], [ ], [ ], true) ;
+        C1 = GB_mex_assign  (S, [ ], [ ], X, [ ], [ ], [ ]) ;
+        GB_spec_compare (C0, C1) ;
+
+        %---------------------------------------
+        % with accum operators
+        %---------------------------------------
+
+        for k2 = 1:length(bin_ops)
+            binop = bin_ops {k2}  ;
+
+            op.opname = binop ;
+            op.opclass = clas ;
+            fprintf ('.') ;
+
+            %---------------------------------------
+            % C += A where A is dense
+            %---------------------------------------
+
+            C0 = GB_spec_assign (C, [ ], op, A, [ ], [ ], [ ], false) ;
+            C1 = GB_mex_assign  (C, [ ], op, A, [ ], [ ], [ ]) ;
+            GB_spec_compare (C0, C1) ;
+
+            %---------------------------------------
+            % C += B where B is sparse
+            %---------------------------------------
+
+            C0 = GB_spec_assign (C, [ ], op, B, [ ], [ ], [ ], false) ;
+            C1 = GB_mex_assign  (C, [ ], op, B, [ ], [ ], [ ]) ;
+            GB_spec_compare (C0, C1) ;
+
+            %---------------------------------------
+            % C += x
+            %---------------------------------------
+
+            C0 = GB_spec_assign (C, [ ], op, X, [ ], [ ], [ ], true) ;
+            C1 = GB_mex_assign  (C, [ ], op, X, [ ], [ ], [ ]) ;
+            GB_spec_compare (C0, C1) ;
+
+            %---------------------------------------
+            % C<replace> += x
+            %---------------------------------------
+
+            C0 = GB_spec_assign (C, [ ], op, X, [ ], [ ], drep, true) ;
+            C1 = GB_mex_subassign  (C, [ ], op, X, [ ], [ ], drep) ;
+            GB_spec_compare (C0, C1) ;
+
+        end
+    end
+end
+
+fprintf ('\ntest142: all tests passed\n') ;
+
diff --git a/Test/test143.m b/Test/test143.m
new file mode 100644
index 0000000000..a58211d795
--- /dev/null
+++ b/Test/test143.m
@@ -0,0 +1,100 @@
+function test143
+%TEST143 test special cases for C<!M>=A*B and C<M>=A*B
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+fprintf ('test143 ----------------------------- A*B special cases\n') ;
+
+rng ('default') ;   % reset the generator so every run draws the same matrices
+
+n = 3000 ;
+d = 0.001 ;         % density used for A and for every B below
+A = sprand (n, n, d) ;
+
+semiring.add = 'plus' ;         % plus-times over double, so that MATLAB's own
+semiring.multiply = 'times' ;   % A*B can serve as the reference result
+semiring.class = 'double' ;
+
+% coarse Gustavson tasks, C<!M>=A*B, C(:,j) very sparse compared to M(:,j)
+S = sparse (n, n) ;             % empty n-by-n matrix passed as the initial C
+M = logical (sprand (n, n, 0.01)) ;
+M (:,1) = 1 ;                   % set every entry in the first column of the mask
+B = sprand (n, n, d) ;
+C2 = GB_mex_mxm (S, M, [ ], semiring, A, B, struct ('mask', 'comp')) ;
+C = (A*B) .* double (~M) ;      % reference: keep A*B only where M is false
+assert (nnz (C) > 0) ;          % the comparison must not be vacuous
+err = norm (C - C2.matrix, 1) ;
+assert (err < 1e-12) ;
+
+%----------------------------------------
+desc = struct ('axb', 'hash', 'mask', 'comp') ;     % same mask setting, plus axb = 'hash'
+%----------------------------------------
+
+% coarse hash tasks, C<!M>=A*B
+S = sparse (n, n) ;
+M = logical (sprand (n, n, 0.01)) ;
+B = sprand (n, n, d) ;
+C2 = GB_mex_mxm (S, M, [ ], semiring, A, B, desc) ;
+C = (A*B) .* double (~M) ;
+assert (nnz (C) > 0) ;
+err = norm (C - C2.matrix, 1) ;
+assert (err < 1e-12) ;
+
+% fine hash tasks, C<!M>=A*B
+S = sparse (n, 1) ;             % from here on B, M and the result are single columns
+M = logical (sprand (n, 1, 0.01)) ;
+B = sprand (n, 1, d) ;
+C2 = GB_mex_mxm (S, M, [ ], semiring, A, B, desc) ;
+C = (A*B) .* double (~M) ;
+assert (nnz (C) > 0) ;
+err = norm (C - C2.matrix, 1) ;
+assert (err < 1e-12) ;
+
+%----------------------------------------
+desc = struct ('axb', 'hash') ;     % no 'mask' field any more: the reference below uses M, not ~M
+%----------------------------------------
+
+% coarse hash tasks, C<M>=A*B
+S = sparse (n, n) ;
+M = logical (sprand (n, n, 0.01)) ;
+B = sprand (n, n, d) ;
+C2 = GB_mex_mxm (S, M, [ ], semiring, A, B, desc) ;
+C = (A*B) .* double (M) ;       % reference: keep A*B only where M is true
+assert (nnz (C) > 0) ;
+err = norm (C - C2.matrix, 1) ;
+assert (err < 1e-12) ;
+
+% fine hash tasks, C<M>=A*B
+S = sparse (n, 1) ;
+M = logical (sprand (n, 1, 0.01)) ;
+B = sprand (n, 1, d) ;
+M (1:3) = 1 ;                   % presumably forces a few entries to survive the mask,
+A (1:3,1:3) = rand (3) ;        % so the nnz (C) > 0 check below holds -- confirm
+B (1:3) = rand (3,1) ;
+C = (A*B) .* double (M) ;
+assert (nnz (C) > 0) ;
+C2 = GB_mex_mxm (S, M, [ ], semiring, A, B, desc) ;
+err = norm (C - C2.matrix, 1) ;
+assert (err < 1e-12) ;
+
+%----------------------------------------
+m = 10e6 ;                          % note: 10e6 is 1e7
+A = sprand (m, n, d) ;              % A is replaced by a tall m-by-n matrix
+[save save_chunk] = nthreads_get ;  % remember the current settings; restored at the end
+nthreads_set (4, 1) ;               % presumably (nthreads, chunk), mirroring nthreads_get -- confirm
+%----------------------------------------
+
+% fine hash tasks, C=A*B
+S = sparse (m, 1) ;
+B = sprand (n, 1, d) ;
+B (1:100, 1) = rand (100, 1) ;      % overwrite the first 100 entries of B with random values
+C = (A*B) ;
+assert (nnz (C) > 0) ;
+C2 = GB_mex_mxm (S, [ ], [ ], semiring, A, B, desc) ;   % no mask; desc is still axb = 'hash'
+err = norm (C - C2.matrix, 1) ;
+assert (err < 1e-12) ;
+
+nthreads_set (save, save_chunk) ;
+fprintf ('\ntest143: all tests passed\n') ;
+
diff --git a/Test/test144.m b/Test/test144.m
new file mode 100644
index 0000000000..651e1d36f4
--- /dev/null
+++ b/Test/test144.m
@@ -0,0 +1,35 @@
+function test144
+%TEST144 test GB_cumsum
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+fprintf ('test144 ---------------------- test GB_cumsum\n') ;
+
+rng ('default') ;
+
+% random int64 counts; this vector is the input to every call below
+len = 1e5 ;
+counts = int64 (50 * rand (1,len)) ;
+
+for nth = 1:8
+
+    for nmal = 0:2
+
+        % two-output form: cumulative sum and the number of nonzero counts
+        [psum_mex, nz_mex] = GB_mex_cumsum (counts, nth, nmal) ;
+        psum_ref = cumsum ([0 counts]) ;
+        nz_ref = sum (counts ~= 0) ;
+        assert (isequal (psum_ref, psum_mex)) ;
+        assert (nz_ref == nz_mex) ;
+
+        % one-output form: cumulative sum only
+        psum_mex = GB_mex_cumsum (counts, nth, nmal) ;
+        psum_ref = cumsum ([0 counts]) ;
+        assert (isequal (psum_ref, psum_mex)) ;
+
+    end
+end
+
+fprintf ('test144: all tests passed\n') ;
+
diff --git a/Test/test145.m b/Test/test145.m
new file mode 100644
index 0000000000..553413c535
--- /dev/null
+++ b/Test/test145.m
@@ -0,0 +1,77 @@
+function test145
+%TEST145 test dot4
+% GB_AxB_dot4 computes C+=A'*B when C is dense.
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+fprintf ('test145 -------------------- C+=A''*B when C is dense, with dot4\n') ;
+
+rng ('default') ;
+
+A = sparse (rand (4)) ;     % 4-by-4 full random values, held in sparse storage
+B = sparse (rand (4)) ;
+C = sparse (rand (4)) ;
+AT = A' ;                   % explicit transposes, paired below with the 'tran' descriptors
+BT = B' ;
+
+semiring.add = 'plus' ;
+semiring.multiply = 'div' ;
+semiring.class = 'double' ;
+[mult_op add_op id] = GB_spec_semiring (semiring) ;     % only add_op is used below, as the accum
+
+dnn = struct ('axb', 'dot') ;                                   % neither input transposed
+dtn = struct ('axb', 'dot', 'in0', 'tran') ;                    % first input transposed
+dnt = struct ('axb', 'dot', 'in1', 'tran') ;                    % second input transposed
+dtt = struct ('axb', 'dot', 'in0', 'tran', 'in1', 'tran') ;     % both inputs transposed
+
+% C = GB_mex_mxm (C, Mask, accum, semiring, A, B, desc)
+
+C2 = GB_mex_mxm  (C, [ ], add_op, semiring, A, B, dnn) ;
+C1 = GB_spec_mxm (C, [ ], add_op, semiring, A, B, dnn) ;
+GB_spec_compare (C1, C2) ;
+
+C2 = GB_mex_mxm  (C, [ ], add_op, semiring, AT, B, dtn) ;
+C1 = GB_spec_mxm (C, [ ], add_op, semiring, AT, B, dtn) ;
+GB_spec_compare (C1, C2) ;
+
+C2 = GB_mex_mxm  (C, [ ], add_op, semiring, A, BT, dnt) ;
+C1 = GB_spec_mxm (C, [ ], add_op, semiring, A, BT, dnt) ;
+GB_spec_compare (C1, C2) ;
+
+C2 = GB_mex_mxm  (C, [ ], add_op, semiring, AT, BT, dtt) ;
+C1 = GB_spec_mxm (C, [ ], add_op, semiring, AT, BT, dtt) ;
+GB_spec_compare (C1, C2) ;
+
+X = 1./A ;
+C1 = X*B ;      % reference value for the GB_mex_rdiv and GB_mex_rdiv2 calls that follow
+
+C2 = GB_mex_rdiv  (A, B,   1003) ;     % NOTE(review): 1003 is not explained here; presumably a method selector -- confirm
+assert (norm (C1-C2,1) < 1e-5)
+
+C2 = GB_mex_rdiv2 (A, B,   false, false, 1003, 0) ;    % the two flags presumably mark A and B as pre-transposed -- confirm
+assert (norm (C1-C2,1) < 1e-5)
+
+C2 = GB_mex_rdiv2 (AT, B,  true,  false, 1003, 0) ;
+assert (norm (C1-C2,1) < 1e-5)
+
+C2 = GB_mex_rdiv2 (A, BT,  false, true,  1003, 0) ;
+assert (norm (C1-C2,1) < 1e-5)
+
+C2 = GB_mex_rdiv2 (AT, BT, true,  true,  1003, 0) ;
+assert (norm (C1-C2,1) < 1e-5)
+
+% update C in place with dot4:
+X = 1./B ;
+C1 = A*X + pi ;     % reference for the calls below, which pass 1 and pi as the last two arguments
+
+C2 = GB_mex_rdiv2 (A, B,   false, false, 1003, 1, pi) ;
+assert (norm (C1-C2,1) < 1e-5)
+
+C2 = GB_mex_rdiv2 (AT, B,  true,  false, 1003, 1, pi) ;
+assert (norm (C1-C2,1) < 1e-5)
+
+C2 = GB_mex_rdiv2 (A, BT,  false, true,  1003, 1, pi) ;
+assert (norm (C1-C2,1) < 1e-5)
+
+fprintf ('test145: all tests passed\n') ;
diff --git a/Test/test146.m b/Test/test146.m
new file mode 100644
index 0000000000..e4391e5453
--- /dev/null
+++ b/Test/test146.m
@@ -0,0 +1,32 @@
+function test146
+%TEST146 test C<M,struct> = scalar
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+fprintf ('test146 --------------------- C<M,struct> = scalar\n') ;
+rng ('default') ;   % seed the generator so the random mask is reproducible
+
+m = 5 ;
+n = 4 ;
+
+M = logical (sprand (m, n, 0.5)) ;
+
+C1 = GB_mex_expand (M, pi) ;        % real double scalar
+C2 = sparse (m, n) ;
+C2 (M) = pi ;                       % reference: the scalar wherever M has an entry
+assert (isequal (C2, C1.matrix))
+
+z = 1 + 1i ;
+C1 = GB_mex_expand (M, z) ;         % complex scalar
+C2 = sparse (m, n) ;
+C2 (M) = z ;
+assert (isequal (C2, C1.matrix))
+
+C1 = GB_mex_expand (M, true) ;      % logical scalar
+C2 = logical (sparse (m, n)) ;
+C2 (M) = true ;
+assert (isequal (C2, logical (C1.matrix)))
+
+fprintf ('test146: all tests passed\n') ;
+
diff --git a/Test/test147.m b/Test/test147.m
new file mode 100644
index 0000000000..bf0aa27976
--- /dev/null
+++ b/Test/test147.m
@@ -0,0 +1,26 @@
+function test147
+%TEST147 test C<M>A*B with very sparse M
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+fprintf ('test147 ----------------------------- C<M>A*B with very sparse M\n') ;
+rng ('default') ;
+
+n = 1000 ;
+A = sprand (n, n, 0.01) ;
+A (:,1:300) = 1 ;       % fill the first 300 columns of A completely
+S = sparse (n, n) ;     % empty matrix passed as the initial C
+M = spones (speye (n) + sprand (n, n, 1e-5)) ;  % diagonal plus a few random entries; sparse (n,n,1e-5) set only M(n,n)
+
+semiring.add = 'plus' ;
+semiring.multiply = 'times' ;
+semiring.class = 'double' ;
+
+C1 = GB_mex_mxm (S, M, [ ], semiring, A, A, [ ]) ;
+C2 = (A*A) .* M ;       % reference: every entry of M is 1, so this keeps A*A on the pattern of M
+
+assert (norm (C1.matrix - C2, 1) < 1e-12)
+
+fprintf ('test147: all tests passed\n') ;
+
diff --git a/Test/test148.m b/Test/test148.m
new file mode 100644
index 0000000000..4c9fefa9fe
--- /dev/null
+++ b/Test/test148.m
@@ -0,0 +1,76 @@
+function test148
+%TEST148 eWiseAdd with aliases
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+fprintf ('test148 ---------------eWiseAdd with alias\n') ;
+
+rng ('default') ;
+
+n = 5 ;
+C = sprand (n, n, 0.5) ;
+A = sprand (n, n, 0.5) ;
+M = sprand (n, n, 0.5) ;    % double-valued; used below both as the mask and as an operand
+
+C0 = C + A + A ;            % expected value for the next two checks
+C1 = GB_mex_eWiseAdd_Matrix (C, [ ], 'plus', 'plus', A, A, [ ]) ;
+assert (norm (C0 - C1.matrix, 1) < 1e-12) ;
+
+C2 = GB_mex_ewise_alias2 (C, 'plus', A, [ ]) ;
+assert (norm (C0 - C2.matrix, 1) < 1e-12) ;
+
+C0 = C + A ;                % expected value for the alias1 and alias3 variants
+C2 = GB_mex_ewise_alias1 (C, 'plus', A, [ ]) ;
+assert (norm (C0 - C2.matrix, 1) < 1e-12) ;
+
+C2 = GB_mex_ewise_alias3 (C, 'plus', A, [ ]) ;
+assert (norm (C0 - C2.matrix, 1) < 1e-12) ;
+
+C = sparse (rand (n)) ;     % repeat the same four checks with full random values in C and A
+A = sparse (rand (n)) ;
+
+C0 = C + A + A ;
+C1 = GB_mex_eWiseAdd_Matrix (C, [ ], 'plus', 'plus', A, A, [ ]) ;
+assert (norm (C0 - C1.matrix, 1) < 1e-12) ;
+
+C2 = GB_mex_ewise_alias2 (C, 'plus', A, [ ]) ;
+assert (norm (C0 - C2.matrix, 1) < 1e-12) ;
+
+C0 = C + A ;
+C2 = GB_mex_ewise_alias1 (C, 'plus', A, [ ]) ;
+assert (norm (C0 - C2.matrix, 1) < 1e-12) ;
+
+C2 = GB_mex_ewise_alias3 (C, 'plus', A, [ ]) ;
+assert (norm (C0 - C2.matrix, 1) < 1e-12) ;
+
+desc = struct ('mask', 'structural') ;  % NOTE(review): presumably only the pattern of M acts as the mask -- confirm
+
+C1 = GB_mex_eWiseAdd_Matrix (C, M, [ ], 'plus', M, M, desc) ;   % M is the mask and both operands
+C2 = GB_mex_ewise_alias4 (C, M, 'plus', desc) ;
+assert (norm (C1.matrix - C2.matrix, 1) < 1e-12) ;
+
+M = sparse (true (n)) ;     % logical mask with every entry present
+C1 = GB_mex_eWiseAdd_Matrix (C, M, [ ], 'plus', M, M, desc) ;
+C2 = GB_mex_ewise_alias4 (C, M, 'plus', desc) ;
+assert (norm (C1.matrix - C2.matrix, 1) < 1e-12) ;
+
+% #define USAGE "C = GB_mex_ewise_alias5 (C, M, op, A, desc)"
+
+C1 = GB_mex_eWiseAdd_Matrix (C, M, [ ], 'plus', A, M, desc) ;   % M is the mask and the second operand
+C2 = GB_mex_ewise_alias5 (C, M, 'plus', A, desc) ;
+assert (norm (C1.matrix - C2.matrix, 1) < 1e-12) ;
+
+% C<M> = A+M
+A = sparse (rand (n)) ;
+M = sprand (n, n, 0.05) ;   % much sparser mask than before
+C1 = GB_mex_eWiseAdd_Matrix (C, M, [ ], 'plus', A, M, desc) ;
+C2 = GB_mex_ewise_alias5 (C, M, 'plus', A, desc) ;
+assert (norm (C1.matrix - C2.matrix, 1) < 1e-12) ;
+
+% C<M> = M+A
+C1 = GB_mex_eWiseAdd_Matrix (C, M, [ ], 'plus', M, A, desc) ;   % M is the mask and the first operand
+C2 = GB_mex_ewise_alias6 (C, M, 'plus', A, desc) ;
+assert (norm (C1.matrix - C2.matrix, 1) < 1e-12) ;
+
+fprintf ('test148: all tests passed\n') ;
diff --git a/Test/test149.m b/Test/test149.m
new file mode 100644
index 0000000000..4b00d55fb7
--- /dev/null
+++ b/Test/test149.m
@@ -0,0 +1,42 @@
+function test149
+%TEST149 test fine hash method for C<!M>=A*B
+
+fprintf ('test149: --------- fine hash method for C<!M>=A*B\n') ;
+
+rng ('default') ;
+
+[save save_chunk] = nthreads_get ;  % remember the caller's settings; restored before returning
+nthreads_set (4, 1) ;
+
+desc.axb = 'hash' ;
+desc.mask = 'complement' ;
+
+n = 1000 ;
+m = 1e8 ;
+A = sparse (m, n) ;                 % very tall matrix ...
+A (1:n, 1:n) = rand (n) ;           % ... whose only entries are in its leading n-by-n block
+B = sparse (rand (n,1)) ;
+C = sparse (m, 1) ;
+
+M = logical (sparse (m, 1)) ;
+M (1:n, 1) = sparse (rand (n,1) > 0.5) ;
+
+semiring.add = 'plus' ;
+semiring.multiply = 'times' ;
+semiring.class = 'double' ;
+
+GrB.burble (1) ;
+tic
+C1 = GB_mex_mxm (C, M, [ ], semiring, A, B, desc) ;
+toc
+GrB.burble (0) ;
+tic
+C2 = (A*B) .* double (~M) ;         % reference: keep A*B only where M is false
+toc
+
+cnorm = norm (C2,1) ;
+assert (norm (C1.matrix - C2, 1) / cnorm < 1e-12)
+
+nthreads_set (save, save_chunk) ;   % do not leak the thread settings into later tests
+fprintf ('test149: all tests passed\n') ;
+
diff --git a/Test/test15.m b/Test/test15.m
index 77f0331d00..5f5317fea7 100644
--- a/Test/test15.m
+++ b/Test/test15.m
@@ -1,7 +1,7 @@
 function test15
 %TEST15 test AxB and AdotB internal functions
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n --------------------- GB_mex_AxB, GB_mex_AdotB tests\n') ;
diff --git a/Test/test150.m b/Test/test150.m
new file mode 100644
index 0000000000..5c7126a1c5
--- /dev/null
+++ b/Test/test150.m
@@ -0,0 +1,43 @@
+function test150
+%TEST150 test GrB_mxm with typecasting and zombies (dot3)
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+fprintf ('test150: ------- GrB_mxm with typecasting and zombies (dot3)\n') ;
+
+[~, ~, ~, classes, ~, ~] = GB_spec_opsall ;
+
+% plus-times semiring over double; identity is handed to GB_spec_compare below
+semiring = struct ('add', 'plus', 'multiply', 'times', 'class', 'double') ;
+identity = 0 ;
+
+% descriptor with axb set to 'dot'
+dnn = struct ('axb', 'dot') ;
+
+% C is m-by-n, A is m-by-s, B is s-by-n
+m = 8 ;
+n = 5 ;
+s = 4 ;
+density = 0.1 ;
+
+for kclass = 1:length (classes)
+
+    % A and B take each class in turn, while C is always int32
+    aclas = classes {kclass} ;
+    fprintf ('%s ', aclas) ;
+
+    A = GB_spec_random (m, s, density, 100, aclas) ;
+    B = GB_spec_random (s, n, density, 100, aclas) ;
+    M = GB_random_mask (m, n, 0.2) ;
+
+    % empty int32 output: no values and an all-false pattern
+    C = struct ('matrix', sparse (m,n), 'class', 'int32', 'pattern', false (m,n)) ;
+
+    C0 = GB_spec_mxm (C, M, [ ], semiring, A, B, dnn) ;
+    C1 = GB_mex_mxm  (C, M, [ ], semiring, A, B, dnn) ;
+    GB_spec_compare (C0, C1, identity) ;
+end
+
+fprintf ('\ntest150: all tests passed\n') ;
+
diff --git a/Test/test16.m b/Test/test16.m
index f4cd59e8d8..d82f5c2c1b 100644
--- a/Test/test16.m
+++ b/Test/test16.m
@@ -1,7 +1,7 @@
 function test16
 %TEST16 test user-defined complex type (runs all testc*.m)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % all complex matrix tests
diff --git a/Test/test17.m b/Test/test17.m
index 61ffcb3e46..88ccfea60f 100644
--- a/Test/test17.m
+++ b/Test/test17.m
@@ -1,7 +1,7 @@
 function test17
 %TEST17 test GrB_*_extractElement
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n ------------ testing GrB_extractElement\n') ;
diff --git a/Test/test18.m b/Test/test18.m
index 9ea23a279e..4da9563051 100644
--- a/Test/test18.m
+++ b/Test/test18.m
@@ -1,7 +1,7 @@
 function test18(fulltest)
 %TEST18 test GrB_eWiseAdd and GrB_eWiseMult
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [bin_ops, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test19.m b/Test/test19.m
index 05c21a364a..3690cd6293 100644
--- a/Test/test19.m
+++ b/Test/test19.m
@@ -1,7 +1,7 @@
 function test19(fulltest)
 %TEST19 test GxB_subassign and GrB_*_setElement with many pending operations
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test19b.m b/Test/test19b.m
index 708d1979aa..383c100429 100644
--- a/Test/test19b.m
+++ b/Test/test19b.m
@@ -1,7 +1,7 @@
 function test19b(fulltest)
 %TEST19B test GrB_assign and GrB_*_setElement with many pending operations
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest19b: GrB_assign and setElement, many pending computations\n') ;
diff --git a/Test/test20.m b/Test/test20.m
index ec9103abd4..e007d072ea 100644
--- a/Test/test20.m
+++ b/Test/test20.m
@@ -1,7 +1,7 @@
 function test20(fulltest)
 %TEST20 test GrB_mxm, mxv, and vxm
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [mult_ops, ~, add_ops, classes, ~, ~] = GB_spec_opsall ;
@@ -33,7 +33,7 @@ function test20(fulltest)
     k2_list = 1:length(add_ops) ;
     k3_list = 1:length(classes) ;
 else
-    k1_list = [ 8 ] ;   % times
+    k1_list = [ 9 ] ;   % times
     k2_list = [ 3 ] ;   % plus
     k3_list = [ 11 ] ;  % double
 end
diff --git a/Test/test21.m b/Test/test21.m
index e3bf00c2af..390f6e8dab 100644
--- a/Test/test21.m
+++ b/Test/test21.m
@@ -1,7 +1,7 @@
 function test21(fulltest)
 %TEST21 test GxB_subassign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test21b.m b/Test/test21b.m
index a5ebacb53c..dd8a3f0b7a 100644
--- a/Test/test21b.m
+++ b/Test/test21b.m
@@ -1,7 +1,7 @@
 function test21b (fulltest)
 %TEST21B test GrB_assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test22.m b/Test/test22.m
index 8194c15186..f1e780a7e1 100644
--- a/Test/test22.m
+++ b/Test/test22.m
@@ -1,7 +1,7 @@
 function test22(fulltest)
 %TEST22 test GrB_transpose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test23.m b/Test/test23.m
index 1c87fdae51..d39513a056 100644
--- a/Test/test23.m
+++ b/Test/test23.m
@@ -1,7 +1,7 @@
 function test23(fulltest)
 %TEST23 test GrB_*_build
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [~, ~, ~, classes, ~, ~] = GB_spec_opsall ;
@@ -14,6 +14,8 @@ function test23(fulltest)
 ops = {
 'first',  0, % z = x
 'second', 0, % z = y
+'pair',   1, % z = 1
+'any',    1, % z = pick x or y
 'min',    1, % z = min(x,y)
 'max',    1, % z = max(x,y)
 'plus',   1, % z = x + y
@@ -61,6 +63,7 @@ function test23(fulltest)
     for k1 = 1:size (ops,1)
         op.opname = ops {k1,1} ;
         is_associative = ops {k1,2} ;
+        op_is_any = isequal (op.opname, 'any') ;
 
         fprintf ('%s', op.opname) ;
 
@@ -110,45 +113,48 @@ function test23(fulltest)
 
                     for A_is_csc   = 0:1
 
-                    A = GB_mex_Matrix_build (I, J, X, nrows, ncols, op, ...
-                        cclass, A_is_csc) ;
-                    % A is sparse but may have explicit zeros
-                    if (~spok (A.matrix*1))
-                        fprintf ('test failure: invalid sparse matrix\n') ;
-                        assert (false) ;
-                    end
-                    A.matrix = full (double (A.matrix)) ;
-                    S = GB_spec_build (I, J, X, nrows, ncols, op, 'natural', cclass) ;
-                    if (~isequalwithequalnans (A.matrix, double (S.matrix))) ;
-                        fprintf ('test failure: does not match spec\n') ;
-                        assert (false) ;
-                    end
-                    assert (isequal (S.class, A.class)) ;
-
-                    % build in random order, for associative operators.
-                    if (is_associative)
-                        [S2 p] = GB_spec_build (I, J, X, nrows, ncols, ...
-                            op, 'random', cclass) ;
-                        if (opint)
-                            % integers are perfectly associative
-                            if (~isequal (A.matrix, double (S2.matrix)))
-                                fprintf ('fail: int non-associative\n') ;
-                                assert (false) ;
-                            end
-                        else
-                            % floating point is approximately associative
-                            tol = norm (double (S2.matrix)) * eps (op.opclass) ;
-                            ok = isequal (isnan (A.matrix), isnan (S2.matrix)) ;
-                            A.matrix (isnan (A.matrix)) = 0 ;
-                            S2.matrix (isnan (S2.matrix)) = 0 ;
-                            ok = ok & (norm (double (A.matrix - double (S2.matrix))) < tol) ;
-                            if (~ok)
-                                fprintf ('fail: float non-associative\n') ;
+                        A = GB_mex_Matrix_build (I, J, X, nrows, ncols, op, ...
+                            cclass, A_is_csc) ;
+                        % A is sparse but may have explicit zeros
+                        if (~spok (A.matrix*1))
+                            fprintf ('test failure: invalid sparse matrix\n') ;
+                            assert (false) ;
+                        end
+                        A.matrix = full (double (A.matrix)) ;
+                        if (~op_is_any)
+                            S = GB_spec_build (I, J, X, nrows, ncols, op, 'natural', cclass) ;
+                            if (~isequalwithequalnans (A.matrix, double (S.matrix))) ;
+                                fprintf ('test failure: does not match spec\n') ;
                                 assert (false) ;
                             end
+                            assert (isequal (S.class, A.class)) ;
                         end
-                    end
 
+                        % build in random order, for associative operators.
+                        if (is_associative)
+                            [S2 p] = GB_spec_build (I, J, X, nrows, ncols, ...
+                                op, 'random', cclass) ;
+                            if (op_is_any)
+                                % 'any' reduction
+                            elseif (opint)
+                                % integers are perfectly associative
+                                if (~isequal (A.matrix, double (S2.matrix)))
+                                    fprintf ('fail: int non-associative\n') ;
+                                    assert (false) ;
+                                end
+                            else
+                                % floating point is approximately associative
+                                tol = norm (double (S2.matrix)) * eps (op.opclass) ;
+                                ok = isequal (isnan (A.matrix), isnan (S2.matrix)) ;
+                                A.matrix (isnan (A.matrix)) = 0 ;
+                                S2.matrix (isnan (S2.matrix)) = 0 ;
+                                ok = ok & (norm (double (A.matrix - double (S2.matrix))) < tol) ;
+                                if (~ok)
+                                    fprintf ('fail: float non-associative\n') ;
+                                    assert (false) ;
+                                end
+                            end
+                        end
                     end
 
                     % build a vector in the natural order (discard J)
@@ -156,19 +162,19 @@ function test23(fulltest)
                     % fprintf ('opclass: %s ', opclass) ;
                     % fprintf ('xclass: %s\n', xclass) ;
                     A = GB_mex_Vector_build (I, X, nrows, op, cclass) ;
-                    % pause
                     % A is sparse but may have explicit zeros
                     if (~spok (A.matrix*1))
                         fprintf ('test failure: invalid sparse matrix\n') ;
                         assert (false) ;
                     end
-                    A.matrix = full (double (A.matrix)) ;
-                    S = GB_spec_build (I, [ ], X, nrows, 1, op, 'natural', cclass) ;
-                    if (~isequalwithequalnans (A.matrix, double (S.matrix))) ;
-                        fprintf ('test failure: does not match spec\n') ;
-                        assert (false) ;
+                    if (~op_is_any)
+                        A.matrix = full (double (A.matrix)) ;
+                        S = GB_spec_build (I, [ ], X, nrows, 1, op, 'natural', cclass) ;
+                        if (~isequalwithequalnans (A.matrix, double (S.matrix))) ;
+                            fprintf ('test failure: does not match spec\n') ;
+                            assert (false) ;
+                        end
                     end
-
                 end
             end
         end
diff --git a/Test/test24.m b/Test/test24.m
index 0a76500fe6..b23c4cde54 100644
--- a/Test/test24.m
+++ b/Test/test24.m
@@ -2,7 +2,7 @@ function test24(fulltest)
 %TEST24 test GrB_reduce
 % test24(fulltest); fulltest=1 if longer test, 0 for quick test
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [accum_ops, ~, add_ops, classes, ~, ~] = GB_spec_opsall ;
@@ -125,7 +125,7 @@ function test24(fulltest)
                                     (cin, accum, reduce, B) ;
                                 assert (isequal (c, c3))
 
-%                               % row-wise reduce matrix to vector
+                                % row-wise reduce matrix to vector
 
                                 % no mask
                                 x = GB_mex_reduce_to_vector ...
@@ -142,7 +142,7 @@ function test24(fulltest)
                                     (xin, mask, accum, reduce, A, [ ]) ;
                                 GB_spec_compare (x, x3, identity) ;
 
-%                               % col-wise reduce matrix to vector
+                                % col-wise reduce matrix to vector
 
                                 % no mask
                                 y = GB_mex_reduce_to_vector ...
diff --git a/Test/test25.m b/Test/test25.m
index a4d398d532..1d993769ed 100644
--- a/Test/test25.m
+++ b/Test/test25.m
@@ -1,7 +1,7 @@
 function test25
 %TEST25 test GxB_select
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest25: GxB_select tests\n') ;
diff --git a/Test/test26.m b/Test/test26.m
index aca4974662..c1b1391f19 100644
--- a/Test/test26.m
+++ b/Test/test26.m
@@ -1,7 +1,7 @@
 function test26(longtests)
 %TEST26 performance test for GxB_select
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest26 ------------------------------performance of GxB_select\n') ;
@@ -133,5 +133,16 @@ function test26(longtests)
     end
 end
 
+% this call is expected to throw; ok is cleared only if it returns normally
+ok = true ;
+A = sparse (ones (4)) ;
+try
+    C = GB_mex_select (A, [ ], [ ], 'tril', A, A, [ ]) ;
+    ok = false ;
+catch me
+    fprintf ('\nexpected error: %s\n', me.message) ;
+end
+assert (ok) ;
+
 nthreads_set (save_nthreads, save_chunk) ;
 fprintf ('test26: all tests passed\n') ;
diff --git a/Test/test27.m b/Test/test27.m
index c73adceaa0..06c31d936c 100644
--- a/Test/test27.m
+++ b/Test/test27.m
@@ -1,8 +1,8 @@
 function test27
 %TEST27 test GxB_select with user-defined select op (band)
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('test 27: GxB_select with user-defined op (band)\n') ;
 
@@ -13,17 +13,14 @@
         for lo = -12:12
             for hi = -12:12
 
-                for pre = 0:1
+                C1 = GB_mex_band (A, lo, hi, 0) ;
+                C2 = triu (tril (A,hi), lo) ;
+                assert (isequal (C1, C2)) ;
 
-                    C1 = GB_mex_band (A, lo, hi, 0, pre) ;
-                    C2 = triu (tril (A,hi), lo) ;
-                    assert (isequal (C1, C2)) ;
+                C1 = GB_mex_band (A, lo, hi, 1) ;
+                C2 = triu (tril (A',hi), lo) ;
+                assert (isequal (C1, C2)) ;
 
-                    C1 = GB_mex_band (A, lo, hi, 1, pre) ;
-                    C2 = triu (tril (A',hi), lo) ;
-                    assert (isequal (C1, C2)) ;
-
-                end
             end
         end
     end
diff --git a/Test/test28.m b/Test/test28.m
index 2cb8c2907e..2b3d51c53b 100644
--- a/Test/test28.m
+++ b/Test/test28.m
@@ -1,7 +1,7 @@
 function test28
 %TEST28 test mxm with aliased inputs, C<C> = accum(C,C*C)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -19,7 +19,7 @@
 
         C1 = GB_mex_mxm_alias (C, 'plus', semiring, [ ]) ;
         C2 = GB_mex_mxm (C, C, 'plus', semiring, C, C, [ ]) ;
-        assert (isequal (C1, C2)) ;
+        assert (norm (C1.matrix - C2.matrix, 1) < 1e-12) ;
     end
 end
 
diff --git a/Test/test29.m b/Test/test29.m
index f72750ef7b..c23e7ff226 100644
--- a/Test/test29.m
+++ b/Test/test29.m
@@ -1,8 +1,8 @@
 function test29
 %TEST29 GrB_reduce with zombies
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [~, ~, ~, classes, ~, ~] = GB_spec_opsall ;
 
diff --git a/Test/test30.m b/Test/test30.m
index a5e2079965..6503bb5bbf 100644
--- a/Test/test30.m
+++ b/Test/test30.m
@@ -1,7 +1,7 @@
 function test30
 %TEST30 test GxB_subassign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
     [save save_chunk] = nthreads_get ;
diff --git a/Test/test30b.m b/Test/test30b.m
index db74a3c8f0..86388422d1 100644
--- a/Test/test30b.m
+++ b/Test/test30b.m
@@ -1,7 +1,7 @@
 function test30b
 %TEST30B performance test GB_mex_assign, scalar expansionb
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [save_nthreads save_chunk] = nthreads_get ;
diff --git a/Test/test31.m b/Test/test31.m
index 0cb64ca3f6..9dc141b50f 100644
--- a/Test/test31.m
+++ b/Test/test31.m
@@ -1,7 +1,7 @@
 function test31
 %TEST31 test GrB_transpose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n------------------- simple tests of GB_mex_transpose\n') ;
@@ -40,14 +40,15 @@
 assert (isequal (C.matrix,Cin+A)) ;
 
 ops = {
-    'first',    0,
-    'second',   0,
-    'min',      1,
-    'max',      1,
-    'plus',     1,
-    'minus',    0,
-    'times',    1,
-    'div',      0 } ;
+    'first',
+    'second',
+    'pair',
+    'min',
+    'max',
+    'plus',
+    'minus',
+    'times',
+    'div',   } ;
 
 for k = 1:length(ops)
     op = ops {k} ;
diff --git a/Test/test32.m b/Test/test32.m
index eff2547263..9d19df56b0 100644
--- a/Test/test32.m
+++ b/Test/test32.m
@@ -1,7 +1,7 @@
 function test32
 %TEST32 test GrB_mxm
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----- quick test for GB_mex_mxm\n') ;
diff --git a/Test/test33.m b/Test/test33.m
index 1d6ee02836..265d5ebd5a 100644
--- a/Test/test33.m
+++ b/Test/test33.m
@@ -1,7 +1,7 @@
 function test33
 %TEST33 test a semiring
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 semiring = struct ( ...
diff --git a/Test/test34.m b/Test/test34.m
index 79eae24ce9..619789a844 100644
--- a/Test/test34.m
+++ b/Test/test34.m
@@ -1,7 +1,7 @@
 function test34
 %TEST34 test GrB_eWiseAdd
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----- quick test for GB_mex_eWiseAdd_Matrix\n') ;
diff --git a/Test/test35.m b/Test/test35.m
index a30ef24eeb..2bd44aea01 100644
--- a/Test/test35.m
+++ b/Test/test35.m
@@ -1,7 +1,7 @@
 function test35
 %TEST35 test GrB_*_extractTuples
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n test35 ---------------------- quick test of GrB_extractTuples\n') ;
diff --git a/Test/test36.m b/Test/test36.m
index 7147cb260f..b0f2f875aa 100644
--- a/Test/test36.m
+++ b/Test/test36.m
@@ -1,7 +1,7 @@
 function test36
 %TEST36 performance test of matrix subref
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest36 --------------------- performance of GB_Matrix_subref\n') ;
diff --git a/Test/test37.m b/Test/test37.m
index 84a2dea62a..20ebb793c5 100644
--- a/Test/test37.m
+++ b/Test/test37.m
@@ -1,7 +1,7 @@
 function test37
 %TEST37 performance test of qsort
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n------------ testing GB_mex_qsort performance\n') ;
diff --git a/Test/test38.m b/Test/test38.m
index 08eab558a6..e1c39b1528 100644
--- a/Test/test38.m
+++ b/Test/test38.m
@@ -1,7 +1,7 @@
 function test38
 %TEST38 test GrB_transpose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n ----------- testing GB_mex_transpose on SuiteSparse matrices\n') ;
diff --git a/Test/test39.m b/Test/test39.m
index 76e274444c..326b52f535 100644
--- a/Test/test39.m
+++ b/Test/test39.m
@@ -1,7 +1,7 @@
 function test39(use_ssget)
 %TEST39 performance test for GrB_transpose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest39 performance tests : GrB_transpose \n') ;
@@ -182,7 +182,7 @@ function test39(use_ssget)
 tic
 C1 = (Cin + B) + A ;
 toc
-tm4 = toc ;
+tm5 = toc ;
 
 tic
 C3 = GB_mex_eWiseAdd_Matrix (Cin, [ ], 'plus', 'plus', A, B) ;
@@ -224,7 +224,7 @@ function test39(use_ssget)
 toc (tstart)
 tg2 = grbresults ;
 fprintf ('GraphBLAS time: %g\n', tg1+tg2) ;
-fprintf ('speedup over MATLAB: %g\n\n', tm4/(tg1+tg2)) ;
+fprintf ('speedup over MATLAB: %g\n\n', tm5/(tg1+tg2)) ;
 assert (isequal (C1, C4)) ;;
 
 nthreads_set (save, save_chunk) ;
diff --git a/Test/test40.m b/Test/test40.m
index 36d55e0d02..ba452689a7 100644
--- a/Test/test40.m
+++ b/Test/test40.m
@@ -1,7 +1,7 @@
 function test40
 %TEST40 test GrB_Matrix_extractElement
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n ------ quick test of GrB_Matrix_extractElement\n') ;
diff --git a/Test/test41.m b/Test/test41.m
index 3794f9c628..20b02d61cd 100644
--- a/Test/test41.m
+++ b/Test/test41.m
@@ -1,7 +1,7 @@
 function test41
 %TEST41 test AxB
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n -------------- simple GB_mex_AxB numeric tests\n') ;
diff --git a/Test/test42.m b/Test/test42.m
index 10f3523046..f0422d13a6 100644
--- a/Test/test42.m
+++ b/Test/test42.m
@@ -1,7 +1,7 @@
 function test42
 %TEST42 test GrB_Matrix_build
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----------------------- performance tests for GrB_Matrix_build\n') ;
diff --git a/Test/test43.m b/Test/test43.m
index f991d9f310..a96775decc 100644
--- a/Test/test43.m
+++ b/Test/test43.m
@@ -1,7 +1,7 @@
 function test43
 %TEST43 test subref
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n------------------------------ testing GB_mex_Matrix_subref\n') ;
diff --git a/Test/test44.m b/Test/test44.m
index 751261122f..6c5096f12f 100644
--- a/Test/test44.m
+++ b/Test/test44.m
@@ -1,7 +1,7 @@
 function test44(longtests)
 %TEST44 test qsort
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest44\n------------------------------------- qsort tests\n') ;
diff --git a/Test/test45.m b/Test/test45.m
index 2bab79cae8..e108208b2f 100644
--- a/Test/test45.m
+++ b/Test/test45.m
@@ -1,7 +1,7 @@
 function test45(use_ssget)
 %TEST45 test GrB_*_setElement and GrB_*_*build
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest45\n------------------ testing GrB_setElement and _build\n') ;
diff --git a/Test/test46.m b/Test/test46.m
index dcb7d94783..37c12168f0 100644
--- a/Test/test46.m
+++ b/Test/test46.m
@@ -1,7 +1,7 @@
 function test46
 %TEST46 performance test of GxB_subassign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n--------------performance test GB_mex_subassign\n') ;
diff --git a/Test/test46b.m b/Test/test46b.m
index bb671d4f64..1685b96353 100644
--- a/Test/test46b.m
+++ b/Test/test46b.m
@@ -1,7 +1,7 @@
 function test46b
 %TEST46B performance test of GrB_assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n--------------performance test GB_mex_assign\n') ;
diff --git a/Test/test47.m b/Test/test47.m
index eac1c508e0..f66a5128e2 100644
--- a/Test/test47.m
+++ b/Test/test47.m
@@ -1,7 +1,7 @@
 function test47
 %TEST47 prformance test of GrB_vxm
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/test48.m b/Test/test48.m
index b7d678c585..993f219d65 100644
--- a/Test/test48.m
+++ b/Test/test48.m
@@ -1,7 +1,7 @@
 function test48
 %TEST48 performance test of GrB_mxm
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [save save_chunk] = nthreads_get ;
@@ -15,22 +15,22 @@
 dt_auto = struct ('inp0', 'tran') ;
 dt_dot  = struct ('inp0', 'tran', 'axb', 'dot') ;
 dt_gus  = struct ('inp0', 'tran', 'axb', 'gustavson') ;
-dt_heap = struct ('inp0', 'tran', 'axb', 'heap') ;
+dt_hash = struct ('inp0', 'tran', 'axb', 'hash') ;
 
 da_auto = struct ;
 da_dot  = struct ('axb', 'dot') ;
 da_gus  = struct ('axb', 'gustavson') ;
-da_heap = struct ('axb', 'heap') ;
+da_hash = struct ('axb', 'hash') ;
 
 dtn_auto = struct ('inp0', 'tran') ;
 dtn_dot  = struct ('inp0', 'tran', 'axb', 'dot') ;
 dtn_gus  = struct ('inp0', 'tran', 'axb', 'gustavson') ;
-dtn_heap = struct ('inp0', 'tran', 'axb', 'heap') ;
+dtn_hash = struct ('inp0', 'tran', 'axb', 'hash') ;
 
 dtt_auto = struct ('inp0', 'tran', 'inp1', 'tran') ;
 dtt_dot  = struct ('inp0', 'tran', 'inp1', 'tran', 'axb', 'dot') ;
 dtt_gus  = struct ('inp0', 'tran', 'inp1', 'tran', 'axb', 'gustavson') ;
-dtt_heap = struct ('inp0', 'tran', 'inp1', 'tran', 'axb', 'heap') ;
+dtt_hash = struct ('inp0', 'tran', 'inp1', 'tran', 'axb', 'hash') ;
 
 semiring.multiply = 'times' ;
 semiring.add = 'plus' ;
@@ -88,10 +88,10 @@
             assert (isequal (method, 'Gustavson')) ;
 
             % tic
-            ch = GB_mex_mxm (w, [],[], semiring, A, x, dt_heap) ;
+            ch = GB_mex_mxm (w, [],[], semiring, A, x, dt_hash) ;
             % t = toc ;
             [th method] = grbresults ;
-            assert (isequal (method, 'heap')) ;
+            assert (isequal (method, 'hash')) ;
 
             tic
             c0 = A'*x ;
@@ -103,9 +103,9 @@
             assert (isequal_roundoff (c0, ch.matrix)) ;
 
             fprintf ('%8d : ', nnz (x)) ;
-            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f heap: %10.4f MATLAB %10.4f', ...
+            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f hash: %10.4f MATLAB %10.4f', ...
                 ta, auto_method(1), t, tg, th, t2) ;
-            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f heap: %10.2f\n', ...
+            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f hash: %10.2f\n', ...
                 t2/ta, t2/t, t2/tg, t2/th) ;
 
         end
@@ -144,10 +144,10 @@
             assert (isequal (method, 'Gustavson')) ;
 
             % tic
-            ch = GB_mex_mxm (w, [],[], semiring, A, x, da_heap) ;
+            ch = GB_mex_mxm (w, [],[], semiring, A, x, da_hash) ;
             % t = toc ;
             [th method] = grbresults ;
-            assert (isequal (method, 'heap')) ;
+            assert (isequal (method, 'hash')) ;
 
             tic
             c0 = A*x ;
@@ -159,9 +159,9 @@
             assert (isequal_roundoff (c0, ch.matrix)) ;
 
             fprintf ('%8d : ', nnz (x)) ;
-            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f heap: %10.4f MATLAB %10.4f', ...
+            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f hash: %10.4f MATLAB %10.4f', ...
                 ta, auto_method(1), t, tg, th, t2) ;
-            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f heap: %10.2f\n', ...
+            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f hash: %10.2f\n', ...
                 t2/ta, t2/t, t2/tg, t2/th) ;
 
         end
@@ -202,10 +202,10 @@
             assert (isequal (method, 'Gustavson')) ;
 
             % tic
-            ch = GB_mex_mxm (w, [],[], semiring, x, A, dtn_heap) ;
+            ch = GB_mex_mxm (w, [],[], semiring, x, A, dtn_hash) ;
             % t = toc ;
             [th method] = grbresults ;
-            assert (isequal (method, 'heap')) ;
+            assert (isequal (method, 'hash')) ;
 
             tic
             c0 = x'*A ;
@@ -218,9 +218,9 @@
             assert (isequal_roundoff (c0, ch.matrix)) ;
 
             fprintf ('%8d : ', nnz (x)) ;
-            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f heap: %10.4f MATLAB %10.4f', ...
+            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f hash: %10.4f MATLAB %10.4f', ...
                 ta, auto_method(1), t, tg, th, t2) ;
-            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f heap: %10.2f\n', ...
+            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f hash: %10.2f\n', ...
                 t2/ta, t2/t, t2/tg, t2/th) ;
 
         end
@@ -259,10 +259,10 @@
             assert (isequal (method, 'Gustavson')) ;
 
             % tic
-            ch = GB_mex_mxm (w, [],[], semiring, x, A, dtt_heap) ;
+            ch = GB_mex_mxm (w, [],[], semiring, x, A, dtt_hash) ;
             % t = toc ;
             [th method] = grbresults ;
-            assert (isequal (method, 'heap')) ;
+            assert (isequal (method, 'hash')) ;
 
             tic
             c0 = x'*A' ;
@@ -274,9 +274,9 @@
             assert (isequal_roundoff (c0, ch.matrix)) ;
 
             fprintf ('%8d : ', nnz (x)) ;
-            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f heap: %10.4f MATLAB %10.4f', ...
+            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f hash: %10.4f MATLAB %10.4f', ...
                 ta, auto_method(1), t, tg, th, t2) ;
-            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f heap: %10.2f\n', ...
+            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f hash: %10.2f\n', ...
                 t2/ta, t2/t, t2/tg, t2/th) ;
 
         end
@@ -318,10 +318,10 @@
             assert (isequal (method, 'Gustavson')) ;
 
             % tic
-            ch = GB_mex_mxm (w, [],[], semiring, A, x, dtt_heap) ;
+            ch = GB_mex_mxm (w, [],[], semiring, A, x, dtt_hash) ;
             % t = toc ;
             [th method] = grbresults ;
-            assert (isequal (method, 'heap')) ;
+            assert (isequal (method, 'hash')) ;
 
             tic
             c0 = A'*x' ;
@@ -333,9 +333,9 @@
             assert (isequal_roundoff (c0, ch.matrix)) ;
 
             fprintf ('%8d : ', nnz (x)) ;
-            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f heap: %10.4f MATLAB %10.4f', ...
+            fprintf ('auto: %10.4f(%s) dot: %10.4f gus: %10.4f hash: %10.4f MATLAB %10.4f', ...
                 ta, auto_method(1), t, tg, th, t2) ;
-            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f heap: %10.2f\n', ...
+            fprintf (' speedup auto: %10.2f dot: %10.2f gus: %10.2f hash: %10.2f\n', ...
                 t2/ta, t2/t, t2/tg, t2/th) ;
 
         end
diff --git a/Test/test49.m b/Test/test49.m
index d845589cdf..077d918243 100644
--- a/Test/test49.m
+++ b/Test/test49.m
@@ -1,7 +1,7 @@
 function test49
 %TEST49 performance test of GrB_mxm (dot product method, A'*B)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test50.m b/Test/test50.m
index fc06209b78..8a97fa2430 100644
--- a/Test/test50.m
+++ b/Test/test50.m
@@ -1,7 +1,7 @@
 function test50
 %TEST50 test AxB numeric and symbolic
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----------------------------- GB_mex_AxB\n') ;
diff --git a/Test/test51.m b/Test/test51.m
index 32e658c3d4..08f0571094 100644
--- a/Test/test51.m
+++ b/Test/test51.m
@@ -1,7 +1,7 @@
 function test51
 %TEST51 test GxB_subassign, multiply operations
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n-----------performance test GB_mex_subassign, multiple ops\n') ;
diff --git a/Test/test51b.m b/Test/test51b.m
index 68557a5be0..d829c2ab9a 100644
--- a/Test/test51b.m
+++ b/Test/test51b.m
@@ -1,7 +1,7 @@
 function test51b
 %TEST51B test GrB_assign, multiply operations
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n-----------performance test GB_mex_assign, multiple ops\n') ;
diff --git a/Test/test52.m b/Test/test52.m
index 8e5d020b5a..2283a115be 100644
--- a/Test/test52.m
+++ b/Test/test52.m
@@ -1,7 +1,7 @@
 function test52
 %TEST52 test AdotB vs AxB
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----------------------- AdotB versus AxB\n') ;
diff --git a/Test/test53.m b/Test/test53.m
index 6c0f38d9e4..c0e482eaeb 100644
--- a/Test/test53.m
+++ b/Test/test53.m
@@ -1,7 +1,7 @@
 function test53(fulltests)
 %TEST53 test GrB_Matrix_extract
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test54.m b/Test/test54.m
index a3eced17b3..eb1f0e994c 100644
--- a/Test/test54.m
+++ b/Test/test54.m
@@ -1,7 +1,7 @@
 function test54
 %TEST54 test GB_subref (numeric case) with I=lo:hi, J=lo:hi
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest54: ==== quick test for subref and assign (lo:stride:hi):\n') ;
diff --git a/Test/test55.m b/Test/test55.m
index 712f35f727..adaf95ff82 100644
--- a/Test/test55.m
+++ b/Test/test55.m
@@ -1,7 +1,7 @@
 function test55
 %TEST55 test GxB_subassign, illustrate duplicate indices, MATLAB vs GraphBLAS
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % MATLAB and GraphBLAS differ on how repeated indices are handled
diff --git a/Test/test55b.m b/Test/test55b.m
index 019cc0e3f1..176115c45b 100644
--- a/Test/test55b.m
+++ b/Test/test55b.m
@@ -1,7 +1,7 @@
 function test55b
 %TEST55B test GrB_assign, illustrate duplicate indices, MATLAB vs GraphBLAS
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % MATLAB and GraphBLAS differ on how repeated indices are handled
diff --git a/Test/test56.m b/Test/test56.m
index d6d57dc529..c8ca30d0ca 100644
--- a/Test/test56.m
+++ b/Test/test56.m
@@ -1,7 +1,7 @@
 function test56
 %TEST56 test GrB_*_build
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 op.opname = 'min'
diff --git a/Test/test57.m b/Test/test57.m
index 0e25872063..b48919517a 100644
--- a/Test/test57.m
+++ b/Test/test57.m
@@ -5,7 +5,7 @@ function test57 (op)
 %   test57(op)
 %   test57      % Default op is 'max' if no arguments given
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test58.m b/Test/test58.m
index 775c42484e..913fedcf22 100644
--- a/Test/test58.m
+++ b/Test/test58.m
@@ -1,7 +1,7 @@
 function test58 (cover)
 %TEST58 test GrB_eWiseAdd
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin < 1)
diff --git a/Test/test59.m b/Test/test59.m
index 19a5a11ff5..4df26cbd05 100644
--- a/Test/test59.m
+++ b/Test/test59.m
@@ -1,7 +1,7 @@
 function test59
 %TEST59 test GrB_mxm
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----- quick test for GB_mex_mxm\n') ;
diff --git a/Test/test60.m b/Test/test60.m
index 6e1aa3dd67..ed9e970aa0 100644
--- a/Test/test60.m
+++ b/Test/test60.m
@@ -1,7 +1,7 @@
 function test60
 %TEST60 test min and max operators with NaNs
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('min\n') ;
diff --git a/Test/test61.m b/Test/test61.m
index b95e86ba40..a03ca9d80d 100644
--- a/Test/test61.m
+++ b/Test/test61.m
@@ -1,7 +1,7 @@
 function test61
 %TEST61 performance test of GrB_eWiseMult
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----------------------------- eWiseMult performance tests\n') ;
diff --git a/Test/test62.m b/Test/test62.m
index b4eafa99a6..b50ba9656b 100644
--- a/Test/test62.m
+++ b/Test/test62.m
@@ -1,7 +1,7 @@
 function test62
 %TEST62 test GrB_apply
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n ------------ testing GrB_apply\n') ;
diff --git a/Test/test63.m b/Test/test63.m
index b1bd065a51..a2b6c2309c 100644
--- a/Test/test63.m
+++ b/Test/test63.m
@@ -1,7 +1,7 @@
 function test63
 %TEST63 test GraphBLAS operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [mult_ops, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test64.m b/Test/test64.m
index cac1beb024..29f0a55d63 100644
--- a/Test/test64.m
+++ b/Test/test64.m
@@ -1,7 +1,7 @@
 function test64
 %TEST64 test GxB_*_subassign, scalar expansion, with and without duplicates
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n ------------------- quick test of GxB_*_subassign_scalar\n') ;
diff --git a/Test/test64b.m b/Test/test64b.m
index 39419b7e94..c924e2d7fc 100644
--- a/Test/test64b.m
+++ b/Test/test64b.m
@@ -1,7 +1,7 @@
 function test64b
 %TEST64B test GrB_*_assign, scalar expansion, with and without duplicates
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n ------------------- quick test of GrB_*_assign_scalar\n') ;
diff --git a/Test/test65.m b/Test/test65.m
index ed6514e2f0..9a52c6c664 100644
--- a/Test/test65.m
+++ b/Test/test65.m
@@ -1,7 +1,7 @@
 function test65
 %TEST65 test type casting
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 X = logical (rand (4) > 0.5) ;
diff --git a/Test/test66.m b/Test/test66.m
index 834e9aca88..5d9d264256 100644
--- a/Test/test66.m
+++ b/Test/test66.m
@@ -1,7 +1,7 @@
 function test66
 %TEST66 test GrB_reduce
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest66: ---- quick test for GrB_reduce_to_scalar and vector\n') ;
diff --git a/Test/test67.m b/Test/test67.m
index 3a7cf6668d..aee5c1ab4a 100644
--- a/Test/test67.m
+++ b/Test/test67.m
@@ -1,7 +1,7 @@
 function test67
 %TEST67 test GrB_apply
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n---------------------------- quick test of GrB_apply\n') ;
@@ -13,19 +13,6 @@
 C1 = -A ;
 assert (isequal (full (C0.matrix), C1))
 
-% compile time; requires User/my_scale.m4 to be present
-ok = true ;
-try
-    % value of my_scalar in GB_mex_apply is 2
-    C0 = GB_mex_apply (C, [ ], '', 'my_scale', A) ;
-    C1 = 2*A ;
-    ok = isequal (full (C0.matrix), C1) ;
-    fprintf ('user compile-time my_scale.m4 is available\n') ;
-catch
-    fprintf ('user compile-time my_scale.m4 not available\n') ;
-end
-assert (ok) ;
-
 C0 = GB_mex_apply (C, [ ], 'plus', 'identity', A) ;
 C1 = C + A ;
 assert (isequal (full (C0.matrix), C1))
diff --git a/Test/test68.m b/Test/test68.m
index 7f6df8690d..7a56be5b12 100644
--- a/Test/test68.m
+++ b/Test/test68.m
@@ -1,7 +1,7 @@
 function test68(n)
 %TEST68 performance tests for eWiseMult
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest68 --------------------------- quick test of GrB_eWiseMult\n') ;
diff --git a/Test/test69.m b/Test/test69.m
index d336beb0e4..65ced2086c 100644
--- a/Test/test69.m
+++ b/Test/test69.m
@@ -1,39 +1,69 @@
 function test69
 %TEST69 test GrB_assign with aliased inputs, C<C>(:,:) = accum(C(:,:),C)
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
+fprintf ('test69 ------------------  assign alias tests\n') ;
+
 rng ('default') ;
 
 semiring.multiply = 'times' ;
 semiring.add = 'plus' ;
 semiring.class = 'double' ;
 
+desc = struct ('outp', 'replace') ;
+
 seed = 1 ;
 for m = [1 5 10 100]
     for n = [1 5 10 100]
+        fprintf ('.') ;
+
         for trial = 1:30
             A = GB_mex_random (m, n, 10*n, 0, seed) ; seed = seed + 1 ;
             C = GB_mex_random (m, n, 10*n, 0, seed) ; seed = seed + 1 ;
 
+            % C<C> += C
             C1 = GB_mex_assign_alias (C, 'plus', [ ], [ ], [ ]) ;
             C2 = GB_mex_assign (C, [ ], 'plus', C, [ ], [ ], [ ], 0) ;
             assert (isequal (C1, C2)) ;
 
+            % C<C,replace> += C
+            C1 = GB_mex_assign_alias (C, 'plus', [ ], [ ], desc) ;
+            C2 = GB_mex_assign (C, [ ], 'plus', C, [ ], [ ], desc, 0) ;
+            assert (isequal (C1, C2)) ;
+
+            % C<C,replace> = C
+            C1 = GB_mex_assign_alias (C, [ ], [ ], [ ], desc) ;
+            C2 = GB_mex_assign (C, [ ], [ ], C, [ ], [ ], desc, 0) ;
+            assert (isequal (C1, C2)) ;
+
+            % C(I,J)<C> += C(I,J)
             I = uint64 (randperm (m) - 1) ;
             J = uint64 (randperm (n) - 1) ;
-
             C1 = GB_mex_assign_alias (C, 'plus', I, J, [ ]) ;
             C2 = GB_mex_assign (C, [ ], 'plus', C, I, J, [ ], 0) ;
             assert (isequal (C1, C2)) ;
 
-            C1 = GB_mex_subassign_alias (C, 'plus', [ ]) ;
-            C2 = GB_mex_subassign (C, C, 'plus', C, [ ], [ ], [ ]) ;
+            % C<C,replace> += C
+            C1 = GB_mex_subassign_alias (C, 'plus', desc) ;
+            C2 = GB_mex_subassign (C, C, 'plus', C, [ ], [ ], desc) ;
+            assert (isequal (C1, C2)) ;
+
+            % C<C,replace> = C
+            C1 = GB_mex_subassign_alias (C, [ ], desc) ;
+            C2 = GB_mex_subassign (C, C, [ ], C, [ ], [ ], desc) ;
             assert (isequal (C1, C2)) ;
+
+            % C(:,:) = 0
+            Z = GB_mex_expand (sparse (1), 0) ;
+            C1 = GB_mex_subassign (C, [ ], [ ], Z, [ ], [ ], desc) ;
+            C2 = sparse (m, n) ;
+            assert (isequal (1 * C1.matrix, C2)) ;
+
         end
     end
 end
 
-fprintf ('test69: assign alias tests passed\n') ;
+fprintf ('\ntest69: assign alias tests passed\n') ;
 
diff --git a/Test/test70.m b/Test/test70.m
index 3583f8a7fb..fbddf0b1cd 100644
--- a/Test/test70.m
+++ b/Test/test70.m
@@ -21,8 +21,8 @@ function test70 (f)
 %
 % depends on functions in ../Demo/MATLAB
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 addpath ('../Demo/MATLAB') ;
 
diff --git a/Test/test70_plot.m b/Test/test70_plot.m
index e8074d7091..6f307821e6 100644
--- a/Test/test70_plot.m
+++ b/Test/test70_plot.m
@@ -1,8 +1,8 @@
 function test70_plot (T, Nedges, Nnodes)
 %TEST70_PLOT plot the results from test70
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
     load test70_results
diff --git a/Test/test71.m b/Test/test71.m
index ca2fd8930f..1b17ea9745 100644
--- a/Test/test71.m
+++ b/Test/test71.m
@@ -19,8 +19,8 @@ function test71(f)
 %
 % Edit ll_memory_limit and nz_limit to match the memory on your machine.
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [save save_chunk] = nthreads_get ;
 chunk = 4096 ;
diff --git a/Test/test71_plot.m b/Test/test71_plot.m
index 498d5461f6..063d2ee979 100644
--- a/Test/test71_plot.m
+++ b/Test/test71_plot.m
@@ -1,8 +1,8 @@
 function test71_plot (T, Nedges, Nnodes, LLnz, LLmem, LLflops, Ntri, f)
 %TEST71_PLOT plot the results from test71
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 if (nargin == 0)
     if (ismac || ispc)
diff --git a/Test/test71_table.m b/Test/test71_table.m
index 579a3df4ad..bdd4897fe7 100644
--- a/Test/test71_table.m
+++ b/Test/test71_table.m
@@ -1,8 +1,8 @@
 function test71_table
 %TEST71_TABLE print the table for triangle counting results
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 load test71_results
 
diff --git a/Test/test72.m b/Test/test72.m
index 5c9385c6de..f9bceff485 100644
--- a/Test/test72.m
+++ b/Test/test72.m
@@ -1,8 +1,8 @@
 function test72
 %TEST72 special cases for mxm, ewise, ...
 
-%  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
-%  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n--------------test72: special cases\n') ;
 
diff --git a/Test/test73.m b/Test/test73.m
index 8c403ee2f4..7f3d96a2c4 100644
--- a/Test/test73.m
+++ b/Test/test73.m
@@ -1,7 +1,7 @@
 function test73
 %TEST73 performance of C = A*B, with mask
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n----------------- C=A*B performance\n') ;
diff --git a/Test/test74.m b/Test/test74.m
index e503ad2346..fb35a56b0c 100644
--- a/Test/test74.m
+++ b/Test/test74.m
@@ -1,7 +1,7 @@
 function test74
 %TEST74 test GrB_mxm: all built-in semirings
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [mult_ops, ~, add_ops, classes, ~, ~] = GB_spec_opsall ;
@@ -14,26 +14,28 @@
 dtt = struct ( 'inp0', 'tran', 'inp1', 'tran' ) ;
 
 dnn_Gus  = struct ( 'axb', 'gustavson' ) ;
-dnn_heap = struct ( 'axb', 'heap' ) ;
+dnn_hash = struct ( 'axb', 'hash' ) ;
 
 ntrials = 0 ;
 
 rng ('default') ;
 
-m_list = [ 1  2    9] ;
-n_list = [ 1  2   10] ;
-k_list = [ 20 100 12] ;
+m_list = [ 1  2    9  ] ;
+n_list = [ 1  2   10  ] ;
+k_list = [ 20 100 12  ] ;
+d_list = [0.3 0.3 0.3 ] ;
 
 for k0 = 1:size(m_list,2)
 
     m = m_list (k0) ;
     n = n_list (k0) ;
     k = k_list (k0) ;
+    density = d_list (k0) ;
 
     n_semirings = 0 ;
-    A = GB_spec_random (m,k,0.3,100,'none') ;
-    B = GB_spec_random (k,n,0.3,100,'none') ;
-    C = GB_spec_random (m,n,0.3,100,'none') ;
+    A = GB_spec_random (m,k,density,100,'none') ;
+    B = GB_spec_random (k,n,density,100,'none') ;
+    C = GB_spec_random (m,n,density,100,'none') ;
     M = spones (sprandn (m, n, 0.3)) ;
 
     clear AT
@@ -92,9 +94,9 @@
                 % C0 = GB_spec_mxm (C, [ ], [ ], semiring, A, B, dnn_Gus);
                 GB_spec_compare (C0, C1, identity) ;
 
-                % C = A*B, no Mask, no typecasting, heap
-                C1 = GB_mex_mxm  (C, [ ], [ ], semiring, A, B, dnn_heap);
-                % C0 = GB_spec_mxm (C, [ ], [ ], semiring, A, B, dnn_heap);
+                % C = A*B, no Mask, no typecasting, Hash
+                C1 = GB_mex_mxm  (C, [ ], [ ], semiring, A, B, dnn_hash);
+                % C0 = GB_spec_mxm (C, [ ], [ ], semiring, A, B, dnn_hash);
                 GB_spec_compare (C0, C1, identity) ;
 
             end
diff --git a/Test/test75.m b/Test/test75.m
index 8a24d5c028..4cdfa27c37 100644
--- a/Test/test75.m
+++ b/Test/test75.m
@@ -1,7 +1,7 @@
 function test75
-%TEST75 test GrB_mxm and GrB_vxm on all semirings (A'B dot product)
+%TEST75 test GrB_mxm and GrB_vxm on all semirings
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [mult_ops, ~, add_ops, classes, ~, ~] = GB_spec_opsall ;
@@ -59,6 +59,12 @@
 fprintf ('\n-------------- GrB_mxm, vxm (dot product) on all semirings\n') ;
 
 Cin = sparse (n, n) ;
+
+Din = 10 * sparse (rand (n, n)) ;
+D.matrix = Din ;
+D.class = 'see below' ;
+D.pattern = true (n,n) ;
+
 Xin = sparse (n, 1) ;
 
 Mask = sparse (ones (n,n)) ;
@@ -66,6 +72,8 @@
 
 dnn = struct ;
 dtn = struct ( 'inp0', 'tran' ) ;
+dtn_dot   = struct ( 'inp0', 'tran', 'axb', 'dot' ) ;
+dtn_saxpy = struct ( 'inp0', 'tran', 'axb', 'saxpy' ) ;
 dnt = struct ( 'inp1', 'tran' ) ;
 dtt = struct ( 'inp0', 'tran', 'inp1', 'tran' ) ;
 
@@ -102,67 +110,71 @@
                 continue
             end
 
-            % there are 1440 semirings that pass this test:
-            % 19 ops: 10:(1st, 2nd, min, max, plus, minus, rminus, times, div, rdiv)
-            %         6:(is*)
-            %         3:(or,and,xor)
-            %       TxT->T
-            %       each has 44 monoids: all 11 types: max,min,plus,times
-            %       and 4 for boolean or,and,xor,eq
-            %       17*48 = 912
-            % 6 ops: eq,ne,gt,lt,ge,le
-            %       TxT->bool
-            %       each has 11 types
-            %       and 8 monoids (max,min,plus,times,or,and,xor,eq)
-            %       6*11*8 = 528
-            % 912 + 528 = 1440
-            % but only 1040 are unique.
-            % see GrB_AxB_builtin for details.
-
             A.class = clas ;
             B.class = clas ;
             X.class = clas ;
             Y.class = clas ;
+            D.class = add_op.opclass ;
 
             n_semirings = n_semirings + 1 ;
             fprintf ('.') ;
 
-            % C = A'*B, with mask
-            tic
-            C1 = GB_mex_mxm  (Cin, Mask, [ ], semiring, A, B, dtn);
-            t2 = toc ;
-            C2 = GB_spec_mxm (Cin, Mask, [ ], semiring, A, B, dtn);
+            % C<M> = A'*B, with mask
+            C1 = GB_mex_mxm  (Cin, Mask, [ ], semiring, A, B, dtn_dot);
+            C2 = GB_spec_mxm (Cin, Mask, [ ], semiring, A, B, dtn) ;
+            GB_spec_compare (C1, C2, id) ;
+            C1 = GB_mex_mxm  (Cin, Mask, [ ], semiring, A, B, dtn_saxpy);
+            GB_spec_compare (C1, C2, id) ;
+
+            % C<M> += A'*B, C dense, typecasting of C
+            C1 = GB_mex_mxm  (Din, Mask, add_op, semiring, A, B, dtn_dot) ;
+            C2 = GB_spec_mxm (Din, Mask, add_op, semiring, A, B, dtn) ;
+            GB_spec_compare (C1, C2, id) ;
+            C1 = GB_mex_mxm  (Din, Mask, add_op, semiring, A, B, dtn_saxpy) ;
+            GB_spec_compare (C1, C2, id) ;
+
+            % C<M> += A'*B, C dense, no typecasting of C
+            C1 = GB_mex_mxm  (D, Mask, add_op, semiring, A, B, dtn_dot) ;
+            C2 = GB_spec_mxm (D, Mask, add_op, semiring, A, B, dtn) ;
+            GB_spec_compare (C1, C2, id) ;
+            C1 = GB_mex_mxm  (D, Mask, add_op, semiring, A, B, dtn_saxpy) ;
+            GB_spec_compare (C1, C2, id) ;
+
+            % C += A'*B, C dense, typecasting of C
+            C1 = GB_mex_mxm  (Din, [ ], add_op, semiring, A, B, dtn_dot) ;
+            C2 = GB_spec_mxm (Din, [ ], add_op, semiring, A, B, dtn) ;
+            GB_spec_compare (C1, C2, id) ;
+            C1 = GB_mex_mxm  (Din, [ ], add_op, semiring, A, B, dtn_saxpy) ;
+            GB_spec_compare (C1, C2, id) ;
+
+            % C += A'*B, C dense, no typecasting of C
+            C1 = GB_mex_mxm  (D, [ ], add_op, semiring, A, B, dtn_dot) ;
+            C2 = GB_spec_mxm (D, [ ], add_op, semiring, A, B, dtn) ;
+            GB_spec_compare (C1, C2, id) ;
+            C1 = GB_mex_mxm  (D, [ ], add_op, semiring, A, B, dtn_saxpy) ;
             GB_spec_compare (C1, C2, id) ;
 
             % X = u*A, with mask
-            tic
-            C1 = GB_mex_vxm  (Xin, mask, [ ], semiring, X, A, [ ]);
-            t2 = toc ;
-            C2 = GB_spec_vxm (Xin, mask, [ ], semiring, X, A, [ ]);
+            C1 = GB_mex_vxm  (Xin, mask, [ ], semiring, X, A, [ ]) ;
+            C2 = GB_spec_vxm (Xin, mask, [ ], semiring, X, A, [ ]) ;
             GB_spec_compare (C1, C2, id) ;
 
             if (k3 == 1)
-                % repeat but with typecasing, to test generic A'*B
+                % repeat but with typecasting, to test generic A'*B
                 A.class = 'double' ;
 
                 % C = A'*B, with mask
-                tic
                 C1 = GB_mex_mxm  (Cin, Mask, [ ], semiring, A, B, dtn);
-                t2 = toc ;
                 C2 = GB_spec_mxm (Cin, Mask, [ ], semiring, A, B, dtn);
                 GB_spec_compare (C1, C2, id) ;
 
                 % X = u*A, with mask
-                tic
                 C1 = GB_mex_vxm  (Xin, mask, [ ], semiring, X, A, [ ]);
-                t2 = toc ;
                 C2 = GB_spec_vxm (Xin, mask, [ ], semiring, X, A, [ ]);
                 GB_spec_compare (C1, C2, id) ;
 
                 % X = u*A, with mask
-                tic
                 C1 = GB_mex_vxm  (Xin, mask, [ ], semiring, Y, A, [ ]);
-                t2 = toc ;
                 C2 = GB_spec_vxm (Xin, mask, [ ], semiring, Y, A, [ ]);
                 GB_spec_compare (C1, C2, id) ;
 
diff --git a/Test/test75b.m b/Test/test75b.m
new file mode 100644
index 0000000000..4bd4adaa1d
--- /dev/null
+++ b/Test/test75b.m
@@ -0,0 +1,230 @@
+function test75b
+%TEST75B GrB_mxm and GrB_vxm on all semirings (shorter test than test75)
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+[mult_ops, ~, add_ops, classes, ~, ~] = GB_spec_opsall ;
+
+% cstart = grb_get_coverage ;
+% fprintf ('coverage start: %d\n', cstart) ;
+% cc = zeros (15,1) ;
+
+rng ('default') ;
+
+m = 200 ;
+n = 5 ;
+A_sparse = sprandn (m, n, 0.1) ;
+A_sparse (:,3) = 0 ;
+A_sparse (2,3) = 1.7 ;
+A_sparse (18,3) = 2.2 ;
+A_sparse (:,1:2) = sparse (rand (m,2)) ;
+A_sparse (1,1) = 0;
+A_sparse (18,1) = 0;
+A_sparse (:,5) = 0 ;
+A_sparse (1,5) = 11 ;
+A_sparse (2,5) = 23 ;
+A_sparse (18,5) = 33 ;
+
+B_sparse = sprandn (m, n, 0.1) ;
+B_sparse (:,1) = 0 ;
+B_sparse (1,1) = 3 ;
+B_sparse (18,1) = 2 ;
+B_sparse (:,[2 n]) = sparse (rand (m,2)) ;
+B_sparse (3,2) = 0 ;
+B_sparse (18,2) = 0 ;
+A_sparse (:,3) = 0 ;
+B_sparse (2,1) = 7 ;
+B_sparse (18,1) = 8 ;
+B_sparse (19,1) = 9 ;
+
+x_sparse = sparse (rand (m,1)) ;
+x_sparse (99) = 0 ;
+
+y_sparse = sparse (zeros (m,1)) ;
+y_sparse (99) = 1 ;
+
+A.matrix = A_sparse ;
+A.class = 'see below' ;
+A.pattern = logical (spones (A_sparse)) ;
+
+B.matrix = B_sparse ;
+B.class = 'see below' ;
+B.pattern = logical (spones (B_sparse)) ;
+
+X.matrix = x_sparse ;
+X.class = 'see below' ;
+X.pattern = logical (spones (x_sparse)) ;
+
+Y.matrix = y_sparse ;
+Y.class = 'see below' ;
+Y.pattern = logical (spones (y_sparse)) ;
+
+fprintf ('\n-------------- GrB_mxm, vxm (dot product) on all semirings\n') ;
+
+Cin = sparse (n, n) ;
+
+Din = 10 * sparse (rand (n, n)) ;
+D.matrix = Din ;
+D.class = 'see below' ;
+D.pattern = true (n,n) ;
+
+Xin = sparse (n, 1) ;
+
+Mask = sparse (ones (n,n)) ;
+mask = sparse (ones (n,1)) ;
+
+dnn = struct ;
+dtn = struct ( 'inp0', 'tran' ) ;
+dtn_dot   = struct ( 'inp0', 'tran', 'axb', 'dot' ) ;
+dtn_saxpy = struct ( 'inp0', 'tran', 'axb', 'saxpy' ) ;
+dnt = struct ( 'inp1', 'tran' ) ;
+dtt = struct ( 'inp0', 'tran', 'inp1', 'tran' ) ;
+
+n_semirings = 0 ;
+
+% ccall = zeros (1,15)  ;
+
+for k1 = 1:length(mult_ops)
+    mulop = mult_ops {k1} ;
+    fprintf ('\n%s', mulop) ;
+
+    for k2 = 1:length(add_ops)
+        addop = add_ops {k2} ;
+
+        for k3 = 1:length (classes)
+            clas = classes {k3} ;
+
+            semiring.multiply = mulop ;
+            semiring.add = addop ;
+            semiring.class = clas ;
+
+            % create the semiring.  some are not valid because the or,and,xor,eq
+            % monoids can only be used when z is boolean for z=mult(x,y).
+            try
+                [mult_op add_op id] = GB_spec_semiring (semiring) ;
+                [mult_opname mult_opclass zclass] = GB_spec_operator (mult_op) ;
+                [ add_opname  add_opclass] = GB_spec_operator (add_op) ;
+                identity = GB_spec_identity (semiring.add, add_opclass) ;
+            catch me
+                if (~isempty (strfind (me.message, 'gotcha')))
+                    semiring
+                    pause
+                end
+                continue
+            end
+
+            A.class = clas ;
+            B.class = clas ;
+            X.class = clas ;
+            Y.class = clas ;
+            D.class = add_op.opclass ;
+
+            n_semirings = n_semirings + 1 ;
+            fprintf ('.') ;
+
+            % C<M> = A'*B, with mask
+% c1 = grb_get_coverage  ;
+%             C1 = GB_mex_mxm  (Cin, Mask, [ ], semiring, A, B, dtn_dot);
+%             C2 = GB_spec_mxm (Cin, Mask, [ ], semiring, A, B, dtn) ;
+%             GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (1) = cc (1) + cdelta ;
+% c1 = grb_get_coverage  ;
+%             C1 = GB_mex_mxm  (Cin, Mask, [ ], semiring, A, B, dtn_saxpy);
+%             GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (2) = cc (2) + cdelta ;
+
+            % C<M> += A'*B, C dense, typecasting of C
+% c1 = grb_get_coverage  ;
+%             C1 = GB_mex_mxm  (Din, Mask, add_op, semiring, A, B, dtn_dot) ;
+%             C2 = GB_spec_mxm (Din, Mask, add_op, semiring, A, B, dtn) ;
+%             GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (3) = cc (3) + cdelta ;
+% c1 = grb_get_coverage  ;
+%             C1 = GB_mex_mxm  (Din, Mask, add_op, semiring, A, B, dtn_saxpy) ;
+%             GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (4) = cc (4) + cdelta ;
+% c1 = grb_get_coverage  ;
+
+            % C<M> += A'*B, C dense, no typecasting of C
+%             C1 = GB_mex_mxm  (D, Mask, add_op, semiring, A, B, dtn_dot) ;
+%             C2 = GB_spec_mxm (D, Mask, add_op, semiring, A, B, dtn) ;
+%             GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (5) = cc (5) + cdelta ;
+% c1 = grb_get_coverage  ;
+%             C1 = GB_mex_mxm  (D, Mask, add_op, semiring, A, B, dtn_saxpy) ;
+%             GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (6) = cc (6) + cdelta ;
+% c1 = grb_get_coverage  ;
+
+            % C += A'*B, C dense, typecasting of C
+            % (test coverage: 96)
+            C1 = GB_mex_mxm  (Din, [ ], add_op, semiring, A, B, dtn_dot) ;
+            C2 = GB_spec_mxm (Din, [ ], add_op, semiring, A, B, dtn) ;
+            GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (7) = cc (7) + cdelta ;
+% c1 = grb_get_coverage  ;
+%            C1 = GB_mex_mxm  (Din, [ ], add_op, semiring, A, B, dtn_saxpy) ;
+%            GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (8) = cc (8) + cdelta ;
+% c1 = grb_get_coverage  ;
+
+            % C += A'*B, C dense, no typecasting of C
+            % (test coverage: 1,234)
+            C1 = GB_mex_mxm  (D, [ ], add_op, semiring, A, B, dtn_dot) ;
+            C2 = GB_spec_mxm (D, [ ], add_op, semiring, A, B, dtn) ;
+            GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (10) = cc (10) + cdelta ;
+% c1 = grb_get_coverage  ;
+%             C1 = GB_mex_mxm  (D, [ ], add_op, semiring, A, B, dtn_saxpy) ;
+%             GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (11) = cc (11) + cdelta ;
+% c1 = grb_get_coverage  ;
+
+            % X = u*A, with mask (test coverage: 12)
+            C1 = GB_mex_vxm  (Xin, mask, [ ], semiring, X, A, [ ]) ;
+            C2 = GB_spec_vxm (Xin, mask, [ ], semiring, X, A, [ ]) ;
+            GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (12) = cc (12) + cdelta ;
+% c1 = grb_get_coverage  ;
+
+%             if (k3 == 1)
+%               % repeat but with typecasting, to test generic A'*B
+%               A.class = 'double' ;
+
+%               % C = A'*B, with mask
+%               C1 = GB_mex_mxm  (Cin, Mask, [ ], semiring, A, B, dtn);
+%               C2 = GB_spec_mxm (Cin, Mask, [ ], semiring, A, B, dtn);
+%               GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (13) = cc (13) + cdelta ;
+% c1 = grb_get_coverage  ;
+
+%               % X = u*A, with mask
+%               C1 = GB_mex_vxm  (Xin, mask, [ ], semiring, X, A, [ ]);
+%               C2 = GB_spec_vxm (Xin, mask, [ ], semiring, X, A, [ ]);
+%               GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (14) = cc (14) + cdelta ;
+% c1 = grb_get_coverage  ;
+
+%               % X = u*A, with mask
+%               C1 = GB_mex_vxm  (Xin, mask, [ ], semiring, Y, A, [ ]);
+%               C2 = GB_spec_vxm (Xin, mask, [ ], semiring, Y, A, [ ]);
+%               GB_spec_compare (C1, C2, id) ;
+% c2 = grb_get_coverage ; cdelta = c2 - c1 ; cc (15) = cc (15) + cdelta ;
+
+%            end
+
+% ccall (n_semirings,:) = cc ;
+        end
+    end
+end
+
+n_semirings
+% cc
+% cfin = grb_get_coverage ;
+% fprintf ('coverage end: %d\n', cfin) ;
+% save saveme2 cc ccall
+
+fprintf ('\ntest75b: all tests passed\n') ;
+
+
diff --git a/Test/test76.m b/Test/test76.m
index 8ec6c49b04..ea6cc65c62 100644
--- a/Test/test76.m
+++ b/Test/test76.m
@@ -1,7 +1,7 @@
 function test76
 %TEST76 test GxB_resize
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 clear
diff --git a/Test/test77.m b/Test/test77.m
index 0342def736..b302a6c05f 100644
--- a/Test/test77.m
+++ b/Test/test77.m
@@ -1,7 +1,7 @@
 function test77 (fulltest)
 %TEST77 test GxB_kron
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [bin_ops, ~, ~, classes, ~, ~] = GB_spec_opsall ;
diff --git a/Test/test78.m b/Test/test78.m
index 613cb5594b..fe43b85bc4 100644
--- a/Test/test78.m
+++ b/Test/test78.m
@@ -1,7 +1,7 @@
 function test78
 %TEST78 test subref
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 n = 500 ;
diff --git a/Test/test79.m b/Test/test79.m
index 43fc328a2d..c3166de298 100644
--- a/Test/test79.m
+++ b/Test/test79.m
@@ -1,6 +1,9 @@
 function test79
 %TEST79 run all matrices with test06
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 index = ssget ;
 f = find (index.nrows == index.ncols & index.isReal & index.nrows > 1000) ;
 
diff --git a/Test/test80.m b/Test/test80.m
index b02fa8f5fc..63c69d9c95 100644
--- a/Test/test80.m
+++ b/Test/test80.m
@@ -1,6 +1,9 @@
 function test80
 %TEST80 rerun test06 with different matrices
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng ('default') ;
 n = 33 ;
 A = speye (n) ;
diff --git a/Test/test81.m b/Test/test81.m
index 008b79ff45..68f70f2b2f 100644
--- a/Test/test81.m
+++ b/Test/test81.m
@@ -1,6 +1,9 @@
 function test81
 %TEST81 test GrB_Matrix_extract with index range, stride, & backwards
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test81:  GrB_Matrix_extract with index range, stride, backwards\n') ;
 
 rng ('default') ;
diff --git a/Test/test82.m b/Test/test82.m
index 4404fcbb3e..12d692972d 100644
--- a/Test/test82.m
+++ b/Test/test82.m
@@ -1,6 +1,9 @@
 function test82
 %TEST82 test GrB_Matrix_extract with index range (hypersparse)
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test82: test GrB_Matrix_extract with index range (hypersparse)\n') ;
 
 rng ('default') ;
diff --git a/Test/test83.m b/Test/test83.m
index 66548c164d..1ac8cc241b 100644
--- a/Test/test83.m
+++ b/Test/test83.m
@@ -1,6 +1,9 @@
 function test83
 %TEST83 test GrB_assign with J=lo:0:hi, an empty list, and C_replace true
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 % exercises the C_replace_phase of GB_assign.c
 
 rng ('default') ;
diff --git a/Test/test84.m b/Test/test84.m
index 4209fe82fc..46952180d9 100644
--- a/Test/test84.m
+++ b/Test/test84.m
@@ -1,6 +1,9 @@
 function test84
 %TEST84 test GrB_assign (row and column with C in CSR format)
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng ('default') ;
 m = 10 ;
 n = 20 ;
diff --git a/Test/test85.m b/Test/test85.m
index 8f845b4de5..03a5a4c4d6 100644
--- a/Test/test85.m
+++ b/Test/test85.m
@@ -1,6 +1,9 @@
 function test85
 %TEST85 test GrB_transpose: 1-by-n with typecasting
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 A.matrix = sparse ([ 1 2 3 4]) ;
 A.class  = 'single' ;
 
diff --git a/Test/test86.m b/Test/test86.m
index 00fa14bf7f..9b4f0e46cb 100644
--- a/Test/test86.m
+++ b/Test/test86.m
@@ -1,6 +1,9 @@
 function test86
 %TEST86 performance test of of GrB_Matrix_extract
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test86: performance test of of GrB_Matrix_extract\n') ;
 
 [save save_chunk] = nthreads_get ;
diff --git a/Test/test87.m b/Test/test87.m
index e8292d35b3..91e7b79c45 100644
--- a/Test/test87.m
+++ b/Test/test87.m
@@ -1,6 +1,9 @@
 function test87
 %TEST87 performance test of GrB_mxm
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 [save save_chunk] = nthreads_get ;
 chunk = 4096 ;
 nthreads = feature ('numcores') ;
@@ -75,7 +78,7 @@
 
 % this is slower than GB_mex_AxB (A',B) even though it uses the
 % same method, because the MATLAB A' above is non-hypersparse,
-% but the internal AT=A' in GB_AxB_meta is hypersparse.
+% but the internal AT=A' is hypersparse.
 
 fprintf ('GrB A''*B native (AT becomes hypersparse):\n') ;
 tic
diff --git a/Test/test88.m b/Test/test88.m
index e3b4d31577..0f34c67dba 100644
--- a/Test/test88.m
+++ b/Test/test88.m
@@ -1,6 +1,9 @@
 function test88
 %TEST88 test hypersparse matrices with heap-based method
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng ('default') ;
 d.axb = 'heap' ;
 semiring.multiply = 'times' ;
@@ -23,7 +26,10 @@
         C1 = A.matrix * B.matrix ;
         C2 = GB_mex_mxm (S, [ ], [ ], semiring, A, B, d) ;
         [t method] = grbresults ;
-        assert (isequal (method, 'heap')) ;
+        % v3.1:
+        % assert (isequal (method, 'heap')) ;
+        % v3.2:
+        assert (isequal (method, 'saxpy')) ;
         assert (isequal_roundoff (C1, C2.matrix)) ;
     end
 end
diff --git a/Test/test89.m b/Test/test89.m
index 1022537e48..e6755a4c76 100644
--- a/Test/test89.m
+++ b/Test/test89.m
@@ -1,7 +1,7 @@
 function test89
 %TEST89 performance test of complex A*B
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 [save save_chunk] = nthreads_get ;
@@ -43,8 +43,8 @@
         % 1002: heap
         % 1003: dot
 
-    % GraphBLAS is slower than it could be because the complex type may be
-    % user-defined at run-time.  This uses the default method, which selects
+    % GraphBLAS is slower than it could be because the complex type is
+    % user-defined.  This uses the default method, which selects
     % Gustavson's method:
 
     C2 = GB_mex_AxB (A, B) ;
diff --git a/Test/test90.m b/Test/test90.m
index 9fb9b303a2..4552b980c5 100644
--- a/Test/test90.m
+++ b/Test/test90.m
@@ -1,14 +1,20 @@
 function test90
-%TEST90 test AxB with pre-compiled semirings: plus_rdiv and plus_rdiv2
+%TEST90 test AxB with user-defined semirings: plus_rdiv and plus_rdiv2
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
-fprintf ('\n -------------- GB_AxB_meta with pre-compiled semiring\n') ;
+fprintf ('\n -------------- A*B plus_rdiv (user-defined semiring)\n') ;
+
+    % 1001: Gustavson
+    % 1002: heap
+    % 1003: dot
+    % 1004: hash
+    % 1005: saxpy
 
 rng ('default') ;
 
-for N = [10 100 1000]
+for N = [10 100] % 1000]
 
     N
 
@@ -33,7 +39,7 @@
     t = grbresults ; fprintf ('GB time %g\n', t) ;
     assert (norm (C1-C2,1) / norm (C1,1) < 1e-10) ;
 
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method: %d\n', method) ;
         cprint = (N <= 10) ;
         tic
@@ -76,10 +82,6 @@
     [i j x] = find (B) ;
     Y = sparse (i, j, 1./x, n, n) ;
 
-    % 1001: Gustavson
-    % 1002: heap
-    % 1003: dot
-
     flipxy = 0 ;
     at = 0 ; 
     bt = 0 ;
@@ -87,7 +89,7 @@
     tic
     C0 = X*B ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
@@ -103,7 +105,7 @@
     tic
     C0 = X'*B ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
@@ -119,7 +121,7 @@
     tic
     C0 = X*B' ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
@@ -135,7 +137,7 @@
     tic
     C0 = X'*B' ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
@@ -151,7 +153,7 @@
     tic
     C0 = A*Y ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
@@ -167,7 +169,7 @@
     tic
     C0 = A'*Y ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
@@ -183,7 +185,7 @@
     tic
     C0 = A*Y' ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
@@ -199,7 +201,7 @@
     tic
     C0 = A'*Y' ;
     toc
-    for method = 1001:1003
+    for method = 1001:1005
         fprintf ('method %d\n', method) ;
         tic
         C5 = GB_mex_rdiv2 (A, B, at, bt, method, flipxy) ;
diff --git a/Test/test91.m b/Test/test91.m
index df98aa7775..6a99c9ca7a 100644
--- a/Test/test91.m
+++ b/Test/test91.m
@@ -1,7 +1,7 @@
 function test91
 %TEST91 test subref performance on dense vectors
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\n------------------------------ testing GB_mex_Matrix_subref\n') ;
diff --git a/Test/test92.m b/Test/test92.m
index f035be0c38..c48c00508f 100644
--- a/Test/test92.m
+++ b/Test/test92.m
@@ -1,6 +1,9 @@
 function test92
 %TEST92 test GB_subref (symbolic case)
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 fprintf ('test92:  test GB_subref (symbolic case)\n') ;
 
 rng ('default') ;
diff --git a/Test/test93.m b/Test/test93.m
index a90f5b437e..ddecf645fb 100644
--- a/Test/test93.m
+++ b/Test/test93.m
@@ -1,6 +1,9 @@
 function test93
 %TEST93 test dpagerank and ipagerank
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng ('default') ;
 addpath ('../Demo/MATLAB') ;
 
diff --git a/Test/test93b.m b/Test/test93b.m
index d492603a34..1fb143d8e5 100644
--- a/Test/test93b.m
+++ b/Test/test93b.m
@@ -1,6 +1,9 @@
 function test93b
 %TEST93B test dpagerank and ipagerank
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 rng ('default') ;
 addpath ('../Test') ;
 addpath ('../Test/spok') ;
diff --git a/Test/test94.m b/Test/test94.m
index c6b844f4a0..d8f1b4b76a 100644
--- a/Test/test94.m
+++ b/Test/test94.m
@@ -1,6 +1,9 @@
 function test94
 %TEST94 test pagerank
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 T = load ('../Demo/Matrix/west0067') ;
 n = max (max (T (:, 1:2))) + 1 ;
 A = sparse (1+T(:,1), 1+T(:,2), T(:,3), n, n) ;
diff --git a/Test/test95.m b/Test/test95.m
index 54357dfbce..bef2e51f13 100644
--- a/Test/test95.m
+++ b/Test/test95.m
@@ -1,7 +1,7 @@
 function test95
 %TEST95 performance test for GrB_transpose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntest95: performance tests : GrB_transpose \n') ;
diff --git a/Test/test96.m b/Test/test96.m
index f548e313d5..a9dd3e6729 100644
--- a/Test/test96.m
+++ b/Test/test96.m
@@ -1,6 +1,9 @@
 function test96
 %TEST96 test dot product
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 n = 1000 ;
 A = sprandn (n, n, 0.5) ;
 B = sprandn (n, n, 0.5) ;
diff --git a/Test/test97.m b/Test/test97.m
index c585187593..937fc04981 100644
--- a/Test/test97.m
+++ b/Test/test97.m
@@ -1,7 +1,7 @@
 function test97
 %TEST97 test GB_assign, scalar expansion and zombies
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/test98.m b/Test/test98.m
index cf59fbe5c6..4f61cd3c4c 100644
--- a/Test/test98.m
+++ b/Test/test98.m
@@ -1,7 +1,7 @@
 function test98
 %TEST98 test GrB_mxm, typecasting on the fly
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 % This test is for the case when the auto AxB method selects the heap
diff --git a/Test/test99.m b/Test/test99.m
index f8fe5cda9c..e158c01ddd 100644
--- a/Test/test99.m
+++ b/Test/test99.m
@@ -1,7 +1,7 @@
 function test99
 %TEST99 test GB_mex_transpose with explicit zeros in the Mask
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/test_other.m b/Test/test_other.m
index c87886c7d6..2a39e8a352 100644
--- a/Test/test_other.m
+++ b/Test/test_other.m
@@ -1,6 +1,9 @@
 function test_other
 %TEST_OTHER installs all packages needed for extensive tests
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 here = pwd ;
 fprintf ('\n------------------installing ssget:\n') ;
 try
diff --git a/Test/testall.m b/Test/testall.m
index 3330b02032..c1b28cd7be 100644
--- a/Test/testall.m
+++ b/Test/testall.m
@@ -2,16 +2,25 @@ function testall (threads,longtests)
 %TESTALL run all GraphBLAS tests
 %
 % Usage:
-% testall ;         % runs just the shorter tests (about 15 minutes)
+% testall ;             % runs just the shorter tests (about 30 minutes)
 %
 % testall(threads) ;    % run with specific list of threads and chunk sizes
+% testall([ ],1) ;      % run all longer tests, with default # of threads
 %
 % threads is a cell array. Each entry is 2-by-1, with the first value being
-% the # of threads to use and the 2nd being the chunk size.
+% the # of threads to use and the 2nd being the chunk size.  The default is
+% {[4 1]} if empty or not present.
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
+try
+    GrB.finalize
+catch
+end
+clear mex
+GrB.init
+
 testall_time = tic ;
 
 if (nargin < 2)
@@ -55,173 +64,114 @@ function testall (threads,longtests)
 
 % Timings below are for test coverage (Tcov), with malloc debuging enabled, on
 % hypersparse.cse.tamu.edu (20 core Xeon).  Times will differ if this test is
-% run in with malloc debugging off.
+% run with malloc debugging off.
 
 %----------------------------------------
 % test taking less than 1 second:
 %----------------------------------------
 
-%0
+logstat ('test07b',t) ; % quick test GB_mex_assign
+logstat ('test01',t) ;  % error handling
+logstat ('test01',s) ;  % error handling
+logstat ('test83',t) ;  % GrB_assign with C_replace and empty J
 logstat ('test136',s) ; % subassignment special cases
-%0
+logstat ('test98',t) ;  % GB_mex_mxm, typecast on the fly
+logstat ('test84',t) ;  % GrB_assign (row and column with C in CSR format)
+logstat ('test85',t) ;  % GrB_transpose (1-by-n with typecasting)
+logstat ('test02',t) ;  % matrix copy and dup tests
+logstat ('test148',t) ; % ewise with alias
+logstat ('test150',t) ; % mxm with zombies and typecasting
+
 logstat ('test137',s) ; % GrB_eWiseMult with FIRST and SECOND operators
-%0
 logstat ('test138',s) ; % test assign, with coarse-only tasks in IxJ slice
-%0
 logstat ('test139',s) ; % merge sort, special cases
-%0
 logstat ('test72',t) ;  % several special cases
 logstat ('test72',s) ;  % several special cases
-%0
-logstat ('test07b',t) ; % quick test GB_mex_assign
-%0
 logstat ('test09',t) ;  % duplicate I,J test of GB_mex_subassign
-%0
-logstat ('test83',t) ;  % GrB_assign with C_replace and empty J
-%0
-logstat ('test84',t) ;  % GrB_assign (row and column with C in CSR format)
-logstat ('test84',s) ;  % GrB_assign (row and column with C in CSR format)
-%0
-logstat ('test85',t) ;  % GrB_transpose (1-by-n with typecasting)
-logstat ('test85',s) ;  % GrB_transpose (1-by-n with typecasting)
-%0.1
 logstat ('test109',t) ; % terminal monoid with user-defined type
 logstat ('test109',s);  % terminal monoid with user-defined type
-%0.1
 logstat ('test110',t) ; % binary search of M(:,j) in accum/mask
-%0.1
 logstat ('test131',t) ; % GrB_Matrix_clear
-%0.1
 logstat ('test132',t) ; % setElement
-%0.1
-logstat ('test98',t) ;  % GB_mex_mxm, typecast on the fly
-logstat ('test98',s);   % GB_mex_mxm, typecast on the fly
-%0.1
 logstat ('test92',t) ;  % GB_subref (symbolic case)
-%0.1
 logstat ('test97',t) ;  % GB_mex_assign, scalar expansion and zombies
-%0.1
-logstat ('test01',t) ;  % error handling
-logstat ('test01',s) ;  % error handling
-%0.1
 logstat ('test04',t) ;  % simple mask and transpose test
-%0.1
 logstat ('test05',t) ;  % quick setElement test, with typecasting
 logstat ('test05',s);   % quick setElement test, with typecasting
-%0.1
 logstat ('test15',t) ;  % simple test of GB_mex_AxB
-logstat ('test15',s) ;  % simple test of GB_mex_AxB
-%0.1
 logstat ('test78',t) ;  % quick test of hypersparse subref
-%0.1
 logstat ('test82',t) ;  % GrB_extract with index range (hypersparse)
-%0.1
 logstat ('test94',t) ;  % pagerank
 logstat ('test94',s) ;  % pagerank
-%0.2
 logstat ('test126',t) ; % test GrB_reduce to vector on a very sparse matrix 
-%0.2
 logstat ('test03',t) ;  % random matrix tests
 logstat ('test03',s) ;  % random matrix tests
-%0.4
 logstat ('test128',t) ; % eWiseMult, eWiseAdd, special cases
-%0.4
-logstat ('test02',t) ;  % matrix copy and dup tests
-%0.4
 logstat ('test17',t) ;  % quick test of GrB_*_extractElement
-%0.5
 logstat ('test108',t) ; % boolean monoids
-%0.6
 logstat ('test124',t) ; % GrB_extract, case 6
-%0.6
 logstat ('test101',t) ; % GrB_*_import and export
-%0.6
-logstat ('test26') ;    % quick test of GxB_select
-%0.8
-logstat ('test133',t) ; % %test mask operations (GB_masker)
+logstat ('test26',t) ;  % quick test of GxB_select
+logstat ('test141',t) ; % eWiseAdd with dense matrices
+logstat ('test142',t) ; % assign with dense matrices
+logstat ('test144') ;   % cumsum
+logstat ('test145',t) ; % dot4 for C += A'*B
+logstat ('test147',t) ; % C<M>=A*B with very sparse M
+logstat ('test146',t) ; % expand scalar
+logstat ('test149',t) ; % test fine hash tasks for C<!M>=A*B
+logstat ('test133',t) ; % test mask operations (GB_masker)
 
 %----------------------------------------
 % tests taking 1 to 10 seconds:
 %----------------------------------------
 
-%1
+logstat ('test29',t) ;  % reduce with zombies
+logstat ('test90',t) ;  % test user-defined semirings
 logstat ('testc2(1)',t) ;  % complex tests (quick case)
-%1
-logstat ('test104',t) ; % export/import
-%2
 logstat ('test80',t) ;  % test GrB_mxm on all semirings (different matrix)
-%2
 logstat ('test130',t) ; % GrB_apply, hypersparse cases
-%2
+logstat ('test14',t) ;  % GrB_reduce
+
 logstat ('test129',t) ; % test GxB_select (tril and nonzero, hypersparse)
-%2
 logstat ('test102',t);  % GB_AxB_flopcount
-%2
 logstat ('test12',t) ;  % Wathen finite-element matrices (short test)
-%2
 logstat ('test28',t) ;  % mxm with aliased inputs, C<C> = accum(C,C*C)
-%2
-logstat ('test29',t) ;  % reduce with zombies
-%3
-logstat ('test107') ;   % monoids with terminal values
-%3
+logstat ('test107',t) ; % monoids with terminal values
 logstat ('test103',t) ; % GrB_transpose aliases
-%3
 logstat ('test93',t) ;  % pagerank
-%3
 logstat ('test135',t) ; % reduce to scalar
-%2
 logstat ('test100',t) ; % GB_mex_isequal
-%4
 logstat ('test11',t) ;  % exhaustive test of GrB_extractTuples
-%5
 logstat ('test106',t) ; % GxB_subassign with alias
-%6
-logstat ('test14',t) ;  % GrB_reduce
-%6
 logstat ('test69',t) ;  % assign and subassign with alias
-%7
 logstat ('test77',t) ;  % quick tests of GxB_kron
-%5 + 4
 logstat ('test19b',t) ; % GrB_assign, many pending operators (malloc debug off)
 logstat ('test19b',s);  % GrB_assign, many pending operators (malloc debug off)
+logstat ('test104',t) ; % export/import
 
 %----------------------------------------
 % tests taking 10 to 200 seconds
 %----------------------------------------
 
-%5
+logstat ('test125',t) ; % test GrB_mxm: row and column scaling
+logstat ('test74',t) ;  % test GrB_mxm on all semirings
+logstat ('test54',t) ;  % assign and extract with begin:inc:end
+logstat ('test23',t) ;  % quick test of GB_*_build
+
 logstat ('test00',s);   % GB_mex_mis (single threaded)
-%10
 logstat ('test76',t) ;  % GxB_resize
-%12
 logstat ('test88',t) ;  % hypersparse matrices with heap-based method
-%13
-logstat ('test54',t) ;  % assign and extract with begin:inc:end
-%16
 logstat ('test127',t) ; % test eWiseAdd, eWiseMult (all types and operators)
-%17
+logstat ('test143',t) ;  % mxm, special cases
 logstat ('test99',t) ;  % GB_mex_transpose with explicit zeros in the Mask
-%17
 logstat ('test19',t) ;  % GxB_subassign, many pending operators
-%18
-logstat ('test23',t) ;  % quick test of GB_*_build
-%20
-logstat ('test125',t) ; % test GrB_mxm: row and column scaling
-%33
 logstat ('test53',t) ;  % quick test of GB_mex_Matrix_extract
-%37
 logstat ('test27',t) ;  % quick test of GxB_select (band)
-%167
-logstat ('test74',t) ;  % test GrB_mxm on all semirings
 
 %----------------------------------------
 % longer tests (200 seconds to 600 seconds)
 %----------------------------------------
 
-%330
-logstat ('test90',t) ;  % test pre-compiled user-defined semirings
-
 % Turn off malloc debugging
 malloc_debugging = stat ;
 if (malloc_debugging)
@@ -232,31 +182,31 @@ function testall (threads,longtests)
     fclose (f) ;
 end
 
-%79
-logstat ('test134',t) ; % quick test of GxB_select
-
 %80
 logstat ('test10',t) ;  % GrB_apply
 
-%92
-logstat ('test16',t) ;  % user-defined complex operators
+%79
+logstat ('test134',t) ; % quick test of GxB_select
+
+%292
+logstat ('test75b',t) ;  % test GrB_mxm A'*B (quicker than test75)
 
 %96: only single-threaded is needed
 logstat ('test21',s) ;  % quick test of GB_mex_subassign
 
+%92
+logstat ('test16',t) ;  % user-defined complex operators
+
 %103
 logstat ('test81',t) ;  % GrB_Matrix_extract with stride, range, backwards
 
 %102
 logstat ('test21b',t) ; % quick test of GB_mex_assign
 
-%200
+%149
 logstat ('test18',t) ;  % quick tests of GrB_eWiseAdd and eWiseMult
 
-%292
-logstat ('test75',t) ;  % test GrB_mxm A'*B on all semirings
-
-%430
+%322 seconds, 11 statements covered
 logstat ('test20',t) ;  % quick test of GB_mex_mxm on a few semirings
 
 %-------------------------------------------------------------------------------
@@ -271,6 +221,7 @@ function testall (threads,longtests)
 % test script              % time % description
 % ------------------------ % ---- % ------------------------------
 
+logstat ('test75',t) ;     %      % test GrB_mxm A'*B on all semirings
 logstat ('test00',t) ;     %    8 % GB_mex_mis (multiple threads)
 logstat ('test07',t) ;     %    0 % quick test GB_mex_subassign
 logstat ('test07',s) ;     %    0 % quick test GB_mex_subassign
diff --git a/Test/testall2.m b/Test/testall2.m
index f71dc1bd0b..711b0fa007 100644
--- a/Test/testall2.m
+++ b/Test/testall2.m
@@ -1,5 +1,8 @@
 %TESTALL2 run testall with different # of threads
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 for k = [4 1] %  8 20]
diff --git a/Test/testall3.m b/Test/testall3.m
index 35244533c3..a794fb8b1c 100644
--- a/Test/testall3.m
+++ b/Test/testall3.m
@@ -1,5 +1,8 @@
 %TESTALL3 run testall with different # of threads
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 grbinfo
diff --git a/Test/testc1.m b/Test/testc1.m
index 64feae881a..d682ff5eef 100644
--- a/Test/testc1.m
+++ b/Test/testc1.m
@@ -1,7 +1,7 @@
 function testc1
 %TESTC1 test complex operators
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng 'default'
@@ -65,7 +65,7 @@
                 op = complex_unary {k} ;
                 C1 = GB_mex_op (op, A, '',1) ;
                 [C2 tol] = GB_user_op (op, A) ;
-                GB_user_compare (C1, C2, tol) ;
+                GB_complex_compare (C1, C2, tol) ;
             end
 
             % test unary ops with real x
@@ -73,7 +73,7 @@
                 op = complex_unary {k} ;
                 C1 = GB_mex_op (op, real (A), '',1) ;
                 [C2 tol] = GB_user_op (op, real (A)) ;
-                GB_user_compare (C1, C2, tol) ;
+                GB_complex_compare (C1, C2, tol) ;
             end
 
             for bkind = 1:6
@@ -100,7 +100,7 @@
                     op = complex_binary {k} ;
                     C1 = GB_mex_op (op, A, B, 1) ;
                     [C2 tol] = GB_user_op (op, A, B) ;
-                    GB_user_compare (C1, C2, tol) ;
+                    GB_complex_compare (C1, C2, tol) ;
                 end
 
                 % test complex(A,B)
diff --git a/Test/testc2.m b/Test/testc2.m
index c81bd2c887..399967bcfe 100644
--- a/Test/testc2.m
+++ b/Test/testc2.m
@@ -1,7 +1,7 @@
 function testc2(quick)
 %TESTC2 test complex A*B, A'*B, A*B', A'*B', A+B
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
@@ -51,7 +51,7 @@ function testc2(quick)
 dtn.inp0 = 'tran' ;
 
 anum = [0 1001 1002 1003] ;
-algos = {'auto', 'gustavson', 'dot', 'heap'} ;
+algos = {'auto', 'gustavson', 'dot', 'hash'} ;
 
 seed = 1 ;
 
@@ -85,7 +85,7 @@ function testc2(quick)
                         B (2,3) = 4 ;
                     end
 
-                    for aa = 1:4
+                    for aa = 1:length(algos)
 
                         C = GB_mex_AxB (A, B, at, bt, anum (aa)) ;
 
diff --git a/Test/testc3.m b/Test/testc3.m
index 3d4c935d53..a230fab896 100644
--- a/Test/testc3.m
+++ b/Test/testc3.m
@@ -1,7 +1,7 @@
 function testc3
 %TESTC3 test complex GrB_extract
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/testc4.m b/Test/testc4.m
index f444997e84..f8d11d802d 100644
--- a/Test/testc4.m
+++ b/Test/testc4.m
@@ -1,7 +1,7 @@
 function testc4
 %TESTC4 test complex extractElement and setElement
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng ('default') ;
diff --git a/Test/testc5.m b/Test/testc5.m
index dfd030cbca..b2399277eb 100644
--- a/Test/testc5.m
+++ b/Test/testc5.m
@@ -1,7 +1,7 @@
 function testc5
 %TESTC5 test complex subref
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 seed = 1 ;
diff --git a/Test/testc6.m b/Test/testc6.m
index d63f2f771d..bf8e1d3e94 100644
--- a/Test/testc6.m
+++ b/Test/testc6.m
@@ -1,7 +1,7 @@
 function testc6
 %TESTC6 test complex apply
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 rng 'default'
@@ -31,14 +31,14 @@
             op = complex_unary {k} ;
             C1 = GB_mex_op (op, a, '',1) ;
             [C2 tol] = GB_user_op (op, a) ;
-            GB_user_compare (C1, C2, tol) ;
+            GB_complex_compare (C1, C2, tol) ;
             C1 = GB_mex_apply (C, [], [], op, A, dr) ;
             [i j x1] = find (C1.matrix) ;
             x1 = complex (x1) ;
             [i j s] = find (A) ;
             x2 = GB_user_op (op, complex (s)) ;
             x2 = complex (x2) ;
-            GB_user_compare (x1, x2, tol) ;
+            GB_complex_compare (x1, x2, tol) ;
         end
 
         % test unary ops with complex x,z, array transposed
@@ -50,7 +50,7 @@
             [i j s] = find (D.') ;
             x2 = GB_user_op (op, complex (s)) ;
             x2 = complex (x2) ;
-            GB_user_compare (x1, x2, true) ;
+            GB_complex_compare (x1, x2, true) ;
         end
 
         % test unary ops with complex x, real z
@@ -58,14 +58,14 @@
             op = complex_unary {k} ;
             C1 = GB_mex_op (op, a, '',1) ;
             [C2 tol] = GB_user_op (op, a) ;
-            GB_user_compare (C1, C2, tol) ;
+            GB_complex_compare (C1, C2, tol) ;
             C1 = GB_mex_apply (B, [], [], op, A, dr) ;
             [i j x1] = find (C1.matrix) ;
             x1 = complex (x1) ;
             [i j s] = find (A) ;
             x2 = GB_user_op (op, complex (s)) ;
             x2 = complex (x2) ;
-            GB_user_compare (x1, x2, tol) ;
+            GB_complex_compare (x1, x2, tol) ;
         end
 
         % test unary ops with complex x, real z, array transposed
@@ -77,7 +77,7 @@
             [i j s] = find (D.') ;
             x2 = GB_user_op (op, complex (s)) ;
             x2 = complex (x2) ;
-            GB_user_compare (x1, x2, true) ;
+            GB_complex_compare (x1, x2, true) ;
         end
 
         % test unary ops with real x, complex z
@@ -85,14 +85,14 @@
             op = complex_unary {k} ;
             C1 = GB_mex_op (op, b, '',1) ;
             [C2 tol] = GB_user_op (op, b) ;
-            GB_user_compare (C1, C2, tol) ;
+            GB_complex_compare (C1, C2, tol) ;
             C1 = GB_mex_apply (C, [], [], op, B, dr) ;
             [i j x1] = find (C1.matrix) ;
             x1 = complex (x1) ;
             [i j s] = find (B) ;
             x2 = GB_user_op (op, s) ;
             x2 = complex (x2) ;
-            GB_user_compare (x1, x2, tol) ;
+            GB_complex_compare (x1, x2, tol) ;
         end
 
         % test unary ops with real x, complex z, array transposed
@@ -103,7 +103,7 @@
             [i j s] = find (E.') ;
             x2 = GB_user_op (op, s) ;
             x2 = complex (x2) ;
-            GB_user_compare (x1, x2, true) ;
+            GB_complex_compare (x1, x2, true) ;
         end
     end
 end
diff --git a/Test/testc7.m b/Test/testc7.m
index 3ee766a284..c9fd8f2ee9 100644
--- a/Test/testc7.m
+++ b/Test/testc7.m
@@ -1,7 +1,7 @@
 function testc7
 %TESTC7 test complex assign
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('\ntestc7: all complex assign C(I,J)=A --------------------------\n') ;
@@ -63,6 +63,18 @@
         c2 = GB_mex_reduce_to_scalar (cin, '', 'plus', C3) ;
         assert (abs (c1-c2) <= tol * (abs (c1) + 1)) ;
 
+        GrB.burble (1) ;
+        clear S
+        S.matrix = sparse (1i * ones (m,n)) ;
+        S.pattern = false (m,n) ;
+        cin = complex (1,1) ;
+        M = sparse (true (m,n)) ;
+        C2 = GB_mex_subassign (S, M, [ ], sparse (cin), [ ], [ ], struct ('mask', 'structural')) ;
+        C1 = sparse (ones (m,n)) ;
+        C1 (:,:) = cin ;
+        assert (norm (C1-C2.matrix, 1) < 1e-12)
+        GrB.burble (0) ;
+
     end
 end
 
diff --git a/Test/testc8.m b/Test/testc8.m
index f118345afb..5d9f5cb656 100644
--- a/Test/testc8.m
+++ b/Test/testc8.m
@@ -1,7 +1,7 @@
 function testc8
 %TESTC8 test complex eWiseAdd and eWiseMult
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('testc8: test complex eWiseAdd and eWiseMult\n') ;
diff --git a/Test/testc9.m b/Test/testc9.m
index 24f6a5bfe2..8583ac1354 100644
--- a/Test/testc9.m
+++ b/Test/testc9.m
@@ -1,7 +1,7 @@
 function testc9
 %TESTC9 test complex extractTuples
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 seed = 1 ;
diff --git a/Test/testca.m b/Test/testca.m
index 78c811ea68..17303bb123 100644
--- a/Test/testca.m
+++ b/Test/testca.m
@@ -1,7 +1,7 @@
 function testca
 %TESTCA test complex mxm, mxv, and vxm
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 fprintf ('testca: test complex mxm, mxv, and vxm\n') ;
@@ -11,9 +11,9 @@
 dtn = struct ('inp0', 'tran') ;
 dtt = struct ('inp0', 'tran', 'inp1', 'tran') ;
 
-algos = {'auto', 'heap', 'gustavson', 'dot'} ;
+algos = {'auto', 'heap', 'gustavson', 'dot', 'hash', 'saxpy'} ;
 
-for kk = 1:4
+for kk = 1:length(algos)
 dnn.algo = algos {kk} ;
 dnt.algo = algos {kk} ;
 dtn.algo = algos {kk} ;
diff --git a/Test/testcb.m b/Test/testcb.m
index 8fba222e57..b35caa50b8 100644
--- a/Test/testcb.m
+++ b/Test/testcb.m
@@ -1,7 +1,7 @@
 function testcb
 %TESTCB test complex reduce
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 tol = 1e-13 ;
diff --git a/Test/testcc.m b/Test/testcc.m
index 5f5a424221..e28298d860 100644
--- a/Test/testcc.m
+++ b/Test/testcc.m
@@ -1,7 +1,7 @@
 function testcc
 %TESTCC test complex transpose
 
-% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 % http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 dt = struct ('inp0', 'tran') ;
diff --git a/Test/testperf.m b/Test/testperf.m
index d7437fda0d..987ec98297 100644
--- a/Test/testperf.m
+++ b/Test/testperf.m
@@ -1,6 +1,9 @@
 function testperf
 %TESTPERF run all performance tests
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 t = tic ;
 fprintf ('\ntestperf:  run all performance tests\n') ;
 
diff --git a/Test/testsort.m b/Test/testsort.m
index dfa1ee1408..0330f77db1 100644
--- a/Test/testsort.m
+++ b/Test/testsort.m
@@ -1,5 +1,8 @@
 %TESTSORT test qsort and msort
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all 
 make
 
diff --git a/Test/tt.m b/Test/tt.m
index 441c6ea66f..d2e5064c95 100644
--- a/Test/tt.m
+++ b/Test/tt.m
@@ -1,5 +1,8 @@
 %TT test eWiseMult and A+B
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 tic ;
 C = A + B ;
 t = toc ;
diff --git a/Test/ttest.m b/Test/ttest.m
index 5281c93a24..aa7c21cd69 100644
--- a/Test/ttest.m
+++ b/Test/ttest.m
@@ -1,4 +1,8 @@
 % test GrB_extractTuples
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 addpath ('~/ssget') ;
diff --git a/Test/ttt.m b/Test/ttt.m
index 956574ad30..b96e3c69b0 100644
--- a/Test/ttt.m
+++ b/Test/ttt.m
@@ -1,9 +1,12 @@
 %TTT various tests
 
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 test53 ;  % quick test of GB_mex_Matrix_extract
 test24 ;  % test of GrB_Matrix_reduce
 test10 ;  % GrB_apply
-test90 ;  % test pre-compiled user-defined semirings
+test90 ;  % test user-defined semirings
 test21b ; % quick test of GB_mex_assign
 test21 ;  % quick test of GB_mex_subassign
 test16 ;  % user-defined complex operators
diff --git a/Test/xtest.m b/Test/xtest.m
index b6c714910a..f7c35eef96 100644
--- a/Test/xtest.m
+++ b/Test/xtest.m
@@ -1,4 +1,8 @@
 % test GrB_extract
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 make
 addpath ('~/ssget') ;
diff --git a/Test/ztest.m b/Test/ztest.m
index f301cff7c8..346f7823c3 100644
--- a/Test/ztest.m
+++ b/Test/ztest.m
@@ -1,4 +1,8 @@
 % test zombie deletion
+
+% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
+% http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
 clear all
 nthreads_set(1)
 grbinfo
diff --git a/User/Example/.gitignore b/User/Example/.gitignore
deleted file mode 100644
index f28ac6b0e1..0000000000
--- a/User/Example/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# Do not ignore this file
-!.gitignore
-
diff --git a/User/Example/my_band.m4 b/User/Example/my_band.m4
deleted file mode 100644
index c038b6448c..0000000000
--- a/User/Example/my_band.m4
+++ /dev/null
@@ -1,34 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_band.m4: example user built-in objects
-//------------------------------------------------------------------------------
-
-// user-defined functions for GxB_select, to choose entries within a band
-
-#ifdef GxB_USER_INCLUDE
-
-    #define MY_BAND
-
-    typedef struct
-    {
-        int64_t lo ;
-        int64_t hi ;
-    }
-    my_bandwidth_type ;
-
-    static inline bool myband (GrB_Index i, GrB_Index j, GrB_Index nrows,
-        GrB_Index ncols, /* x is unused: */ const void *x,
-        const my_bandwidth_type *thunk)
-    {
-        int64_t i2 = (int64_t) i ;
-        int64_t j2 = (int64_t) j ;
-        return ((thunk->lo <= (j2-i2)) && ((j2-i2) <= thunk->hi)) ;
-    }
-
-#endif
-
-// The type of the thunk parameter
-GxB_Type_define(My_bandwidth_type, my_bandwidth_type) ;
-
-// Select operator to compute C = tril (triu (A, k1), k2)
-GxB_SelectOp_define(My_band, myband, NULL, My_bandwidth_type) ;
-
diff --git a/User/Example/my_boolean.m4 b/User/Example/my_boolean.m4
deleted file mode 100644
index 22562e380c..0000000000
--- a/User/Example/my_boolean.m4
+++ /dev/null
@@ -1,22 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_terminal.m4: example user built-in objects
-//------------------------------------------------------------------------------
-
-// user-defined Boolean semiring.  This is just for testing.  The semiring
-// is identical to GxB_LOR_LAND_BOOL, and the monoid is identical to
-// GxB_LOR_BOOL_MONOID.  The only difference is that these objects are
-// user-defined.
-
-#ifdef GxB_USER_INCLUDE
-
-    #define MY_BOOL
-
-#endif
-
-// The LOR monoid, with identity = false and terminal = true
-GxB_Monoid_terminal_define(My_LOR, GrB_LOR, false, true) ;
-
-// The LOR_LAND semiring
-GxB_Semiring_define(My_LOR_LAND, My_LOR, GrB_LAND) ;
-
-
diff --git a/User/Example/my_complex.m4 b/User/Example/my_complex.m4
deleted file mode 100644
index 1af56caf01..0000000000
--- a/User/Example/my_complex.m4
+++ /dev/null
@@ -1,64 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_complex.m4: example user built-in objects
-//------------------------------------------------------------------------------
-
-// user-defined functions for a double complex type
-
-#ifdef GxB_USER_INCLUDE
-
-    // Get the complex.h definitions, but remove "I" since it is used elsewhere
-    // in GraphBLAS.
-    #include <complex.h>
-    #undef I
-
-    // Not all complex.h definitions include the CMPLX macro
-    #ifndef CMPLX
-    #define CMPLX(real,imag) \
-        ( \
-        (double complex)((double)(real)) + \
-        (double complex)((double)(imag) * _Complex_I) \
-        )
-    #endif
-
-    // define a token so a user application can check for existence 
-    #define MY_COMPLEX
-
-    static inline void my_complex_plus
-    (
-        double complex *z,
-        const double complex *x,
-        const double complex *y
-    )
-    {
-        (*z) = (*x) + (*y) ;
-    }
-
-    static inline void my_complex_times
-    (
-        double complex *z,
-        const double complex *x,
-        const double complex *y
-    )
-    {
-        (*z) = (*x) * (*y) ;
-    }
-
-#endif
-
-// GraphBLAS does not have a complex type; this defines one:
-GxB_Type_define(My_Complex, double complex) ;
-
-// The two operators, complex add and multiply:
-GxB_BinaryOp_define(My_Complex_plus,  my_complex_plus, 
-    My_Complex, My_Complex, My_Complex) ;
-
-GxB_BinaryOp_define(My_Complex_times, my_complex_times,
-    My_Complex, My_Complex, My_Complex) ;
-
-// The plus monoid:
-GxB_Monoid_define(My_Complex_plus_monoid, My_Complex_plus, CMPLX(0,0)) ;
-
-// the conventional plus-times semiring for C=A*B for the complex case
-GxB_Semiring_define(My_Complex_plus_times, My_Complex_plus_monoid,
-    My_Complex_times) ;
-
diff --git a/User/Example/my_max.m4 b/User/Example/my_max.m4
deleted file mode 100644
index 4ec54729ab..0000000000
--- a/User/Example/my_max.m4
+++ /dev/null
@@ -1,30 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_max.m4: example user built-in objects
-//------------------------------------------------------------------------------
-
-// user-defined MAX functions for GxB_Monoid_terminal_new, to choose a
-// non-default terminal value
-
-#ifdef GxB_USER_INCLUDE
-
-    #define MY_MAX
-
-    static inline void my_maxdouble
-    (
-        double *z,
-        const double *x,
-        const double *y
-    )
-    {
-        // this is not safe with NaNs
-        (*z) = ((*x) > (*y)) ? (*x) : (*y) ;
-    }
-
-#endif
-
-// max operator
-GxB_BinaryOp_define(My_Max, my_maxdouble, GrB_FP64, GrB_FP64, GrB_FP64) ;
-
-// The max monoid, with terminal value of 1
-GxB_Monoid_terminal_define(My_Max_Terminal1, My_Max, (-INFINITY), 1) ;
-
diff --git a/User/Example/my_pagerank.m4 b/User/Example/my_pagerank.m4
deleted file mode 100644
index 756c8e03e4..0000000000
--- a/User/Example/my_pagerank.m4
+++ /dev/null
@@ -1,199 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_pagerank.m4: PageRank semiring
-//------------------------------------------------------------------------------
-
-// Defines a PageRank type, operators, monoid, and semiring for the method in
-// Demo/Source/dpagerank2.c.
-
-#ifdef GxB_USER_INCLUDE
-
-// Define a token that dpagerank2.c can use to determine if these definitions
-// are available at compile-time.
-#define PAGERANK_PREDEFINED
-
-// probability of walking to random neighbor
-#define PAGERANK_DAMPING 0.85
-
-// each node has a rank value, and a constant which is 1/outdegree
-typedef struct
-{
-    double rank ;
-    double invdegree ;
-}
-pagerank_type ;
-
-// global values shared by all threads in a single pagerank computation:
-extern double pagerank_teleport, pagerank_init_rank, pagerank_rsum ;
-
-// The identity value for the pagerank_add monoid is {0,0}. For the
-// GxB_*_define macro that defines the GrB_Monoid, the identity argument must
-// be a compile-time constant (for the C definition), and it must also be
-// parsable as an argument to the m4 macro.  If the user-defined type is a
-// struct, the initializer uses curly brackets, but this causes a parsing error
-// for m4.  The solution is to define a C macro with the initialization
-// constant, and to use it in the GxB_*define m4 macro.
-#define PAGERANK_ZERO {0,0}
-
-// unary operator to divide a double entry by the scalar pagerank_rsum
-static inline
-void pagerank_div (double *z, const double *x)
-{
-    (*z) = (*x) / pagerank_rsum ;
-}
-
-// unary operator that typecasts PageRank_type to double, extracting the rank
-static inline
-void pagerank_get_rank (double *z, const pagerank_type *x)
-{
-    (*z) = (x->rank) ;
-}
-
-// unary operator to initialize a node
-static inline
-void init_page (pagerank_type *z, const double *x)
-{
-    z->rank = pagerank_init_rank ;  // all nodes start with rank 1/n
-    z->invdegree = 1. / (*x) ;      // set 1/outdegree of this node 
-}
-
-//------------------------------------------------------------------------------
-// PageRank semiring
-//------------------------------------------------------------------------------
-
-// In MATLAB notation, the new rank is computed with:
-// newrank = PAGERANK_DAMPING * (rank * D * A) + pagerank_teleport
-
-// where A is a square binary matrix of the original graph, and A(i,j)=1 if
-// page i has a link to page j.  rank is a row vector of size n.  The matrix D
-// is diagonal, with D(i,i)=1/outdegree(i), where outdegree(i) = the outdegree
-// of node i, or equivalently, outdegree(i) = sum (A (i,:)).
-
-// That is, if newrank(j) were computed with a dot product:
-//      newrank (j) = 0
-//      for all i:
-//          newrank (j) = newrank (j) + (rank (i) * D (i,i)) * A (i,j)
-
-// To accomplish this computation in a single vector-matrix multiply, the value
-// of D(i,i) is held as component of a combined data type, the pagerank_type,
-// which has both the rank(i) and the entry D(i,i).
-
-// binary multiplicative operator for the pagerank semiring
-static inline
-void pagerank_multiply
-(
-    pagerank_type *z,
-    const pagerank_type *x,
-    const bool *y
-)
-{
-    // y is the boolean entry of the matrix, A(i,j)
-    // x->rank is the rank of node i, and x->invdegree is 1/outdegree(i)
-    // note that z->invdegree is left unchanged
-    z->rank = (*y) ? ((x->rank) * (x->invdegree)) : 0 ;
-}
-
-// binary additive operator for the pagerank semiring
-static inline
-void pagerank_add
-(
-    pagerank_type *z,
-    const pagerank_type *x,
-    const pagerank_type *y
-)
-{
-    // note that z->invdegree is left unchanged; it is unused
-    z->rank = (x->rank) + (y->rank) ;
-}
-
-//------------------------------------------------------------------------------
-// pagerank accumulator
-//------------------------------------------------------------------------------
-
-// The semiring computes the vector newrank = rank*D*A.  To complete the page
-// rank computation, the new rank must be scaled by the
-// PAGERANK_DAMPING, and the pagerank_teleport must be included, which is
-// done in the page rank accumulator:
-
-// newrank = PAGERANK_DAMPING * newrank + pagerank_teleport
-
-// The PageRank_semiring does not construct the entire pagerank_type of
-// rank*D*A, since the vector that holds newrank(i) must also keep the
-// 1/invdegree(i), unchanged.  This is restored in the accumulator operator.
-
-// binary operator to accumulate the new rank from the old
-static inline
-void pagerank_accum
-(
-    pagerank_type *z,
-    const pagerank_type *x,
-    const pagerank_type *y
-)
-{
-    // note that this formula does not use the old rank:
-    // new rank = PAGERANK_DAMPING * (rank*A ) + pagerank_teleport
-    double rnew = PAGERANK_DAMPING * (y->rank) + pagerank_teleport ;
-
-    // update the rank, and copy over the inverse degree from the old page info
-    z->rank = rnew ;
-    z->invdegree = x->invdegree ;
-}
-
-//------------------------------------------------------------------------------
-// pagerank_diff: compute the change in the rank
-//------------------------------------------------------------------------------
-
-static inline
-void pagerank_diff
-(
-    pagerank_type *z,
-    const pagerank_type *x,
-    const pagerank_type *y
-)
-{
-    double delta = (x->rank) - (y->rank) ;
-    z->rank = delta * delta ;
-}
-
-#else
-
-// global variable definitions
-double pagerank_teleport, pagerank_init_rank, pagerank_rsum ;
-
-#endif
-
-// create the new Page type
-GxB_Type_define(PageRank_type, pagerank_type) ;
-
-// create the unary operator to initialize the PageRank_type of each node
-GxB_UnaryOp_define(PageRank_init, init_page, PageRank_type, GrB_FP64) ;
-
-// create PageRank_accum
-GxB_BinaryOp_define(PageRank_accum, pagerank_accum,
-    PageRank_type, PageRank_type, PageRank_type) ;
-
-// create PageRank_add operator and monoid
-GxB_BinaryOp_define(PageRank_add, pagerank_add,
-    PageRank_type, PageRank_type, PageRank_type) ;
-
-// create PageRank_monoid.  See the discussion above for PAGERANK_ZERO.
-GxB_Monoid_define(PageRank_monoid, PageRank_add, PAGERANK_ZERO) ;
-
-// create PageRank_multiply operator
-GxB_BinaryOp_define(PageRank_multiply, pagerank_multiply,
-    PageRank_type, PageRank_type, GrB_BOOL) ;
-
-// create PageRank_semiring
-GxB_Semiring_define(PageRank_semiring, PageRank_monoid,
-    PageRank_multiply) ;
-
-// create unary operator that typecasts the PageRank_type to double
-GxB_UnaryOp_define(PageRank_get, pagerank_get_rank, GrB_FP64,
-    PageRank_type) ;
-
-// create unary operator that scales the rank by pagerank_rsum
-GxB_UnaryOp_define(PageRank_div, pagerank_div, GrB_FP64, GrB_FP64) ;
-
-// create PageRank_diff operator
-GxB_BinaryOp_define(PageRank_diff, pagerank_diff,
-        PageRank_type, PageRank_type, PageRank_type) ;
-
diff --git a/User/Example/my_plus_rdiv.m4 b/User/Example/my_plus_rdiv.m4
deleted file mode 100644
index 3420764605..0000000000
--- a/User/Example/my_plus_rdiv.m4
+++ /dev/null
@@ -1,26 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_plus_rdiv.m4: example user built-in objects
-//------------------------------------------------------------------------------
-
-#ifdef GxB_USER_INCLUDE
-
-#define MY_RDIV
-
-    static inline void my_rdiv
-    (
-        double *z,
-        const double *x,
-        const double *y
-    )
-    {
-        (*z) = (*y) / (*x) ;
-    }
-
-#endif
-
-// rdiv operator
-GxB_BinaryOp_define(My_rdiv,  my_rdiv,  GrB_FP64, GrB_FP64, GrB_FP64) ;
-
-// plus-rdiv semiring
-GxB_Semiring_define(My_plus_rdiv, GxB_PLUS_FP64_MONOID, My_rdiv) ;
-
diff --git a/User/Example/my_plus_rdiv2.m4 b/User/Example/my_plus_rdiv2.m4
deleted file mode 100644
index 2b3c0e8533..0000000000
--- a/User/Example/my_plus_rdiv2.m4
+++ /dev/null
@@ -1,29 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_plus_rdiv2.m4: example user built-in objects
-//------------------------------------------------------------------------------
-
-// This version tests the case when the user-defined multiply operator
-// has a different type for x and y.
-
-#ifdef GxB_USER_INCLUDE
-
-    #define MY_RDIV2
-
-    static inline void my_rdiv2
-    (
-        double *z,
-        const double *x,
-        const float *y
-    )
-    {
-        (*z) = ((double) (*y)) / (*x) ;
-    }
-
-#endif
-
-// rdiv2 operator
-GxB_BinaryOp_define(My_rdiv2,  my_rdiv2,  GrB_FP64, GrB_FP64, GrB_FP32) ;
-
-// plus-rdiv2 semiring
-GxB_Semiring_define(My_plus_rdiv2, GxB_PLUS_FP64_MONOID, My_rdiv2) ;
-
diff --git a/User/Example/my_scale.m4 b/User/Example/my_scale.m4
deleted file mode 100644
index 8483393316..0000000000
--- a/User/Example/my_scale.m4
+++ /dev/null
@@ -1,53 +0,0 @@
-//------------------------------------------------------------------------------
-// GraphBLAS/User/Example/my_scale.m4: example user built-in objects
-//------------------------------------------------------------------------------
-
-// user-defined unary operator: z = f(x) = my_scalar*x and its global scalar
-
-#ifdef GxB_USER_INCLUDE
-
-    //--------------------------------------------------------------------------
-    // declarations: for GraphBLAS.h
-    //--------------------------------------------------------------------------
-
-    // The following are declarations that are enabled in GraphBLAS.h and
-    // appear in all user codes that #include "GraphBLAS.h", and also in all
-    // internal GraphBLAS codes.  All user declarations (not definitions)
-    // should appear here.
-
-    #define MY_SCALE
-
-    extern double my_scalar ;
-
-    static inline void my_scale
-    (
-        double *z,
-        const double *x
-    )
-    {
-        (*z) = my_scalar * (*x) ;
-    }
-
-#else
-
-    //--------------------------------------------------------------------------
-    // definitions: code appears just once, in Source/all_user_objects.c
-    //--------------------------------------------------------------------------
-
-    // The following defintions are enabled in only a single place:
-    // SuiteSparse/GraphBLAS/Source/all_user_objects.c.  This is the place
-    // where all user-defined global variables should be defined.
-
-    double my_scalar = 0 ;
-
-#endif
-
-
-//------------------------------------------------------------------------------
-// define/declare the GrB_UnaryOp My_scale
-//------------------------------------------------------------------------------
-
-// Unary operator to compute z = my_scalar*x
-
-GxB_UnaryOp_define(My_scale, my_scale, GrB_FP64, GrB_FP64) ;
-
diff --git a/User/README.txt b/User/README.txt
deleted file mode 100644
index 3ac2d36a50..0000000000
--- a/User/README.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-SuiteSparse/GraphBLAS/User: compile-time user-defined objects
---------------------------------------------------------------------------------
-
-To use the pre-compiled user-defined types, operators, monoids, and semirings,
-move one or more of the User/Example/*m4 files into this User/ directory, or
-create your own *m4 file using the GxB_*_define functions.  Then recompile
-SuiteSparse:GraphBLAS (cmake needs to be rerun).  The cmake process will
-construct the appropriate compile-time definitions.  The declarations for all
-such user-defined objects are appended to GraphBLAS/Include/GraphBLAS.h.
-
-Example *.m4 files in the User/Example folder are listed below.  These are not
-activated by default.  Move them from User/Example to User/ to use them.
-
-my_complex.m4           double complex type, and standard plus-times-complex
-                        semiring.  See also Demo/Source/usercomplex.c for a
-                        complete set of operators defined a run-time instead.
-
-my_plus_rdiv.m4         a binary operator z=y/x for double x,y,z
-
-my_plus_rdiv2.m4        a binary operator z=y/x for single x, double x,y
-
-my_scale.m4             a unary operator, z=my_scalar*x for double x,z
-
-my_band.m4              a select operator for C=tril(triu(A,hi),lo)
-
-my_pagerank.m4          pagerank type, operators, and semiring for
-                        Demo/Source/dpagerank2.c
-
-my_max.m4               user-defined monoid, with terminal value
-
diff --git a/alternative/Makefile b/alternative/Makefile
index 703758b905..545edc8949 100644
--- a/alternative/Makefile
+++ b/alternative/Makefile
@@ -2,18 +2,22 @@
 # GraphBLAS/alternative/Makefile
 #-------------------------------------------------------------------------------
 
-#  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
+#  SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
 #  http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
 
 #-------------------------------------------------------------------------------
 
+VER1 = 3
+VER2 = 2
+VER3 = 0
+
 # pick your compiler:
-# CC = gcc
+  CC = gcc
 # CC = cc
 # CC = clang
 # CC = xlc
 # CC = gcc-8
-  CC = icc -mp1
+# CC = icc -mp1
 
 # note that -mp1 is essential for icc, for proper Inf and NaN behavior.
 
@@ -21,8 +25,8 @@ SRC = ../Source/*.c ../Source/Generated/*.c
 INC = ../Include/*.h ../Source/*.h ../Source/Template/* ../Source/Generated/*.h ../Source/Generator/*.h 
 SRC2 = $(notdir $(wildcard $(SRC)))
 OBJ = $(SRC2:.c=.o)
-LDFLAGS = -fopenmp
-CFLAGS = -O3 -std=c11 -fopenmp -fexceptions -fPIC
+LDFLAGS = -fopenmp -lm
+CFLAGS = -O3 -std=c11 -fopenmp -fexceptions -fPIC -DUSER_OPENMP_THREADS
 CPPFLAGS = -I../Include -I../Source -I../Source/Template -I../Source/Generated -I../Source/Generator
 SO_OPTS = $(LDFLAGS)
 
@@ -30,12 +34,16 @@ UNAME := $(shell uname)
 ifeq ($(UNAME),Darwin)
     # Mac
     CFLAGS += -fno-common
-    SO_NAME = libgraphblas.dylib
+    SO_NAME = libgraphblas.dylib.$(VER1).$(VER2).$(VER3)
+    SO_NAME0 = libgraphblas.dylib
+    SO_NAME1 = libgraphblas.dylib.$(VER1)
     SO_OPTS += -dynamiclib -shared -undefined dynamic_lookup
 else
     # Linux
-    SO_NAME = libgraphblas.so
-    SO_OPTS += -shared -Wl,-soname -Wl,$(SO_NAME) -Wl,--no-undefined
+    SO_NAME = libgraphblas.so.$(VER1).$(VER2).$(VER3)
+    SO_NAME0 = libgraphblas.so
+    SO_NAME1 = libgraphblas.so.$(VER1)
+    SO_OPTS += -shared -Wl,-soname -Wl,$(SO_NAME1)
 endif
 
 %.o: ../Source/%.c $(INC)
@@ -58,8 +66,10 @@ libgraphblas.a: $(OBJ)
 	- ranlib $@
 
 install: $(SO_NAME)
-	$(CP) $(SO_NAME) /usr/local/lib
-	$(CP) ../Include/GraphBLAS.h  /usr/local/include
+	cp $(SO_NAME) /usr/local/lib
+	ln -sf $(SO_NAME) /usr/local/lib/$(SO_NAME0)
+	ln -sf $(SO_NAME) /usr/local/lib/$(SO_NAME1)
+	cp ../Include/GraphBLAS.h  /usr/local/include
 
 DINC = ../Demo/Include/*.h $(INC)
 DSRC = ../Demo/Source/*.c
diff --git a/alternative/README.txt b/alternative/README.txt
index 4cfe56cd2b..d369d39b8a 100644
--- a/alternative/README.txt
+++ b/alternative/README.txt
@@ -2,11 +2,11 @@ SuiteSparse/GraphBLAS/alternative/README.txt
 
 CMake is prefered for compiling SuiteSparse/GraphBLAS, but this folder provides
 a simple alternative, if you don't have CMake or if you prefer a simple
-Makefile build process.  The Makefile does not install any user-defined objects
-in ../User/*.m4, and it will typically require modifications before it will
-work on your system.  It assumes you have the Intel compiler (icc), but this
-can easily be changed by editting the Makefile.  See the ifeq (UNAME...)
-section in the Makefile to customize the settings for Linux or the Mac.
+Makefile build process.  The Makefile will typically require modifications
+before it will work on your system.  It assumes you have the GNU compiler
+(gcc), but this can easily be changed by editing the Makefile.  See the ifeq
+(UNAME...) section in the Makefile to customize the settings for Linux or the
+Mac.
 
 Files:
 
@@ -17,7 +17,7 @@ Files:
 to compile the dynamic library:       make
 to compile the static library:        make static
 to compile in parallel with 4 cores:  make -j4
-to install in /usr/local/*:           make install
+to install in /usr/local/*:           sudo make install
 to cleanup:                           make distclean
 to compile and run the demos:         make run
 
diff --git a/build/.gitignore b/build/.gitignore
index 0e2f865e8a..52e15321b7 100644
--- a/build/.gitignore
+++ b/build/.gitignore
@@ -1,4 +1,4 @@
-# Ignore all files in GraphBLAS/build except this file.
+# Ignore all files except this file.
 *
 */
 !.gitignore